diff options
Diffstat (limited to 'fs')
153 files changed, 7910 insertions, 2292 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 94b9d861bf9b..613df554728d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -991,7 +991,7 @@ config TMPFS_POSIX_ACL | |||
991 | 991 | ||
992 | config HUGETLBFS | 992 | config HUGETLBFS |
993 | bool "HugeTLB file system support" | 993 | bool "HugeTLB file system support" |
994 | depends on X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN | 994 | depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN |
995 | help | 995 | help |
996 | hugetlbfs is a filesystem backing for HugeTLB pages, based on | 996 | hugetlbfs is a filesystem backing for HugeTLB pages, based on |
997 | ramfs. For architectures that support it, say Y here and read | 997 | ramfs. For architectures that support it, say Y here and read |
@@ -1675,6 +1675,7 @@ config NFSD_V3_ACL | |||
1675 | config NFSD_V4 | 1675 | config NFSD_V4 |
1676 | bool "Provide NFSv4 server support (EXPERIMENTAL)" | 1676 | bool "Provide NFSv4 server support (EXPERIMENTAL)" |
1677 | depends on NFSD_V3 && EXPERIMENTAL | 1677 | depends on NFSD_V3 && EXPERIMENTAL |
1678 | select RPCSEC_GSS_KRB5 | ||
1678 | help | 1679 | help |
1679 | If you would like to include the NFSv4 server as well as the NFSv2 | 1680 | If you would like to include the NFSv4 server as well as the NFSv2 |
1680 | and NFSv3 servers, say Y here. This feature is experimental, and | 1681 | and NFSv3 servers, say Y here. This feature is experimental, and |
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 73ce561f3ea0..a66671082cfb 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile | |||
@@ -8,6 +8,7 @@ kafs-objs := \ | |||
8 | cmservice.o \ | 8 | cmservice.o \ |
9 | dir.o \ | 9 | dir.o \ |
10 | file.o \ | 10 | file.o \ |
11 | flock.o \ | ||
11 | fsclient.o \ | 12 | fsclient.o \ |
12 | inode.o \ | 13 | inode.o \ |
13 | main.o \ | 14 | main.o \ |
diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 245257948140..c548aa346f0d 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h | |||
@@ -37,6 +37,13 @@ typedef enum { | |||
37 | AFS_FTYPE_SYMLINK = 3, | 37 | AFS_FTYPE_SYMLINK = 3, |
38 | } afs_file_type_t; | 38 | } afs_file_type_t; |
39 | 39 | ||
40 | typedef enum { | ||
41 | AFS_LOCK_READ = 0, /* read lock request */ | ||
42 | AFS_LOCK_WRITE = 1, /* write lock request */ | ||
43 | } afs_lock_type_t; | ||
44 | |||
45 | #define AFS_LOCKWAIT (5 * 60) /* time until a lock times out (seconds) */ | ||
46 | |||
40 | /* | 47 | /* |
41 | * AFS file identifier | 48 | * AFS file identifier |
42 | */ | 49 | */ |
@@ -120,6 +127,7 @@ struct afs_file_status { | |||
120 | struct afs_fid parent; /* parent dir ID for non-dirs only */ | 127 | struct afs_fid parent; /* parent dir ID for non-dirs only */ |
121 | time_t mtime_client; /* last time client changed data */ | 128 | time_t mtime_client; /* last time client changed data */ |
122 | time_t mtime_server; /* last time server changed data */ | 129 | time_t mtime_server; /* last time server changed data */ |
130 | s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ | ||
123 | }; | 131 | }; |
124 | 132 | ||
125 | /* | 133 | /* |
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index a18c374ebe08..eb647323d8f0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h | |||
@@ -31,6 +31,9 @@ enum AFS_FS_Operations { | |||
31 | FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ | 31 | FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ |
32 | FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ | 32 | FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ |
33 | FSGETROOTVOLUME = 151, /* AFS Get root volume name */ | 33 | FSGETROOTVOLUME = 151, /* AFS Get root volume name */ |
34 | FSSETLOCK = 156, /* AFS Request a file lock */ | ||
35 | FSEXTENDLOCK = 157, /* AFS Extend a file lock */ | ||
36 | FSRELEASELOCK = 158, /* AFS Release a file lock */ | ||
34 | FSLOOKUP = 161, /* AFS lookup file in directory */ | 37 | FSLOOKUP = 161, /* AFS lookup file in directory */ |
35 | FSFETCHDATA64 = 65537, /* AFS Fetch file data */ | 38 | FSFETCHDATA64 = 65537, /* AFS Fetch file data */ |
36 | FSSTOREDATA64 = 65538, /* AFS Store file data */ | 39 | FSSTOREDATA64 = 65538, /* AFS Store file data */ |
diff --git a/fs/afs/callback.c b/fs/afs/callback.c index bacf518c6fa8..b8243945818d 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c | |||
@@ -125,6 +125,9 @@ static void afs_break_callback(struct afs_server *server, | |||
125 | spin_unlock(&server->cb_lock); | 125 | spin_unlock(&server->cb_lock); |
126 | 126 | ||
127 | queue_work(afs_callback_update_worker, &vnode->cb_broken_work); | 127 | queue_work(afs_callback_update_worker, &vnode->cb_broken_work); |
128 | if (list_empty(&vnode->granted_locks) && | ||
129 | !list_empty(&vnode->pending_locks)) | ||
130 | afs_lock_may_be_available(vnode); | ||
128 | spin_unlock(&vnode->lock); | 131 | spin_unlock(&vnode->lock); |
129 | } | 132 | } |
130 | } | 133 | } |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 546c59522eb1..33fe39ad4e03 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -44,6 +44,7 @@ const struct file_operations afs_dir_file_operations = { | |||
44 | .open = afs_dir_open, | 44 | .open = afs_dir_open, |
45 | .release = afs_release, | 45 | .release = afs_release, |
46 | .readdir = afs_readdir, | 46 | .readdir = afs_readdir, |
47 | .lock = afs_lock, | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | const struct inode_operations afs_dir_inode_operations = { | 50 | const struct inode_operations afs_dir_inode_operations = { |
diff --git a/fs/afs/file.c b/fs/afs/file.c index aede7eb66dd4..525f7c56e068 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -34,6 +34,8 @@ const struct file_operations afs_file_operations = { | |||
34 | .mmap = generic_file_readonly_mmap, | 34 | .mmap = generic_file_readonly_mmap, |
35 | .splice_read = generic_file_splice_read, | 35 | .splice_read = generic_file_splice_read, |
36 | .fsync = afs_fsync, | 36 | .fsync = afs_fsync, |
37 | .lock = afs_lock, | ||
38 | .flock = afs_flock, | ||
37 | }; | 39 | }; |
38 | 40 | ||
39 | const struct inode_operations afs_file_inode_operations = { | 41 | const struct inode_operations afs_file_inode_operations = { |
diff --git a/fs/afs/flock.c b/fs/afs/flock.c new file mode 100644 index 000000000000..8f07f8d1bfa9 --- /dev/null +++ b/fs/afs/flock.c | |||
@@ -0,0 +1,558 @@ | |||
1 | /* AFS file locking support | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/smp_lock.h> | ||
13 | #include "internal.h" | ||
14 | |||
15 | #define AFS_LOCK_GRANTED 0 | ||
16 | #define AFS_LOCK_PENDING 1 | ||
17 | |||
18 | static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); | ||
19 | static void afs_fl_release_private(struct file_lock *fl); | ||
20 | |||
21 | static struct workqueue_struct *afs_lock_manager; | ||
22 | |||
23 | static struct file_lock_operations afs_lock_ops = { | ||
24 | .fl_copy_lock = afs_fl_copy_lock, | ||
25 | .fl_release_private = afs_fl_release_private, | ||
26 | }; | ||
27 | |||
28 | /* | ||
29 | * initialise the lock manager thread if it isn't already running | ||
30 | */ | ||
31 | static int afs_init_lock_manager(void) | ||
32 | { | ||
33 | if (!afs_lock_manager) { | ||
34 | afs_lock_manager = create_singlethread_workqueue("kafs_lockd"); | ||
35 | if (!afs_lock_manager) | ||
36 | return -ENOMEM; | ||
37 | } | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * destroy the lock manager thread if it's running | ||
43 | */ | ||
44 | void __exit afs_kill_lock_manager(void) | ||
45 | { | ||
46 | if (afs_lock_manager) | ||
47 | destroy_workqueue(afs_lock_manager); | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * if the callback is broken on this vnode, then the lock may now be available | ||
52 | */ | ||
53 | void afs_lock_may_be_available(struct afs_vnode *vnode) | ||
54 | { | ||
55 | _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); | ||
56 | |||
57 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * the lock will time out in 5 minutes unless we extend it, so schedule | ||
62 | * extension in a bit less than that time | ||
63 | */ | ||
64 | static void afs_schedule_lock_extension(struct afs_vnode *vnode) | ||
65 | { | ||
66 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, | ||
67 | AFS_LOCKWAIT * HZ / 2); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * do work for a lock, including: | ||
72 | * - probing for a lock we're waiting on but didn't get immediately | ||
73 | * - extending a lock that's close to timing out | ||
74 | */ | ||
75 | void afs_lock_work(struct work_struct *work) | ||
76 | { | ||
77 | struct afs_vnode *vnode = | ||
78 | container_of(work, struct afs_vnode, lock_work.work); | ||
79 | struct file_lock *fl; | ||
80 | afs_lock_type_t type; | ||
81 | struct key *key; | ||
82 | int ret; | ||
83 | |||
84 | _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); | ||
85 | |||
86 | spin_lock(&vnode->lock); | ||
87 | |||
88 | if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) { | ||
89 | _debug("unlock"); | ||
90 | spin_unlock(&vnode->lock); | ||
91 | |||
92 | /* attempt to release the server lock; if it fails, we just | ||
93 | * wait 5 minutes and it'll time out anyway */ | ||
94 | ret = afs_vnode_release_lock(vnode, vnode->unlock_key); | ||
95 | if (ret < 0) | ||
96 | printk(KERN_WARNING "AFS:" | ||
97 | " Failed to release lock on {%x:%x} error %d\n", | ||
98 | vnode->fid.vid, vnode->fid.vnode, ret); | ||
99 | |||
100 | spin_lock(&vnode->lock); | ||
101 | key_put(vnode->unlock_key); | ||
102 | vnode->unlock_key = NULL; | ||
103 | clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags); | ||
104 | } | ||
105 | |||
106 | /* if we've got a lock, then it must be time to extend that lock as AFS | ||
107 | * locks time out after 5 minutes */ | ||
108 | if (!list_empty(&vnode->granted_locks)) { | ||
109 | _debug("extend"); | ||
110 | |||
111 | if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) | ||
112 | BUG(); | ||
113 | fl = list_entry(vnode->granted_locks.next, | ||
114 | struct file_lock, fl_u.afs.link); | ||
115 | key = key_get(fl->fl_file->private_data); | ||
116 | spin_unlock(&vnode->lock); | ||
117 | |||
118 | ret = afs_vnode_extend_lock(vnode, key); | ||
119 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
120 | key_put(key); | ||
121 | switch (ret) { | ||
122 | case 0: | ||
123 | afs_schedule_lock_extension(vnode); | ||
124 | break; | ||
125 | default: | ||
126 | /* ummm... we failed to extend the lock - retry | ||
127 | * extension shortly */ | ||
128 | printk(KERN_WARNING "AFS:" | ||
129 | " Failed to extend lock on {%x:%x} error %d\n", | ||
130 | vnode->fid.vid, vnode->fid.vnode, ret); | ||
131 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, | ||
132 | HZ * 10); | ||
133 | break; | ||
134 | } | ||
135 | _leave(" [extend]"); | ||
136 | return; | ||
137 | } | ||
138 | |||
139 | /* if we don't have a granted lock, then we must've been called back by | ||
140 | * the server, and so if might be possible to get a lock we're | ||
141 | * currently waiting for */ | ||
142 | if (!list_empty(&vnode->pending_locks)) { | ||
143 | _debug("get"); | ||
144 | |||
145 | if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) | ||
146 | BUG(); | ||
147 | fl = list_entry(vnode->pending_locks.next, | ||
148 | struct file_lock, fl_u.afs.link); | ||
149 | key = key_get(fl->fl_file->private_data); | ||
150 | type = (fl->fl_type == F_RDLCK) ? | ||
151 | AFS_LOCK_READ : AFS_LOCK_WRITE; | ||
152 | spin_unlock(&vnode->lock); | ||
153 | |||
154 | ret = afs_vnode_set_lock(vnode, key, type); | ||
155 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
156 | switch (ret) { | ||
157 | case -EWOULDBLOCK: | ||
158 | _debug("blocked"); | ||
159 | break; | ||
160 | case 0: | ||
161 | _debug("acquired"); | ||
162 | if (type == AFS_LOCK_READ) | ||
163 | set_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
164 | else | ||
165 | set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
166 | ret = AFS_LOCK_GRANTED; | ||
167 | default: | ||
168 | spin_lock(&vnode->lock); | ||
169 | /* the pending lock may have been withdrawn due to a | ||
170 | * signal */ | ||
171 | if (list_entry(vnode->pending_locks.next, | ||
172 | struct file_lock, fl_u.afs.link) == fl) { | ||
173 | fl->fl_u.afs.state = ret; | ||
174 | if (ret == AFS_LOCK_GRANTED) | ||
175 | list_move_tail(&fl->fl_u.afs.link, | ||
176 | &vnode->granted_locks); | ||
177 | else | ||
178 | list_del_init(&fl->fl_u.afs.link); | ||
179 | wake_up(&fl->fl_wait); | ||
180 | spin_unlock(&vnode->lock); | ||
181 | } else { | ||
182 | _debug("withdrawn"); | ||
183 | clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
184 | clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
185 | spin_unlock(&vnode->lock); | ||
186 | afs_vnode_release_lock(vnode, key); | ||
187 | if (!list_empty(&vnode->pending_locks)) | ||
188 | afs_lock_may_be_available(vnode); | ||
189 | } | ||
190 | break; | ||
191 | } | ||
192 | key_put(key); | ||
193 | _leave(" [pend]"); | ||
194 | return; | ||
195 | } | ||
196 | |||
197 | /* looks like the lock request was withdrawn on a signal */ | ||
198 | spin_unlock(&vnode->lock); | ||
199 | _leave(" [no locks]"); | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * pass responsibility for the unlocking of a vnode on the server to the | ||
204 | * manager thread, lest a pending signal in the calling thread interrupt | ||
205 | * AF_RXRPC | ||
206 | * - the caller must hold the vnode lock | ||
207 | */ | ||
208 | static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) | ||
209 | { | ||
210 | cancel_delayed_work(&vnode->lock_work); | ||
211 | if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) && | ||
212 | !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) | ||
213 | BUG(); | ||
214 | if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) | ||
215 | BUG(); | ||
216 | vnode->unlock_key = key_get(key); | ||
217 | afs_lock_may_be_available(vnode); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * request a lock on a file on the server | ||
222 | */ | ||
223 | static int afs_do_setlk(struct file *file, struct file_lock *fl) | ||
224 | { | ||
225 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
226 | afs_lock_type_t type; | ||
227 | struct key *key = file->private_data; | ||
228 | int ret; | ||
229 | |||
230 | _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); | ||
231 | |||
232 | /* only whole-file locks are supported */ | ||
233 | if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) | ||
234 | return -EINVAL; | ||
235 | |||
236 | ret = afs_init_lock_manager(); | ||
237 | if (ret < 0) | ||
238 | return ret; | ||
239 | |||
240 | fl->fl_ops = &afs_lock_ops; | ||
241 | INIT_LIST_HEAD(&fl->fl_u.afs.link); | ||
242 | fl->fl_u.afs.state = AFS_LOCK_PENDING; | ||
243 | |||
244 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; | ||
245 | |||
246 | lock_kernel(); | ||
247 | |||
248 | /* make sure we've got a callback on this file and that our view of the | ||
249 | * data version is up to date */ | ||
250 | ret = afs_vnode_fetch_status(vnode, NULL, key); | ||
251 | if (ret < 0) | ||
252 | goto error; | ||
253 | |||
254 | if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) { | ||
255 | ret = -EAGAIN; | ||
256 | goto error; | ||
257 | } | ||
258 | |||
259 | spin_lock(&vnode->lock); | ||
260 | |||
261 | if (list_empty(&vnode->pending_locks)) { | ||
262 | /* if there's no-one else with a lock on this vnode, then we | ||
263 | * need to ask the server for a lock */ | ||
264 | if (list_empty(&vnode->granted_locks)) { | ||
265 | _debug("not locked"); | ||
266 | ASSERTCMP(vnode->flags & | ||
267 | ((1 << AFS_VNODE_LOCKING) | | ||
268 | (1 << AFS_VNODE_READLOCKED) | | ||
269 | (1 << AFS_VNODE_WRITELOCKED)), ==, 0); | ||
270 | list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); | ||
271 | set_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
272 | spin_unlock(&vnode->lock); | ||
273 | |||
274 | ret = afs_vnode_set_lock(vnode, key, type); | ||
275 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
276 | switch (ret) { | ||
277 | case 0: | ||
278 | goto acquired_server_lock; | ||
279 | case -EWOULDBLOCK: | ||
280 | spin_lock(&vnode->lock); | ||
281 | ASSERT(list_empty(&vnode->granted_locks)); | ||
282 | ASSERTCMP(vnode->pending_locks.next, ==, | ||
283 | &fl->fl_u.afs.link); | ||
284 | goto wait; | ||
285 | default: | ||
286 | spin_lock(&vnode->lock); | ||
287 | list_del_init(&fl->fl_u.afs.link); | ||
288 | spin_unlock(&vnode->lock); | ||
289 | goto error; | ||
290 | } | ||
291 | } | ||
292 | |||
293 | /* if we've already got a readlock on the server and no waiting | ||
294 | * writelocks, then we might be able to instantly grant another | ||
295 | * readlock */ | ||
296 | if (type == AFS_LOCK_READ && | ||
297 | vnode->flags & (1 << AFS_VNODE_READLOCKED)) { | ||
298 | _debug("instant readlock"); | ||
299 | ASSERTCMP(vnode->flags & | ||
300 | ((1 << AFS_VNODE_LOCKING) | | ||
301 | (1 << AFS_VNODE_WRITELOCKED)), ==, 0); | ||
302 | ASSERT(!list_empty(&vnode->granted_locks)); | ||
303 | goto sharing_existing_lock; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | /* otherwise, we need to wait for a local lock to become available */ | ||
308 | _debug("wait local"); | ||
309 | list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); | ||
310 | wait: | ||
311 | if (!(fl->fl_flags & FL_SLEEP)) { | ||
312 | _debug("noblock"); | ||
313 | ret = -EAGAIN; | ||
314 | goto abort_attempt; | ||
315 | } | ||
316 | spin_unlock(&vnode->lock); | ||
317 | |||
318 | /* now we need to sleep and wait for the lock manager thread to get the | ||
319 | * lock from the server */ | ||
320 | _debug("sleep"); | ||
321 | ret = wait_event_interruptible(fl->fl_wait, | ||
322 | fl->fl_u.afs.state <= AFS_LOCK_GRANTED); | ||
323 | if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) { | ||
324 | ret = fl->fl_u.afs.state; | ||
325 | if (ret < 0) | ||
326 | goto error; | ||
327 | spin_lock(&vnode->lock); | ||
328 | goto given_lock; | ||
329 | } | ||
330 | |||
331 | /* we were interrupted, but someone may still be in the throes of | ||
332 | * giving us the lock */ | ||
333 | _debug("intr"); | ||
334 | ASSERTCMP(ret, ==, -ERESTARTSYS); | ||
335 | |||
336 | spin_lock(&vnode->lock); | ||
337 | if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) { | ||
338 | ret = fl->fl_u.afs.state; | ||
339 | if (ret < 0) { | ||
340 | spin_unlock(&vnode->lock); | ||
341 | goto error; | ||
342 | } | ||
343 | goto given_lock; | ||
344 | } | ||
345 | |||
346 | abort_attempt: | ||
347 | /* we aren't going to get the lock, either because we're unwilling to | ||
348 | * wait, or because some signal happened */ | ||
349 | _debug("abort"); | ||
350 | if (list_empty(&vnode->granted_locks) && | ||
351 | vnode->pending_locks.next == &fl->fl_u.afs.link) { | ||
352 | if (vnode->pending_locks.prev != &fl->fl_u.afs.link) { | ||
353 | /* kick the next pending lock into having a go */ | ||
354 | list_del_init(&fl->fl_u.afs.link); | ||
355 | afs_lock_may_be_available(vnode); | ||
356 | } | ||
357 | } else { | ||
358 | list_del_init(&fl->fl_u.afs.link); | ||
359 | } | ||
360 | spin_unlock(&vnode->lock); | ||
361 | goto error; | ||
362 | |||
363 | acquired_server_lock: | ||
364 | /* we've acquired a server lock, but it needs to be renewed after 5 | ||
365 | * mins */ | ||
366 | spin_lock(&vnode->lock); | ||
367 | afs_schedule_lock_extension(vnode); | ||
368 | if (type == AFS_LOCK_READ) | ||
369 | set_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
370 | else | ||
371 | set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
372 | sharing_existing_lock: | ||
373 | /* the lock has been granted as far as we're concerned... */ | ||
374 | fl->fl_u.afs.state = AFS_LOCK_GRANTED; | ||
375 | list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); | ||
376 | given_lock: | ||
377 | /* ... but we do still need to get the VFS's blessing */ | ||
378 | ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING))); | ||
379 | ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) | | ||
380 | (1 << AFS_VNODE_WRITELOCKED))) != 0); | ||
381 | ret = posix_lock_file(file, fl, NULL); | ||
382 | if (ret < 0) | ||
383 | goto vfs_rejected_lock; | ||
384 | spin_unlock(&vnode->lock); | ||
385 | |||
386 | /* again, make sure we've got a callback on this file and, again, make | ||
387 | * sure that our view of the data version is up to date (we ignore | ||
388 | * errors incurred here and deal with the consequences elsewhere) */ | ||
389 | afs_vnode_fetch_status(vnode, NULL, key); | ||
390 | |||
391 | error: | ||
392 | unlock_kernel(); | ||
393 | _leave(" = %d", ret); | ||
394 | return ret; | ||
395 | |||
396 | vfs_rejected_lock: | ||
397 | /* the VFS rejected the lock we just obtained, so we have to discard | ||
398 | * what we just got */ | ||
399 | _debug("vfs refused %d", ret); | ||
400 | list_del_init(&fl->fl_u.afs.link); | ||
401 | if (list_empty(&vnode->granted_locks)) | ||
402 | afs_defer_unlock(vnode, key); | ||
403 | spin_unlock(&vnode->lock); | ||
404 | goto abort_attempt; | ||
405 | } | ||
406 | |||
407 | /* | ||
408 | * unlock on a file on the server | ||
409 | */ | ||
410 | static int afs_do_unlk(struct file *file, struct file_lock *fl) | ||
411 | { | ||
412 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
413 | struct key *key = file->private_data; | ||
414 | int ret; | ||
415 | |||
416 | _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); | ||
417 | |||
418 | /* only whole-file unlocks are supported */ | ||
419 | if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) | ||
420 | return -EINVAL; | ||
421 | |||
422 | fl->fl_ops = &afs_lock_ops; | ||
423 | INIT_LIST_HEAD(&fl->fl_u.afs.link); | ||
424 | fl->fl_u.afs.state = AFS_LOCK_PENDING; | ||
425 | |||
426 | spin_lock(&vnode->lock); | ||
427 | ret = posix_lock_file(file, fl, NULL); | ||
428 | if (ret < 0) { | ||
429 | spin_unlock(&vnode->lock); | ||
430 | _leave(" = %d [vfs]", ret); | ||
431 | return ret; | ||
432 | } | ||
433 | |||
434 | /* discard the server lock only if all granted locks are gone */ | ||
435 | if (list_empty(&vnode->granted_locks)) | ||
436 | afs_defer_unlock(vnode, key); | ||
437 | spin_unlock(&vnode->lock); | ||
438 | _leave(" = 0"); | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * return information about a lock we currently hold, if indeed we hold one | ||
444 | */ | ||
445 | static int afs_do_getlk(struct file *file, struct file_lock *fl) | ||
446 | { | ||
447 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
448 | struct key *key = file->private_data; | ||
449 | int ret, lock_count; | ||
450 | |||
451 | _enter(""); | ||
452 | |||
453 | fl->fl_type = F_UNLCK; | ||
454 | |||
455 | mutex_lock(&vnode->vfs_inode.i_mutex); | ||
456 | |||
457 | /* check local lock records first */ | ||
458 | ret = 0; | ||
459 | if (posix_test_lock(file, fl) == 0) { | ||
460 | /* no local locks; consult the server */ | ||
461 | ret = afs_vnode_fetch_status(vnode, NULL, key); | ||
462 | if (ret < 0) | ||
463 | goto error; | ||
464 | lock_count = vnode->status.lock_count; | ||
465 | if (lock_count) { | ||
466 | if (lock_count > 0) | ||
467 | fl->fl_type = F_RDLCK; | ||
468 | else | ||
469 | fl->fl_type = F_WRLCK; | ||
470 | fl->fl_start = 0; | ||
471 | fl->fl_end = OFFSET_MAX; | ||
472 | } | ||
473 | } | ||
474 | |||
475 | error: | ||
476 | mutex_unlock(&vnode->vfs_inode.i_mutex); | ||
477 | _leave(" = %d [%hd]", ret, fl->fl_type); | ||
478 | return ret; | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * manage POSIX locks on a file | ||
483 | */ | ||
484 | int afs_lock(struct file *file, int cmd, struct file_lock *fl) | ||
485 | { | ||
486 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
487 | |||
488 | _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", | ||
489 | vnode->fid.vid, vnode->fid.vnode, cmd, | ||
490 | fl->fl_type, fl->fl_flags, | ||
491 | (long long) fl->fl_start, (long long) fl->fl_end); | ||
492 | |||
493 | /* AFS doesn't support mandatory locks */ | ||
494 | if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && | ||
495 | fl->fl_type != F_UNLCK) | ||
496 | return -ENOLCK; | ||
497 | |||
498 | if (IS_GETLK(cmd)) | ||
499 | return afs_do_getlk(file, fl); | ||
500 | if (fl->fl_type == F_UNLCK) | ||
501 | return afs_do_unlk(file, fl); | ||
502 | return afs_do_setlk(file, fl); | ||
503 | } | ||
504 | |||
505 | /* | ||
506 | * manage FLOCK locks on a file | ||
507 | */ | ||
508 | int afs_flock(struct file *file, int cmd, struct file_lock *fl) | ||
509 | { | ||
510 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
511 | |||
512 | _enter("{%x:%u},%d,{t=%x,fl=%x}", | ||
513 | vnode->fid.vid, vnode->fid.vnode, cmd, | ||
514 | fl->fl_type, fl->fl_flags); | ||
515 | |||
516 | /* | ||
517 | * No BSD flocks over NFS allowed. | ||
518 | * Note: we could try to fake a POSIX lock request here by | ||
519 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
520 | * Not sure whether that would be unique, though, or whether | ||
521 | * that would break in other places. | ||
522 | */ | ||
523 | if (!(fl->fl_flags & FL_FLOCK)) | ||
524 | return -ENOLCK; | ||
525 | |||
526 | /* we're simulating flock() locks using posix locks on the server */ | ||
527 | fl->fl_owner = (fl_owner_t) file; | ||
528 | fl->fl_start = 0; | ||
529 | fl->fl_end = OFFSET_MAX; | ||
530 | |||
531 | if (fl->fl_type == F_UNLCK) | ||
532 | return afs_do_unlk(file, fl); | ||
533 | return afs_do_setlk(file, fl); | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * the POSIX lock management core VFS code copies the lock record and adds the | ||
538 | * copy into its own list, so we need to add that copy to the vnode's lock | ||
539 | * queue in the same place as the original (which will be deleted shortly | ||
540 | * after) | ||
541 | */ | ||
542 | static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) | ||
543 | { | ||
544 | _enter(""); | ||
545 | |||
546 | list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link); | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * need to remove this lock from the vnode queue when it's removed from the | ||
551 | * VFS's list | ||
552 | */ | ||
553 | static void afs_fl_release_private(struct file_lock *fl) | ||
554 | { | ||
555 | _enter(""); | ||
556 | |||
557 | list_del_init(&fl->fl_u.afs.link); | ||
558 | } | ||
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 5dff1308b6f0..023b95b0d9d7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c | |||
@@ -67,7 +67,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
67 | EXTRACT(status->group); | 67 | EXTRACT(status->group); |
68 | bp++; /* sync counter */ | 68 | bp++; /* sync counter */ |
69 | data_version |= (u64) ntohl(*bp++) << 32; | 69 | data_version |= (u64) ntohl(*bp++) << 32; |
70 | bp++; /* lock count */ | 70 | EXTRACT(status->lock_count); |
71 | size |= (u64) ntohl(*bp++) << 32; | 71 | size |= (u64) ntohl(*bp++) << 32; |
72 | bp++; /* spare 4 */ | 72 | bp++; /* spare 4 */ |
73 | *_bp = bp; | 73 | *_bp = bp; |
@@ -1748,3 +1748,156 @@ int afs_fs_get_volume_status(struct afs_server *server, | |||
1748 | 1748 | ||
1749 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | 1749 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); |
1750 | } | 1750 | } |
1751 | |||
1752 | /* | ||
1753 | * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock | ||
1754 | */ | ||
1755 | static int afs_deliver_fs_xxxx_lock(struct afs_call *call, | ||
1756 | struct sk_buff *skb, bool last) | ||
1757 | { | ||
1758 | const __be32 *bp; | ||
1759 | |||
1760 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | ||
1761 | |||
1762 | afs_transfer_reply(call, skb); | ||
1763 | if (!last) | ||
1764 | return 0; | ||
1765 | |||
1766 | if (call->reply_size != call->reply_max) | ||
1767 | return -EBADMSG; | ||
1768 | |||
1769 | /* unmarshall the reply once we've received all of it */ | ||
1770 | bp = call->buffer; | ||
1771 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | ||
1772 | |||
1773 | _leave(" = 0 [done]"); | ||
1774 | return 0; | ||
1775 | } | ||
1776 | |||
1777 | /* | ||
1778 | * FS.SetLock operation type | ||
1779 | */ | ||
1780 | static const struct afs_call_type afs_RXFSSetLock = { | ||
1781 | .name = "FS.SetLock", | ||
1782 | .deliver = afs_deliver_fs_xxxx_lock, | ||
1783 | .abort_to_error = afs_abort_to_error, | ||
1784 | .destructor = afs_flat_call_destructor, | ||
1785 | }; | ||
1786 | |||
1787 | /* | ||
1788 | * FS.ExtendLock operation type | ||
1789 | */ | ||
1790 | static const struct afs_call_type afs_RXFSExtendLock = { | ||
1791 | .name = "FS.ExtendLock", | ||
1792 | .deliver = afs_deliver_fs_xxxx_lock, | ||
1793 | .abort_to_error = afs_abort_to_error, | ||
1794 | .destructor = afs_flat_call_destructor, | ||
1795 | }; | ||
1796 | |||
1797 | /* | ||
1798 | * FS.ReleaseLock operation type | ||
1799 | */ | ||
1800 | static const struct afs_call_type afs_RXFSReleaseLock = { | ||
1801 | .name = "FS.ReleaseLock", | ||
1802 | .deliver = afs_deliver_fs_xxxx_lock, | ||
1803 | .abort_to_error = afs_abort_to_error, | ||
1804 | .destructor = afs_flat_call_destructor, | ||
1805 | }; | ||
1806 | |||
1807 | /* | ||
1808 | * get a lock on a file | ||
1809 | */ | ||
1810 | int afs_fs_set_lock(struct afs_server *server, | ||
1811 | struct key *key, | ||
1812 | struct afs_vnode *vnode, | ||
1813 | afs_lock_type_t type, | ||
1814 | const struct afs_wait_mode *wait_mode) | ||
1815 | { | ||
1816 | struct afs_call *call; | ||
1817 | __be32 *bp; | ||
1818 | |||
1819 | _enter(""); | ||
1820 | |||
1821 | call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4); | ||
1822 | if (!call) | ||
1823 | return -ENOMEM; | ||
1824 | |||
1825 | call->key = key; | ||
1826 | call->reply = vnode; | ||
1827 | call->service_id = FS_SERVICE; | ||
1828 | call->port = htons(AFS_FS_PORT); | ||
1829 | |||
1830 | /* marshall the parameters */ | ||
1831 | bp = call->request; | ||
1832 | *bp++ = htonl(FSSETLOCK); | ||
1833 | *bp++ = htonl(vnode->fid.vid); | ||
1834 | *bp++ = htonl(vnode->fid.vnode); | ||
1835 | *bp++ = htonl(vnode->fid.unique); | ||
1836 | *bp++ = htonl(type); | ||
1837 | |||
1838 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1839 | } | ||
1840 | |||
1841 | /* | ||
1842 | * extend a lock on a file | ||
1843 | */ | ||
1844 | int afs_fs_extend_lock(struct afs_server *server, | ||
1845 | struct key *key, | ||
1846 | struct afs_vnode *vnode, | ||
1847 | const struct afs_wait_mode *wait_mode) | ||
1848 | { | ||
1849 | struct afs_call *call; | ||
1850 | __be32 *bp; | ||
1851 | |||
1852 | _enter(""); | ||
1853 | |||
1854 | call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4); | ||
1855 | if (!call) | ||
1856 | return -ENOMEM; | ||
1857 | |||
1858 | call->key = key; | ||
1859 | call->reply = vnode; | ||
1860 | call->service_id = FS_SERVICE; | ||
1861 | call->port = htons(AFS_FS_PORT); | ||
1862 | |||
1863 | /* marshall the parameters */ | ||
1864 | bp = call->request; | ||
1865 | *bp++ = htonl(FSEXTENDLOCK); | ||
1866 | *bp++ = htonl(vnode->fid.vid); | ||
1867 | *bp++ = htonl(vnode->fid.vnode); | ||
1868 | *bp++ = htonl(vnode->fid.unique); | ||
1869 | |||
1870 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1871 | } | ||
1872 | |||
1873 | /* | ||
1874 | * release a lock on a file | ||
1875 | */ | ||
1876 | int afs_fs_release_lock(struct afs_server *server, | ||
1877 | struct key *key, | ||
1878 | struct afs_vnode *vnode, | ||
1879 | const struct afs_wait_mode *wait_mode) | ||
1880 | { | ||
1881 | struct afs_call *call; | ||
1882 | __be32 *bp; | ||
1883 | |||
1884 | _enter(""); | ||
1885 | |||
1886 | call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4); | ||
1887 | if (!call) | ||
1888 | return -ENOMEM; | ||
1889 | |||
1890 | call->key = key; | ||
1891 | call->reply = vnode; | ||
1892 | call->service_id = FS_SERVICE; | ||
1893 | call->port = htons(AFS_FS_PORT); | ||
1894 | |||
1895 | /* marshall the parameters */ | ||
1896 | bp = call->request; | ||
1897 | *bp++ = htonl(FSRELEASELOCK); | ||
1898 | *bp++ = htonl(vnode->fid.vid); | ||
1899 | *bp++ = htonl(vnode->fid.vnode); | ||
1900 | *bp++ = htonl(vnode->fid.unique); | ||
1901 | |||
1902 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1903 | } | ||
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2c55dd94a1de..6306438f331f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -351,10 +351,18 @@ struct afs_vnode { | |||
351 | #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ | 351 | #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ |
352 | #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ | 352 | #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ |
353 | #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ | 353 | #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ |
354 | #define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */ | ||
355 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ | ||
356 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ | ||
357 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ | ||
354 | 358 | ||
355 | long acl_order; /* ACL check count (callback break count) */ | 359 | long acl_order; /* ACL check count (callback break count) */ |
356 | 360 | ||
357 | struct list_head writebacks; /* alterations in pagecache that need writing */ | 361 | struct list_head writebacks; /* alterations in pagecache that need writing */ |
362 | struct list_head pending_locks; /* locks waiting to be granted */ | ||
363 | struct list_head granted_locks; /* locks granted on this file */ | ||
364 | struct delayed_work lock_work; /* work to be done in locking */ | ||
365 | struct key *unlock_key; /* key to be used in unlocking */ | ||
358 | 366 | ||
359 | /* outstanding callback notification on this file */ | 367 | /* outstanding callback notification on this file */ |
360 | struct rb_node server_rb; /* link in server->fs_vnodes */ | 368 | struct rb_node server_rb; /* link in server->fs_vnodes */ |
@@ -474,6 +482,15 @@ extern int afs_open(struct inode *, struct file *); | |||
474 | extern int afs_release(struct inode *, struct file *); | 482 | extern int afs_release(struct inode *, struct file *); |
475 | 483 | ||
476 | /* | 484 | /* |
485 | * flock.c | ||
486 | */ | ||
487 | extern void __exit afs_kill_lock_manager(void); | ||
488 | extern void afs_lock_work(struct work_struct *); | ||
489 | extern void afs_lock_may_be_available(struct afs_vnode *); | ||
490 | extern int afs_lock(struct file *, int, struct file_lock *); | ||
491 | extern int afs_flock(struct file *, int, struct file_lock *); | ||
492 | |||
493 | /* | ||
477 | * fsclient.c | 494 | * fsclient.c |
478 | */ | 495 | */ |
479 | extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, | 496 | extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, |
@@ -513,6 +530,15 @@ extern int afs_fs_get_volume_status(struct afs_server *, struct key *, | |||
513 | struct afs_vnode *, | 530 | struct afs_vnode *, |
514 | struct afs_volume_status *, | 531 | struct afs_volume_status *, |
515 | const struct afs_wait_mode *); | 532 | const struct afs_wait_mode *); |
533 | extern int afs_fs_set_lock(struct afs_server *, struct key *, | ||
534 | struct afs_vnode *, afs_lock_type_t, | ||
535 | const struct afs_wait_mode *); | ||
536 | extern int afs_fs_extend_lock(struct afs_server *, struct key *, | ||
537 | struct afs_vnode *, | ||
538 | const struct afs_wait_mode *); | ||
539 | extern int afs_fs_release_lock(struct afs_server *, struct key *, | ||
540 | struct afs_vnode *, | ||
541 | const struct afs_wait_mode *); | ||
516 | 542 | ||
517 | /* | 543 | /* |
518 | * inode.c | 544 | * inode.c |
@@ -681,6 +707,10 @@ extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, | |||
681 | extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); | 707 | extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); |
682 | extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, | 708 | extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, |
683 | struct afs_volume_status *); | 709 | struct afs_volume_status *); |
710 | extern int afs_vnode_set_lock(struct afs_vnode *, struct key *, | ||
711 | afs_lock_type_t); | ||
712 | extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *); | ||
713 | extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); | ||
684 | 714 | ||
685 | /* | 715 | /* |
686 | * volume.c | 716 | * volume.c |
diff --git a/fs/afs/main.c b/fs/afs/main.c index cd21195bbb24..0f60f6b35769 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -168,6 +168,7 @@ static void __exit afs_exit(void) | |||
168 | printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); | 168 | printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); |
169 | 169 | ||
170 | afs_fs_exit(); | 170 | afs_fs_exit(); |
171 | afs_kill_lock_manager(); | ||
171 | afs_close_socket(); | 172 | afs_close_socket(); |
172 | afs_purge_servers(); | 173 | afs_purge_servers(); |
173 | afs_callback_update_kill(); | 174 | afs_callback_update_kill(); |
diff --git a/fs/afs/misc.c b/fs/afs/misc.c index d1a889c40742..2d33a5f7d218 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c | |||
@@ -35,6 +35,7 @@ int afs_abort_to_error(u32 abort_code) | |||
35 | case VOVERQUOTA: return -EDQUOT; | 35 | case VOVERQUOTA: return -EDQUOT; |
36 | case VBUSY: return -EBUSY; | 36 | case VBUSY: return -EBUSY; |
37 | case VMOVED: return -ENXIO; | 37 | case VMOVED: return -ENXIO; |
38 | case 0x2f6df0a: return -EWOULDBLOCK; | ||
38 | case 0x2f6df0c: return -EACCES; | 39 | case 0x2f6df0c: return -EACCES; |
39 | case 0x2f6df0f: return -EBUSY; | 40 | case 0x2f6df0f: return -EBUSY; |
40 | case 0x2f6df10: return -EEXIST; | 41 | case 0x2f6df10: return -EEXIST; |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 13df512aea9e..6edb56683b9a 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -201,23 +201,9 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) | |||
201 | */ | 201 | */ |
202 | static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) | 202 | static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) |
203 | { | 203 | { |
204 | struct list_head *_p; | ||
205 | loff_t pos = *_pos; | ||
206 | |||
207 | /* lock the list against modification */ | 204 | /* lock the list against modification */ |
208 | down_read(&afs_proc_cells_sem); | 205 | down_read(&afs_proc_cells_sem); |
209 | 206 | return seq_list_start_head(&afs_proc_cells, *_pos); | |
210 | /* allow for the header line */ | ||
211 | if (!pos) | ||
212 | return (void *) 1; | ||
213 | pos--; | ||
214 | |||
215 | /* find the n'th element in the list */ | ||
216 | list_for_each(_p, &afs_proc_cells) | ||
217 | if (!pos--) | ||
218 | break; | ||
219 | |||
220 | return _p != &afs_proc_cells ? _p : NULL; | ||
221 | } | 207 | } |
222 | 208 | ||
223 | /* | 209 | /* |
@@ -225,14 +211,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) | |||
225 | */ | 211 | */ |
226 | static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) | 212 | static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) |
227 | { | 213 | { |
228 | struct list_head *_p; | 214 | return seq_list_next(v, &afs_proc_cells, pos); |
229 | |||
230 | (*pos)++; | ||
231 | |||
232 | _p = v; | ||
233 | _p = v == (void *) 1 ? afs_proc_cells.next : _p->next; | ||
234 | |||
235 | return _p != &afs_proc_cells ? _p : NULL; | ||
236 | } | 215 | } |
237 | 216 | ||
238 | /* | 217 | /* |
@@ -250,7 +229,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) | |||
250 | { | 229 | { |
251 | struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); | 230 | struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); |
252 | 231 | ||
253 | if (v == (void *) 1) { | 232 | if (v == &afs_proc_cells) { |
254 | /* display header on line 1 */ | 233 | /* display header on line 1 */ |
255 | seq_puts(m, "USE NAME\n"); | 234 | seq_puts(m, "USE NAME\n"); |
256 | return 0; | 235 | return 0; |
@@ -503,26 +482,13 @@ static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file) | |||
503 | */ | 482 | */ |
504 | static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) | 483 | static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) |
505 | { | 484 | { |
506 | struct list_head *_p; | ||
507 | struct afs_cell *cell = m->private; | 485 | struct afs_cell *cell = m->private; |
508 | loff_t pos = *_pos; | ||
509 | 486 | ||
510 | _enter("cell=%p pos=%Ld", cell, *_pos); | 487 | _enter("cell=%p pos=%Ld", cell, *_pos); |
511 | 488 | ||
512 | /* lock the list against modification */ | 489 | /* lock the list against modification */ |
513 | down_read(&cell->vl_sem); | 490 | down_read(&cell->vl_sem); |
514 | 491 | return seq_list_start_head(&cell->vl_list, *_pos); | |
515 | /* allow for the header line */ | ||
516 | if (!pos) | ||
517 | return (void *) 1; | ||
518 | pos--; | ||
519 | |||
520 | /* find the n'th element in the list */ | ||
521 | list_for_each(_p, &cell->vl_list) | ||
522 | if (!pos--) | ||
523 | break; | ||
524 | |||
525 | return _p != &cell->vl_list ? _p : NULL; | ||
526 | } | 492 | } |
527 | 493 | ||
528 | /* | 494 | /* |
@@ -531,17 +497,10 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) | |||
531 | static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, | 497 | static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, |
532 | loff_t *_pos) | 498 | loff_t *_pos) |
533 | { | 499 | { |
534 | struct list_head *_p; | ||
535 | struct afs_cell *cell = p->private; | 500 | struct afs_cell *cell = p->private; |
536 | 501 | ||
537 | _enter("cell=%p pos=%Ld", cell, *_pos); | 502 | _enter("cell=%p pos=%Ld", cell, *_pos); |
538 | 503 | return seq_list_next(v, &cell->vl_list, _pos); | |
539 | (*_pos)++; | ||
540 | |||
541 | _p = v; | ||
542 | _p = (v == (void *) 1) ? cell->vl_list.next : _p->next; | ||
543 | |||
544 | return (_p != &cell->vl_list) ? _p : NULL; | ||
545 | } | 504 | } |
546 | 505 | ||
547 | /* | 506 | /* |
@@ -569,11 +528,12 @@ const char afs_vlocation_states[][4] = { | |||
569 | */ | 528 | */ |
570 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) | 529 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) |
571 | { | 530 | { |
531 | struct afs_cell *cell = m->private; | ||
572 | struct afs_vlocation *vlocation = | 532 | struct afs_vlocation *vlocation = |
573 | list_entry(v, struct afs_vlocation, link); | 533 | list_entry(v, struct afs_vlocation, link); |
574 | 534 | ||
575 | /* display header on line 1 */ | 535 | /* display header on line 1 */ |
576 | if (v == (void *) 1) { | 536 | if (v == &cell->vl_list) { |
577 | seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); | 537 | seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); |
578 | return 0; | 538 | return 0; |
579 | } | 539 | } |
@@ -734,26 +694,13 @@ static int afs_proc_cell_servers_release(struct inode *inode, | |||
734 | static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) | 694 | static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) |
735 | __acquires(m->private->servers_lock) | 695 | __acquires(m->private->servers_lock) |
736 | { | 696 | { |
737 | struct list_head *_p; | ||
738 | struct afs_cell *cell = m->private; | 697 | struct afs_cell *cell = m->private; |
739 | loff_t pos = *_pos; | ||
740 | 698 | ||
741 | _enter("cell=%p pos=%Ld", cell, *_pos); | 699 | _enter("cell=%p pos=%Ld", cell, *_pos); |
742 | 700 | ||
743 | /* lock the list against modification */ | 701 | /* lock the list against modification */ |
744 | read_lock(&cell->servers_lock); | 702 | read_lock(&cell->servers_lock); |
745 | 703 | return seq_list_start_head(&cell->servers, *_pos); | |
746 | /* allow for the header line */ | ||
747 | if (!pos) | ||
748 | return (void *) 1; | ||
749 | pos--; | ||
750 | |||
751 | /* find the n'th element in the list */ | ||
752 | list_for_each(_p, &cell->servers) | ||
753 | if (!pos--) | ||
754 | break; | ||
755 | |||
756 | return _p != &cell->servers ? _p : NULL; | ||
757 | } | 704 | } |
758 | 705 | ||
759 | /* | 706 | /* |
@@ -762,17 +709,10 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) | |||
762 | static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, | 709 | static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, |
763 | loff_t *_pos) | 710 | loff_t *_pos) |
764 | { | 711 | { |
765 | struct list_head *_p; | ||
766 | struct afs_cell *cell = p->private; | 712 | struct afs_cell *cell = p->private; |
767 | 713 | ||
768 | _enter("cell=%p pos=%Ld", cell, *_pos); | 714 | _enter("cell=%p pos=%Ld", cell, *_pos); |
769 | 715 | return seq_list_next(v, &cell->servers, _pos); | |
770 | (*_pos)++; | ||
771 | |||
772 | _p = v; | ||
773 | _p = v == (void *) 1 ? cell->servers.next : _p->next; | ||
774 | |||
775 | return _p != &cell->servers ? _p : NULL; | ||
776 | } | 716 | } |
777 | 717 | ||
778 | /* | 718 | /* |
@@ -791,11 +731,12 @@ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) | |||
791 | */ | 731 | */ |
792 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v) | 732 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v) |
793 | { | 733 | { |
734 | struct afs_cell *cell = m->private; | ||
794 | struct afs_server *server = list_entry(v, struct afs_server, link); | 735 | struct afs_server *server = list_entry(v, struct afs_server, link); |
795 | char ipaddr[20]; | 736 | char ipaddr[20]; |
796 | 737 | ||
797 | /* display header on line 1 */ | 738 | /* display header on line 1 */ |
798 | if (v == (void *) 1) { | 739 | if (v == &cell->servers) { |
799 | seq_puts(m, "USE ADDR STATE\n"); | 740 | seq_puts(m, "USE ADDR STATE\n"); |
800 | return 0; | 741 | return 0; |
801 | } | 742 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 2e8496ba1205..993cdf1cce3a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -460,6 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, | |||
460 | spin_lock_init(&vnode->writeback_lock); | 460 | spin_lock_init(&vnode->writeback_lock); |
461 | spin_lock_init(&vnode->lock); | 461 | spin_lock_init(&vnode->lock); |
462 | INIT_LIST_HEAD(&vnode->writebacks); | 462 | INIT_LIST_HEAD(&vnode->writebacks); |
463 | INIT_LIST_HEAD(&vnode->pending_locks); | ||
464 | INIT_LIST_HEAD(&vnode->granted_locks); | ||
465 | INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); | ||
463 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); | 466 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); |
464 | } | 467 | } |
465 | 468 | ||
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 232c55dc245d..2f05c4fc2a70 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c | |||
@@ -561,7 +561,7 @@ no_server: | |||
561 | /* | 561 | /* |
562 | * create a hard link | 562 | * create a hard link |
563 | */ | 563 | */ |
564 | extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, | 564 | int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, |
565 | struct key *key, const char *name) | 565 | struct key *key, const char *name) |
566 | { | 566 | { |
567 | struct afs_server *server; | 567 | struct afs_server *server; |
@@ -887,11 +887,6 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, | |||
887 | vnode->fid.unique, | 887 | vnode->fid.unique, |
888 | key_serial(key)); | 888 | key_serial(key)); |
889 | 889 | ||
890 | /* this op will fetch the status */ | ||
891 | spin_lock(&vnode->lock); | ||
892 | vnode->update_cnt++; | ||
893 | spin_unlock(&vnode->lock); | ||
894 | |||
895 | do { | 890 | do { |
896 | /* pick a server to query */ | 891 | /* pick a server to query */ |
897 | server = afs_volume_pick_fileserver(vnode); | 892 | server = afs_volume_pick_fileserver(vnode); |
@@ -905,20 +900,127 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, | |||
905 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | 900 | } while (!afs_volume_release_fileserver(vnode, server, ret)); |
906 | 901 | ||
907 | /* adjust the flags */ | 902 | /* adjust the flags */ |
908 | if (ret == 0) { | 903 | if (ret == 0) |
909 | afs_vnode_finalise_status_update(vnode, server); | 904 | afs_put_server(server); |
905 | |||
906 | _leave(" = %d", ret); | ||
907 | return ret; | ||
908 | |||
909 | no_server: | ||
910 | return PTR_ERR(server); | ||
911 | } | ||
912 | |||
913 | /* | ||
914 | * get a lock on a file | ||
915 | */ | ||
916 | int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, | ||
917 | afs_lock_type_t type) | ||
918 | { | ||
919 | struct afs_server *server; | ||
920 | int ret; | ||
921 | |||
922 | _enter("%s{%x:%u.%u},%x,%u", | ||
923 | vnode->volume->vlocation->vldb.name, | ||
924 | vnode->fid.vid, | ||
925 | vnode->fid.vnode, | ||
926 | vnode->fid.unique, | ||
927 | key_serial(key), type); | ||
928 | |||
929 | do { | ||
930 | /* pick a server to query */ | ||
931 | server = afs_volume_pick_fileserver(vnode); | ||
932 | if (IS_ERR(server)) | ||
933 | goto no_server; | ||
934 | |||
935 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
936 | |||
937 | ret = afs_fs_set_lock(server, key, vnode, type, &afs_sync_call); | ||
938 | |||
939 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
940 | |||
941 | /* adjust the flags */ | ||
942 | if (ret == 0) | ||
943 | afs_put_server(server); | ||
944 | |||
945 | _leave(" = %d", ret); | ||
946 | return ret; | ||
947 | |||
948 | no_server: | ||
949 | return PTR_ERR(server); | ||
950 | } | ||
951 | |||
952 | /* | ||
953 | * extend a lock on a file | ||
954 | */ | ||
955 | int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) | ||
956 | { | ||
957 | struct afs_server *server; | ||
958 | int ret; | ||
959 | |||
960 | _enter("%s{%x:%u.%u},%x", | ||
961 | vnode->volume->vlocation->vldb.name, | ||
962 | vnode->fid.vid, | ||
963 | vnode->fid.vnode, | ||
964 | vnode->fid.unique, | ||
965 | key_serial(key)); | ||
966 | |||
967 | do { | ||
968 | /* pick a server to query */ | ||
969 | server = afs_volume_pick_fileserver(vnode); | ||
970 | if (IS_ERR(server)) | ||
971 | goto no_server; | ||
972 | |||
973 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
974 | |||
975 | ret = afs_fs_extend_lock(server, key, vnode, &afs_sync_call); | ||
976 | |||
977 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
978 | |||
979 | /* adjust the flags */ | ||
980 | if (ret == 0) | ||
981 | afs_put_server(server); | ||
982 | |||
983 | _leave(" = %d", ret); | ||
984 | return ret; | ||
985 | |||
986 | no_server: | ||
987 | return PTR_ERR(server); | ||
988 | } | ||
989 | |||
990 | /* | ||
991 | * release a lock on a file | ||
992 | */ | ||
993 | int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) | ||
994 | { | ||
995 | struct afs_server *server; | ||
996 | int ret; | ||
997 | |||
998 | _enter("%s{%x:%u.%u},%x", | ||
999 | vnode->volume->vlocation->vldb.name, | ||
1000 | vnode->fid.vid, | ||
1001 | vnode->fid.vnode, | ||
1002 | vnode->fid.unique, | ||
1003 | key_serial(key)); | ||
1004 | |||
1005 | do { | ||
1006 | /* pick a server to query */ | ||
1007 | server = afs_volume_pick_fileserver(vnode); | ||
1008 | if (IS_ERR(server)) | ||
1009 | goto no_server; | ||
1010 | |||
1011 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
1012 | |||
1013 | ret = afs_fs_release_lock(server, key, vnode, &afs_sync_call); | ||
1014 | |||
1015 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
1016 | |||
1017 | /* adjust the flags */ | ||
1018 | if (ret == 0) | ||
910 | afs_put_server(server); | 1019 | afs_put_server(server); |
911 | } else { | ||
912 | afs_vnode_status_update_failed(vnode, ret); | ||
913 | } | ||
914 | 1020 | ||
915 | _leave(" = %d", ret); | 1021 | _leave(" = %d", ret); |
916 | return ret; | 1022 | return ret; |
917 | 1023 | ||
918 | no_server: | 1024 | no_server: |
919 | spin_lock(&vnode->lock); | ||
920 | vnode->update_cnt--; | ||
921 | ASSERTCMP(vnode->update_cnt, >=, 0); | ||
922 | spin_unlock(&vnode->lock); | ||
923 | return PTR_ERR(server); | 1025 | return PTR_ERR(server); |
924 | } | 1026 | } |
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index edc67486238f..b4a75880f6fd 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -53,7 +53,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = { | |||
53 | }; | 53 | }; |
54 | 54 | ||
55 | /** | 55 | /** |
56 | * anon_inode_getfd - creates a new file instance by hooking it up to and | 56 | * anon_inode_getfd - creates a new file instance by hooking it up to an |
57 | * anonymous inode, and a dentry that describe the "class" | 57 | * anonymous inode, and a dentry that describe the "class" |
58 | * of the file | 58 | * of the file |
59 | * | 59 | * |
@@ -66,7 +66,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = { | |||
66 | * | 66 | * |
67 | * Creates a new file by hooking it on a single inode. This is useful for files | 67 | * Creates a new file by hooking it on a single inode. This is useful for files |
68 | * that do not need to have a full-fledged inode in order to operate correctly. | 68 | * that do not need to have a full-fledged inode in order to operate correctly. |
69 | * All the files created with anon_inode_getfd() will share a single inode, by | 69 | * All the files created with anon_inode_getfd() will share a single inode, |
70 | * hence saving memory and avoiding code duplication for the file/inode/dentry | 70 | * hence saving memory and avoiding code duplication for the file/inode/dentry |
71 | * setup. | 71 | * setup. |
72 | */ | 72 | */ |
@@ -142,9 +142,9 @@ err_put_filp: | |||
142 | EXPORT_SYMBOL_GPL(anon_inode_getfd); | 142 | EXPORT_SYMBOL_GPL(anon_inode_getfd); |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * A single inode exist for all anon_inode files. Contrary to pipes, | 145 | * A single inode exists for all anon_inode files. Contrary to pipes, |
146 | * anon_inode inodes has no per-instance data associated, so we can avoid | 146 | * anon_inode inodes have no associated per-instance data, so we need |
147 | * the allocation of multiple of them. | 147 | * only allocate one of them. |
148 | */ | 148 | */ |
149 | static struct inode *anon_inode_mkinode(void) | 149 | static struct inode *anon_inode_mkinode(void) |
150 | { | 150 | { |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 08e4414b8374..a27e42bf3400 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -45,7 +45,7 @@ | |||
45 | 45 | ||
46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); | 46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); |
47 | static int load_elf_library(struct file *); | 47 | static int load_elf_library(struct file *); |
48 | static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); | 48 | static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long); |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * If we don't support core dumping, then supply a NULL so we | 51 | * If we don't support core dumping, then supply a NULL so we |
@@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = { | |||
80 | .hasvdso = 1 | 80 | .hasvdso = 1 |
81 | }; | 81 | }; |
82 | 82 | ||
83 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) | 83 | #define BAD_ADDR(x) IS_ERR_VALUE(x) |
84 | 84 | ||
85 | static int set_brk(unsigned long start, unsigned long end) | 85 | static int set_brk(unsigned long start, unsigned long end) |
86 | { | 86 | { |
@@ -285,33 +285,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, | |||
285 | #ifndef elf_map | 285 | #ifndef elf_map |
286 | 286 | ||
287 | static unsigned long elf_map(struct file *filep, unsigned long addr, | 287 | static unsigned long elf_map(struct file *filep, unsigned long addr, |
288 | struct elf_phdr *eppnt, int prot, int type) | 288 | struct elf_phdr *eppnt, int prot, int type, |
289 | unsigned long total_size) | ||
289 | { | 290 | { |
290 | unsigned long map_addr; | 291 | unsigned long map_addr; |
291 | unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); | 292 | unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); |
293 | unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); | ||
294 | addr = ELF_PAGESTART(addr); | ||
295 | size = ELF_PAGEALIGN(size); | ||
292 | 296 | ||
293 | down_write(¤t->mm->mmap_sem); | ||
294 | /* mmap() will return -EINVAL if given a zero size, but a | 297 | /* mmap() will return -EINVAL if given a zero size, but a |
295 | * segment with zero filesize is perfectly valid */ | 298 | * segment with zero filesize is perfectly valid */ |
296 | if (eppnt->p_filesz + pageoffset) | 299 | if (!size) |
297 | map_addr = do_mmap(filep, ELF_PAGESTART(addr), | 300 | return addr; |
298 | eppnt->p_filesz + pageoffset, prot, type, | 301 | |
299 | eppnt->p_offset - pageoffset); | 302 | down_write(¤t->mm->mmap_sem); |
300 | else | 303 | /* |
301 | map_addr = ELF_PAGESTART(addr); | 304 | * total_size is the size of the ELF (interpreter) image. |
305 | * The _first_ mmap needs to know the full size, otherwise | ||
306 | * randomization might put this image into an overlapping | ||
307 | * position with the ELF binary image. (since size < total_size) | ||
308 | * So we first map the 'big' image - and unmap the remainder at | ||
309 | * the end. (which unmap is needed for ELF images with holes.) | ||
310 | */ | ||
311 | if (total_size) { | ||
312 | total_size = ELF_PAGEALIGN(total_size); | ||
313 | map_addr = do_mmap(filep, addr, total_size, prot, type, off); | ||
314 | if (!BAD_ADDR(map_addr)) | ||
315 | do_munmap(current->mm, map_addr+size, total_size-size); | ||
316 | } else | ||
317 | map_addr = do_mmap(filep, addr, size, prot, type, off); | ||
318 | |||
302 | up_write(¤t->mm->mmap_sem); | 319 | up_write(¤t->mm->mmap_sem); |
303 | return(map_addr); | 320 | return(map_addr); |
304 | } | 321 | } |
305 | 322 | ||
306 | #endif /* !elf_map */ | 323 | #endif /* !elf_map */ |
307 | 324 | ||
325 | static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) | ||
326 | { | ||
327 | int i, first_idx = -1, last_idx = -1; | ||
328 | |||
329 | for (i = 0; i < nr; i++) { | ||
330 | if (cmds[i].p_type == PT_LOAD) { | ||
331 | last_idx = i; | ||
332 | if (first_idx == -1) | ||
333 | first_idx = i; | ||
334 | } | ||
335 | } | ||
336 | if (first_idx == -1) | ||
337 | return 0; | ||
338 | |||
339 | return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - | ||
340 | ELF_PAGESTART(cmds[first_idx].p_vaddr); | ||
341 | } | ||
342 | |||
343 | |||
308 | /* This is much more generalized than the library routine read function, | 344 | /* This is much more generalized than the library routine read function, |
309 | so we keep this separate. Technically the library read function | 345 | so we keep this separate. Technically the library read function |
310 | is only provided so that we can read a.out libraries that have | 346 | is only provided so that we can read a.out libraries that have |
311 | an ELF header */ | 347 | an ELF header */ |
312 | 348 | ||
313 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | 349 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, |
314 | struct file *interpreter, unsigned long *interp_load_addr) | 350 | struct file *interpreter, unsigned long *interp_map_addr, |
351 | unsigned long no_base) | ||
315 | { | 352 | { |
316 | struct elf_phdr *elf_phdata; | 353 | struct elf_phdr *elf_phdata; |
317 | struct elf_phdr *eppnt; | 354 | struct elf_phdr *eppnt; |
@@ -319,6 +356,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
319 | int load_addr_set = 0; | 356 | int load_addr_set = 0; |
320 | unsigned long last_bss = 0, elf_bss = 0; | 357 | unsigned long last_bss = 0, elf_bss = 0; |
321 | unsigned long error = ~0UL; | 358 | unsigned long error = ~0UL; |
359 | unsigned long total_size; | ||
322 | int retval, i, size; | 360 | int retval, i, size; |
323 | 361 | ||
324 | /* First of all, some simple consistency checks */ | 362 | /* First of all, some simple consistency checks */ |
@@ -357,6 +395,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
357 | goto out_close; | 395 | goto out_close; |
358 | } | 396 | } |
359 | 397 | ||
398 | total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum); | ||
399 | if (!total_size) { | ||
400 | error = -EINVAL; | ||
401 | goto out_close; | ||
402 | } | ||
403 | |||
360 | eppnt = elf_phdata; | 404 | eppnt = elf_phdata; |
361 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { | 405 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { |
362 | if (eppnt->p_type == PT_LOAD) { | 406 | if (eppnt->p_type == PT_LOAD) { |
@@ -374,9 +418,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
374 | vaddr = eppnt->p_vaddr; | 418 | vaddr = eppnt->p_vaddr; |
375 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) | 419 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) |
376 | elf_type |= MAP_FIXED; | 420 | elf_type |= MAP_FIXED; |
421 | else if (no_base && interp_elf_ex->e_type == ET_DYN) | ||
422 | load_addr = -vaddr; | ||
377 | 423 | ||
378 | map_addr = elf_map(interpreter, load_addr + vaddr, | 424 | map_addr = elf_map(interpreter, load_addr + vaddr, |
379 | eppnt, elf_prot, elf_type); | 425 | eppnt, elf_prot, elf_type, total_size); |
426 | total_size = 0; | ||
427 | if (!*interp_map_addr) | ||
428 | *interp_map_addr = map_addr; | ||
380 | error = map_addr; | 429 | error = map_addr; |
381 | if (BAD_ADDR(map_addr)) | 430 | if (BAD_ADDR(map_addr)) |
382 | goto out_close; | 431 | goto out_close; |
@@ -442,8 +491,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
442 | goto out_close; | 491 | goto out_close; |
443 | } | 492 | } |
444 | 493 | ||
445 | *interp_load_addr = load_addr; | 494 | error = load_addr; |
446 | error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; | ||
447 | 495 | ||
448 | out_close: | 496 | out_close: |
449 | kfree(elf_phdata); | 497 | kfree(elf_phdata); |
@@ -540,7 +588,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
540 | int elf_exec_fileno; | 588 | int elf_exec_fileno; |
541 | int retval, i; | 589 | int retval, i; |
542 | unsigned int size; | 590 | unsigned int size; |
543 | unsigned long elf_entry, interp_load_addr = 0; | 591 | unsigned long elf_entry; |
592 | unsigned long interp_load_addr = 0; | ||
544 | unsigned long start_code, end_code, start_data, end_data; | 593 | unsigned long start_code, end_code, start_data, end_data; |
545 | unsigned long reloc_func_desc = 0; | 594 | unsigned long reloc_func_desc = 0; |
546 | char passed_fileno[6]; | 595 | char passed_fileno[6]; |
@@ -808,9 +857,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
808 | current->mm->start_stack = bprm->p; | 857 | current->mm->start_stack = bprm->p; |
809 | 858 | ||
810 | /* Now we do a little grungy work by mmaping the ELF image into | 859 | /* Now we do a little grungy work by mmaping the ELF image into |
811 | the correct location in memory. At this point, we assume that | 860 | the correct location in memory. */ |
812 | the image should be loaded at fixed address, not at a variable | ||
813 | address. */ | ||
814 | for(i = 0, elf_ppnt = elf_phdata; | 861 | for(i = 0, elf_ppnt = elf_phdata; |
815 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { | 862 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { |
816 | int elf_prot = 0, elf_flags; | 863 | int elf_prot = 0, elf_flags; |
@@ -864,11 +911,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
864 | * default mmap base, as well as whatever program they | 911 | * default mmap base, as well as whatever program they |
865 | * might try to exec. This is because the brk will | 912 | * might try to exec. This is because the brk will |
866 | * follow the loader, and is not movable. */ | 913 | * follow the loader, and is not movable. */ |
914 | #ifdef CONFIG_X86 | ||
915 | load_bias = 0; | ||
916 | #else | ||
867 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 917 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); |
918 | #endif | ||
868 | } | 919 | } |
869 | 920 | ||
870 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, | 921 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, |
871 | elf_prot, elf_flags); | 922 | elf_prot, elf_flags,0); |
872 | if (BAD_ADDR(error)) { | 923 | if (BAD_ADDR(error)) { |
873 | send_sig(SIGKILL, current, 0); | 924 | send_sig(SIGKILL, current, 0); |
874 | retval = IS_ERR((void *)error) ? | 925 | retval = IS_ERR((void *)error) ? |
@@ -944,13 +995,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
944 | } | 995 | } |
945 | 996 | ||
946 | if (elf_interpreter) { | 997 | if (elf_interpreter) { |
947 | if (interpreter_type == INTERPRETER_AOUT) | 998 | if (interpreter_type == INTERPRETER_AOUT) { |
948 | elf_entry = load_aout_interp(&loc->interp_ex, | 999 | elf_entry = load_aout_interp(&loc->interp_ex, |
949 | interpreter); | 1000 | interpreter); |
950 | else | 1001 | } else { |
1002 | unsigned long uninitialized_var(interp_map_addr); | ||
1003 | |||
951 | elf_entry = load_elf_interp(&loc->interp_elf_ex, | 1004 | elf_entry = load_elf_interp(&loc->interp_elf_ex, |
952 | interpreter, | 1005 | interpreter, |
953 | &interp_load_addr); | 1006 | &interp_map_addr, |
1007 | load_bias); | ||
1008 | if (!BAD_ADDR(elf_entry)) { | ||
1009 | /* | ||
1010 | * load_elf_interp() returns relocation | ||
1011 | * adjustment | ||
1012 | */ | ||
1013 | interp_load_addr = elf_entry; | ||
1014 | elf_entry += loc->interp_elf_ex.e_entry; | ||
1015 | } | ||
1016 | } | ||
954 | if (BAD_ADDR(elf_entry)) { | 1017 | if (BAD_ADDR(elf_entry)) { |
955 | force_sig(SIGSEGV, current); | 1018 | force_sig(SIGSEGV, current); |
956 | retval = IS_ERR((void *)elf_entry) ? | 1019 | retval = IS_ERR((void *)elf_entry) ? |
diff --git a/fs/block_dev.c b/fs/block_dev.c index b3e9bfa748cf..3635315e3b99 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -588,12 +588,10 @@ EXPORT_SYMBOL(bdget); | |||
588 | 588 | ||
589 | long nr_blockdev_pages(void) | 589 | long nr_blockdev_pages(void) |
590 | { | 590 | { |
591 | struct list_head *p; | 591 | struct block_device *bdev; |
592 | long ret = 0; | 592 | long ret = 0; |
593 | spin_lock(&bdev_lock); | 593 | spin_lock(&bdev_lock); |
594 | list_for_each(p, &all_bdevs) { | 594 | list_for_each_entry(bdev, &all_bdevs, bd_list) { |
595 | struct block_device *bdev; | ||
596 | bdev = list_entry(p, struct block_device, bd_list); | ||
597 | ret += bdev->bd_inode->i_mapping->nrpages; | 595 | ret += bdev->bd_inode->i_mapping->nrpages; |
598 | } | 596 | } |
599 | spin_unlock(&bdev_lock); | 597 | spin_unlock(&bdev_lock); |
@@ -874,7 +872,7 @@ static struct bd_holder *find_bd_holder(struct block_device *bdev, | |||
874 | */ | 872 | */ |
875 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | 873 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) |
876 | { | 874 | { |
877 | int ret; | 875 | int err; |
878 | 876 | ||
879 | if (!bo) | 877 | if (!bo) |
880 | return -EINVAL; | 878 | return -EINVAL; |
@@ -882,15 +880,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | |||
882 | if (!bd_holder_grab_dirs(bdev, bo)) | 880 | if (!bd_holder_grab_dirs(bdev, bo)) |
883 | return -EBUSY; | 881 | return -EBUSY; |
884 | 882 | ||
885 | ret = add_symlink(bo->sdir, bo->sdev); | 883 | err = add_symlink(bo->sdir, bo->sdev); |
886 | if (ret == 0) { | 884 | if (err) |
887 | ret = add_symlink(bo->hdir, bo->hdev); | 885 | return err; |
888 | if (ret) | 886 | |
889 | del_symlink(bo->sdir, bo->sdev); | 887 | err = add_symlink(bo->hdir, bo->hdev); |
888 | if (err) { | ||
889 | del_symlink(bo->sdir, bo->sdev); | ||
890 | return err; | ||
890 | } | 891 | } |
891 | if (ret == 0) | 892 | |
892 | list_add_tail(&bo->list, &bdev->bd_holder_list); | 893 | list_add_tail(&bo->list, &bdev->bd_holder_list); |
893 | return ret; | 894 | return 0; |
894 | } | 895 | } |
895 | 896 | ||
896 | /** | 897 | /** |
@@ -948,7 +949,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev, | |||
948 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | 949 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, |
949 | struct kobject *kobj) | 950 | struct kobject *kobj) |
950 | { | 951 | { |
951 | int res; | 952 | int err; |
952 | struct bd_holder *bo, *found; | 953 | struct bd_holder *bo, *found; |
953 | 954 | ||
954 | if (!kobj) | 955 | if (!kobj) |
@@ -959,21 +960,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |||
959 | return -ENOMEM; | 960 | return -ENOMEM; |
960 | 961 | ||
961 | mutex_lock(&bdev->bd_mutex); | 962 | mutex_lock(&bdev->bd_mutex); |
962 | res = bd_claim(bdev, holder); | ||
963 | if (res == 0) { | ||
964 | found = find_bd_holder(bdev, bo); | ||
965 | if (found == NULL) { | ||
966 | res = add_bd_holder(bdev, bo); | ||
967 | if (res) | ||
968 | bd_release(bdev); | ||
969 | } | ||
970 | } | ||
971 | 963 | ||
972 | if (res || found) | 964 | err = bd_claim(bdev, holder); |
973 | free_bd_holder(bo); | 965 | if (err) |
974 | mutex_unlock(&bdev->bd_mutex); | 966 | goto fail; |
975 | 967 | ||
976 | return res; | 968 | found = find_bd_holder(bdev, bo); |
969 | if (found) | ||
970 | goto fail; | ||
971 | |||
972 | err = add_bd_holder(bdev, bo); | ||
973 | if (err) | ||
974 | bd_release(bdev); | ||
975 | else | ||
976 | bo = NULL; | ||
977 | fail: | ||
978 | mutex_unlock(&bdev->bd_mutex); | ||
979 | free_bd_holder(bo); | ||
980 | return err; | ||
977 | } | 981 | } |
978 | 982 | ||
979 | /** | 983 | /** |
@@ -987,15 +991,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |||
987 | static void bd_release_from_kobject(struct block_device *bdev, | 991 | static void bd_release_from_kobject(struct block_device *bdev, |
988 | struct kobject *kobj) | 992 | struct kobject *kobj) |
989 | { | 993 | { |
990 | struct bd_holder *bo; | ||
991 | |||
992 | if (!kobj) | 994 | if (!kobj) |
993 | return; | 995 | return; |
994 | 996 | ||
995 | mutex_lock(&bdev->bd_mutex); | 997 | mutex_lock(&bdev->bd_mutex); |
996 | bd_release(bdev); | 998 | bd_release(bdev); |
997 | if ((bo = del_bd_holder(bdev, kobj))) | 999 | free_bd_holder(del_bd_holder(bdev, kobj)); |
998 | free_bd_holder(bo); | ||
999 | mutex_unlock(&bdev->bd_mutex); | 1000 | mutex_unlock(&bdev->bd_mutex); |
1000 | } | 1001 | } |
1001 | 1002 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index aa68206bd517..0f9006714230 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -356,7 +356,7 @@ static void free_more_memory(void) | |||
356 | for_each_online_pgdat(pgdat) { | 356 | for_each_online_pgdat(pgdat) { |
357 | zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; | 357 | zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; |
358 | if (*zones) | 358 | if (*zones) |
359 | try_to_free_pages(zones, GFP_NOFS); | 359 | try_to_free_pages(zones, 0, GFP_NOFS); |
360 | } | 360 | } |
361 | } | 361 | } |
362 | 362 | ||
@@ -676,6 +676,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) | |||
676 | EXPORT_SYMBOL(mark_buffer_dirty_inode); | 676 | EXPORT_SYMBOL(mark_buffer_dirty_inode); |
677 | 677 | ||
678 | /* | 678 | /* |
679 | * Mark the page dirty, and set it dirty in the radix tree, and mark the inode | ||
680 | * dirty. | ||
681 | * | ||
682 | * If warn is true, then emit a warning if the page is not uptodate and has | ||
683 | * not been truncated. | ||
684 | */ | ||
685 | static int __set_page_dirty(struct page *page, | ||
686 | struct address_space *mapping, int warn) | ||
687 | { | ||
688 | if (unlikely(!mapping)) | ||
689 | return !TestSetPageDirty(page); | ||
690 | |||
691 | if (TestSetPageDirty(page)) | ||
692 | return 0; | ||
693 | |||
694 | write_lock_irq(&mapping->tree_lock); | ||
695 | if (page->mapping) { /* Race with truncate? */ | ||
696 | WARN_ON_ONCE(warn && !PageUptodate(page)); | ||
697 | |||
698 | if (mapping_cap_account_dirty(mapping)) { | ||
699 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
700 | task_io_account_write(PAGE_CACHE_SIZE); | ||
701 | } | ||
702 | radix_tree_tag_set(&mapping->page_tree, | ||
703 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
704 | } | ||
705 | write_unlock_irq(&mapping->tree_lock); | ||
706 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
707 | |||
708 | return 1; | ||
709 | } | ||
710 | |||
711 | /* | ||
679 | * Add a page to the dirty page list. | 712 | * Add a page to the dirty page list. |
680 | * | 713 | * |
681 | * It is a sad fact of life that this function is called from several places | 714 | * It is a sad fact of life that this function is called from several places |
@@ -702,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); | |||
702 | */ | 735 | */ |
703 | int __set_page_dirty_buffers(struct page *page) | 736 | int __set_page_dirty_buffers(struct page *page) |
704 | { | 737 | { |
705 | struct address_space * const mapping = page_mapping(page); | 738 | struct address_space *mapping = page_mapping(page); |
706 | 739 | ||
707 | if (unlikely(!mapping)) | 740 | if (unlikely(!mapping)) |
708 | return !TestSetPageDirty(page); | 741 | return !TestSetPageDirty(page); |
@@ -719,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page) | |||
719 | } | 752 | } |
720 | spin_unlock(&mapping->private_lock); | 753 | spin_unlock(&mapping->private_lock); |
721 | 754 | ||
722 | if (TestSetPageDirty(page)) | 755 | return __set_page_dirty(page, mapping, 1); |
723 | return 0; | ||
724 | |||
725 | write_lock_irq(&mapping->tree_lock); | ||
726 | if (page->mapping) { /* Race with truncate? */ | ||
727 | if (mapping_cap_account_dirty(mapping)) { | ||
728 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
729 | task_io_account_write(PAGE_CACHE_SIZE); | ||
730 | } | ||
731 | radix_tree_tag_set(&mapping->page_tree, | ||
732 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
733 | } | ||
734 | write_unlock_irq(&mapping->tree_lock); | ||
735 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
736 | return 1; | ||
737 | } | 756 | } |
738 | EXPORT_SYMBOL(__set_page_dirty_buffers); | 757 | EXPORT_SYMBOL(__set_page_dirty_buffers); |
739 | 758 | ||
@@ -982,7 +1001,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
982 | struct buffer_head *bh; | 1001 | struct buffer_head *bh; |
983 | 1002 | ||
984 | page = find_or_create_page(inode->i_mapping, index, | 1003 | page = find_or_create_page(inode->i_mapping, index, |
985 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 1004 | (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); |
986 | if (!page) | 1005 | if (!page) |
987 | return NULL; | 1006 | return NULL; |
988 | 1007 | ||
@@ -1026,11 +1045,6 @@ failed: | |||
1026 | /* | 1045 | /* |
1027 | * Create buffers for the specified block device block's page. If | 1046 | * Create buffers for the specified block device block's page. If |
1028 | * that page was dirty, the buffers are set dirty also. | 1047 | * that page was dirty, the buffers are set dirty also. |
1029 | * | ||
1030 | * Except that's a bug. Attaching dirty buffers to a dirty | ||
1031 | * blockdev's page can result in filesystem corruption, because | ||
1032 | * some of those buffers may be aliases of filesystem data. | ||
1033 | * grow_dev_page() will go BUG() if this happens. | ||
1034 | */ | 1048 | */ |
1035 | static int | 1049 | static int |
1036 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1050 | grow_buffers(struct block_device *bdev, sector_t block, int size) |
@@ -1137,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
1137 | */ | 1151 | */ |
1138 | void fastcall mark_buffer_dirty(struct buffer_head *bh) | 1152 | void fastcall mark_buffer_dirty(struct buffer_head *bh) |
1139 | { | 1153 | { |
1154 | WARN_ON_ONCE(!buffer_uptodate(bh)); | ||
1140 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | 1155 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) |
1141 | __set_page_dirty_nobuffers(bh->b_page); | 1156 | __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); |
1142 | } | 1157 | } |
1143 | 1158 | ||
1144 | /* | 1159 | /* |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8b0cbf4a4ad0..bd0f2f2353ce 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -849,6 +849,7 @@ static int cifs_oplock_thread(void * dummyarg) | |||
849 | __u16 netfid; | 849 | __u16 netfid; |
850 | int rc; | 850 | int rc; |
851 | 851 | ||
852 | set_freezable(); | ||
852 | do { | 853 | do { |
853 | if (try_to_freeze()) | 854 | if (try_to_freeze()) |
854 | continue; | 855 | continue; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f4e92661b223..0a1b8bd1dfcb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -363,6 +363,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) | |||
363 | GFP_KERNEL); | 363 | GFP_KERNEL); |
364 | } | 364 | } |
365 | 365 | ||
366 | set_freezable(); | ||
366 | while (!kthread_should_stop()) { | 367 | while (!kthread_should_stop()) { |
367 | if (try_to_freeze()) | 368 | if (try_to_freeze()) |
368 | continue; | 369 | continue; |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 1d716392c3aa..96df1d51fdc3 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c | |||
@@ -29,6 +29,7 @@ | |||
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
32 | #include <linux/exportfs.h> | ||
32 | 33 | ||
33 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 34 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
34 | 35 | ||
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b44cdc96fac..e440a7b95d02 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <linux/wireless.h> | 63 | #include <linux/wireless.h> |
64 | #include <linux/atalk.h> | 64 | #include <linux/atalk.h> |
65 | #include <linux/blktrace_api.h> | 65 | #include <linux/blktrace_api.h> |
66 | #include <linux/loop.h> | ||
66 | 67 | ||
67 | #include <net/bluetooth/bluetooth.h> | 68 | #include <net/bluetooth/bluetooth.h> |
68 | #include <net/bluetooth/hci.h> | 69 | #include <net/bluetooth/hci.h> |
@@ -3489,6 +3490,9 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) | |||
3489 | 3490 | ||
3490 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) | 3491 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) |
3491 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) | 3492 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) |
3493 | |||
3494 | /* loop */ | ||
3495 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
3492 | }; | 3496 | }; |
3493 | 3497 | ||
3494 | #define IOCTL_HASHSIZE 256 | 3498 | #define IOCTL_HASHSIZE 256 |
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 7b48c034b312..3b0185fdf9a4 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h | |||
@@ -29,10 +29,11 @@ | |||
29 | 29 | ||
30 | struct configfs_dirent { | 30 | struct configfs_dirent { |
31 | atomic_t s_count; | 31 | atomic_t s_count; |
32 | int s_dependent_count; | ||
32 | struct list_head s_sibling; | 33 | struct list_head s_sibling; |
33 | struct list_head s_children; | 34 | struct list_head s_children; |
34 | struct list_head s_links; | 35 | struct list_head s_links; |
35 | void * s_element; | 36 | void * s_element; |
36 | int s_type; | 37 | int s_type; |
37 | umode_t s_mode; | 38 | umode_t s_mode; |
38 | struct dentry * s_dentry; | 39 | struct dentry * s_dentry; |
@@ -41,8 +42,8 @@ struct configfs_dirent { | |||
41 | 42 | ||
42 | #define CONFIGFS_ROOT 0x0001 | 43 | #define CONFIGFS_ROOT 0x0001 |
43 | #define CONFIGFS_DIR 0x0002 | 44 | #define CONFIGFS_DIR 0x0002 |
44 | #define CONFIGFS_ITEM_ATTR 0x0004 | 45 | #define CONFIGFS_ITEM_ATTR 0x0004 |
45 | #define CONFIGFS_ITEM_LINK 0x0020 | 46 | #define CONFIGFS_ITEM_LINK 0x0020 |
46 | #define CONFIGFS_USET_DIR 0x0040 | 47 | #define CONFIGFS_USET_DIR 0x0040 |
47 | #define CONFIGFS_USET_DEFAULT 0x0080 | 48 | #define CONFIGFS_USET_DEFAULT 0x0080 |
48 | #define CONFIGFS_USET_DROPPING 0x0100 | 49 | #define CONFIGFS_USET_DROPPING 0x0100 |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5e6e37e58f36..2f436d4f1d6d 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry) | |||
355 | /* Mark that we've taken i_mutex */ | 355 | /* Mark that we've taken i_mutex */ |
356 | sd->s_type |= CONFIGFS_USET_DROPPING; | 356 | sd->s_type |= CONFIGFS_USET_DROPPING; |
357 | 357 | ||
358 | /* | ||
359 | * Yup, recursive. If there's a problem, blame | ||
360 | * deep nesting of default_groups | ||
361 | */ | ||
358 | ret = configfs_detach_prep(sd->s_dentry); | 362 | ret = configfs_detach_prep(sd->s_dentry); |
359 | if (!ret) | 363 | if (!ret) |
360 | continue; | 364 | continue; |
@@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group) | |||
562 | 566 | ||
563 | /* | 567 | /* |
564 | * All of link_obj/unlink_obj/link_group/unlink_group require that | 568 | * All of link_obj/unlink_obj/link_group/unlink_group require that |
565 | * subsys->su_sem is held. | 569 | * subsys->su_mutex is held. |
566 | */ | 570 | */ |
567 | 571 | ||
568 | static void unlink_obj(struct config_item *item) | 572 | static void unlink_obj(struct config_item *item) |
@@ -714,6 +718,28 @@ static void configfs_detach_group(struct config_item *item) | |||
714 | } | 718 | } |
715 | 719 | ||
716 | /* | 720 | /* |
721 | * After the item has been detached from the filesystem view, we are | ||
722 | * ready to tear it out of the hierarchy. Notify the client before | ||
723 | * we do that so they can perform any cleanup that requires | ||
724 | * navigating the hierarchy. A client does not need to provide this | ||
725 | * callback. The subsystem semaphore MUST be held by the caller, and | ||
726 | * references must be valid for both items. It also assumes the | ||
727 | * caller has validated ci_type. | ||
728 | */ | ||
729 | static void client_disconnect_notify(struct config_item *parent_item, | ||
730 | struct config_item *item) | ||
731 | { | ||
732 | struct config_item_type *type; | ||
733 | |||
734 | type = parent_item->ci_type; | ||
735 | BUG_ON(!type); | ||
736 | |||
737 | if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) | ||
738 | type->ct_group_ops->disconnect_notify(to_config_group(parent_item), | ||
739 | item); | ||
740 | } | ||
741 | |||
742 | /* | ||
717 | * Drop the initial reference from make_item()/make_group() | 743 | * Drop the initial reference from make_item()/make_group() |
718 | * This function assumes that reference is held on item | 744 | * This function assumes that reference is held on item |
719 | * and that item holds a valid reference to the parent. Also, it | 745 | * and that item holds a valid reference to the parent. Also, it |
@@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item, | |||
733 | */ | 759 | */ |
734 | if (type->ct_group_ops && type->ct_group_ops->drop_item) | 760 | if (type->ct_group_ops && type->ct_group_ops->drop_item) |
735 | type->ct_group_ops->drop_item(to_config_group(parent_item), | 761 | type->ct_group_ops->drop_item(to_config_group(parent_item), |
736 | item); | 762 | item); |
737 | else | 763 | else |
738 | config_item_put(item); | 764 | config_item_put(item); |
739 | } | 765 | } |
740 | 766 | ||
767 | #ifdef DEBUG | ||
768 | static void configfs_dump_one(struct configfs_dirent *sd, int level) | ||
769 | { | ||
770 | printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); | ||
771 | |||
772 | #define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); | ||
773 | type_print(CONFIGFS_ROOT); | ||
774 | type_print(CONFIGFS_DIR); | ||
775 | type_print(CONFIGFS_ITEM_ATTR); | ||
776 | type_print(CONFIGFS_ITEM_LINK); | ||
777 | type_print(CONFIGFS_USET_DIR); | ||
778 | type_print(CONFIGFS_USET_DEFAULT); | ||
779 | type_print(CONFIGFS_USET_DROPPING); | ||
780 | #undef type_print | ||
781 | } | ||
782 | |||
783 | static int configfs_dump(struct configfs_dirent *sd, int level) | ||
784 | { | ||
785 | struct configfs_dirent *child_sd; | ||
786 | int ret = 0; | ||
787 | |||
788 | configfs_dump_one(sd, level); | ||
789 | |||
790 | if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) | ||
791 | return 0; | ||
792 | |||
793 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { | ||
794 | ret = configfs_dump(child_sd, level + 2); | ||
795 | if (ret) | ||
796 | break; | ||
797 | } | ||
798 | |||
799 | return ret; | ||
800 | } | ||
801 | #endif | ||
802 | |||
803 | |||
804 | /* | ||
805 | * configfs_depend_item() and configfs_undepend_item() | ||
806 | * | ||
807 | * WARNING: Do not call these from a configfs callback! | ||
808 | * | ||
809 | * This describes these functions and their helpers. | ||
810 | * | ||
811 | * Allow another kernel system to depend on a config_item. If this | ||
812 | * happens, the item cannot go away until the dependant can live without | ||
813 | * it. The idea is to give client modules as simple an interface as | ||
814 | * possible. When a system asks them to depend on an item, they just | ||
815 | * call configfs_depend_item(). If the item is live and the client | ||
816 | * driver is in good shape, we'll happily do the work for them. | ||
817 | * | ||
818 | * Why is the locking complex? Because configfs uses the VFS to handle | ||
819 | * all locking, but this function is called outside the normal | ||
820 | * VFS->configfs path. So it must take VFS locks to prevent the | ||
821 | * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is | ||
822 | * why you can't call these functions underneath configfs callbacks. | ||
823 | * | ||
824 | * Note, btw, that this can be called at *any* time, even when a configfs | ||
825 | * subsystem isn't registered, or when configfs is loading or unloading. | ||
826 | * Just like configfs_register_subsystem(). So we take the same | ||
827 | * precautions. We pin the filesystem. We lock each i_mutex _in_order_ | ||
828 | * on our way down the tree. If we can find the target item in the | ||
829 | * configfs tree, it must be part of the subsystem tree as well, so we | ||
830 | * do not need the subsystem semaphore. Holding the i_mutex chain locks | ||
831 | * out mkdir() and rmdir(), who might be racing us. | ||
832 | */ | ||
833 | |||
834 | /* | ||
835 | * configfs_depend_prep() | ||
836 | * | ||
837 | * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are | ||
838 | * attributes. This is similar but not the same to configfs_detach_prep(). | ||
839 | * Note that configfs_detach_prep() expects the parent to be locked when it | ||
840 | * is called, but we lock the parent *inside* configfs_depend_prep(). We | ||
841 | * do that so we can unlock it if we find nothing. | ||
842 | * | ||
843 | * Here we do a depth-first search of the dentry hierarchy looking for | ||
844 | * our object. We take i_mutex on each step of the way down. IT IS | ||
845 | * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, | ||
846 | * we'll drop the i_mutex. | ||
847 | * | ||
848 | * If the target is not found, -ENOENT is bubbled up and we have released | ||
849 | * all locks. If the target was found, the locks will be cleared by | ||
850 | * configfs_depend_rollback(). | ||
851 | * | ||
852 | * This adds a requirement that all config_items be unique! | ||
853 | * | ||
854 | * This is recursive because the locking traversal is tricky. There isn't | ||
855 | * much on the stack, though, so folks that need this function - be careful | ||
856 | * about your stack! Patches will be accepted to make it iterative. | ||
857 | */ | ||
858 | static int configfs_depend_prep(struct dentry *origin, | ||
859 | struct config_item *target) | ||
860 | { | ||
861 | struct configfs_dirent *child_sd, *sd = origin->d_fsdata; | ||
862 | int ret = 0; | ||
863 | |||
864 | BUG_ON(!origin || !sd); | ||
865 | |||
866 | /* Lock this guy on the way down */ | ||
867 | mutex_lock(&sd->s_dentry->d_inode->i_mutex); | ||
868 | if (sd->s_element == target) /* Boo-yah */ | ||
869 | goto out; | ||
870 | |||
871 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { | ||
872 | if (child_sd->s_type & CONFIGFS_DIR) { | ||
873 | ret = configfs_depend_prep(child_sd->s_dentry, | ||
874 | target); | ||
875 | if (!ret) | ||
876 | goto out; /* Child path boo-yah */ | ||
877 | } | ||
878 | } | ||
879 | |||
880 | /* We looped all our children and didn't find target */ | ||
881 | mutex_unlock(&sd->s_dentry->d_inode->i_mutex); | ||
882 | ret = -ENOENT; | ||
883 | |||
884 | out: | ||
885 | return ret; | ||
886 | } | ||
887 | |||
888 | /* | ||
889 | * This is ONLY called if configfs_depend_prep() did its job. So we can | ||
890 | * trust the entire path from item back up to origin. | ||
891 | * | ||
892 | * We walk backwards from item, unlocking each i_mutex. We finish by | ||
893 | * unlocking origin. | ||
894 | */ | ||
895 | static void configfs_depend_rollback(struct dentry *origin, | ||
896 | struct config_item *item) | ||
897 | { | ||
898 | struct dentry *dentry = item->ci_dentry; | ||
899 | |||
900 | while (dentry != origin) { | ||
901 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
902 | dentry = dentry->d_parent; | ||
903 | } | ||
904 | |||
905 | mutex_unlock(&origin->d_inode->i_mutex); | ||
906 | } | ||
907 | |||
908 | int configfs_depend_item(struct configfs_subsystem *subsys, | ||
909 | struct config_item *target) | ||
910 | { | ||
911 | int ret; | ||
912 | struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; | ||
913 | struct config_item *s_item = &subsys->su_group.cg_item; | ||
914 | |||
915 | /* | ||
916 | * Pin the configfs filesystem. This means we can safely access | ||
917 | * the root of the configfs filesystem. | ||
918 | */ | ||
919 | ret = configfs_pin_fs(); | ||
920 | if (ret) | ||
921 | return ret; | ||
922 | |||
923 | /* | ||
924 | * Next, lock the root directory. We're going to check that the | ||
925 | * subsystem is really registered, and so we need to lock out | ||
926 | * configfs_[un]register_subsystem(). | ||
927 | */ | ||
928 | mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); | ||
929 | |||
930 | root_sd = configfs_sb->s_root->d_fsdata; | ||
931 | |||
932 | list_for_each_entry(p, &root_sd->s_children, s_sibling) { | ||
933 | if (p->s_type & CONFIGFS_DIR) { | ||
934 | if (p->s_element == s_item) { | ||
935 | subsys_sd = p; | ||
936 | break; | ||
937 | } | ||
938 | } | ||
939 | } | ||
940 | |||
941 | if (!subsys_sd) { | ||
942 | ret = -ENOENT; | ||
943 | goto out_unlock_fs; | ||
944 | } | ||
945 | |||
946 | /* Ok, now we can trust subsys/s_item */ | ||
947 | |||
948 | /* Scan the tree, locking i_mutex recursively, return 0 if found */ | ||
949 | ret = configfs_depend_prep(subsys_sd->s_dentry, target); | ||
950 | if (ret) | ||
951 | goto out_unlock_fs; | ||
952 | |||
953 | /* We hold all i_mutexes from the subsystem down to the target */ | ||
954 | p = target->ci_dentry->d_fsdata; | ||
955 | p->s_dependent_count += 1; | ||
956 | |||
957 | configfs_depend_rollback(subsys_sd->s_dentry, target); | ||
958 | |||
959 | out_unlock_fs: | ||
960 | mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); | ||
961 | |||
962 | /* | ||
963 | * If we succeeded, the fs is pinned via other methods. If not, | ||
964 | * we're done with it anyway. So release_fs() is always right. | ||
965 | */ | ||
966 | configfs_release_fs(); | ||
967 | |||
968 | return ret; | ||
969 | } | ||
970 | EXPORT_SYMBOL(configfs_depend_item); | ||
971 | |||
972 | /* | ||
973 | * Release the dependent linkage. This is much simpler than | ||
974 | * configfs_depend_item() because we know that that the client driver is | ||
975 | * pinned, thus the subsystem is pinned, and therefore configfs is pinned. | ||
976 | */ | ||
977 | void configfs_undepend_item(struct configfs_subsystem *subsys, | ||
978 | struct config_item *target) | ||
979 | { | ||
980 | struct configfs_dirent *sd; | ||
981 | |||
982 | /* | ||
983 | * Since we can trust everything is pinned, we just need i_mutex | ||
984 | * on the item. | ||
985 | */ | ||
986 | mutex_lock(&target->ci_dentry->d_inode->i_mutex); | ||
987 | |||
988 | sd = target->ci_dentry->d_fsdata; | ||
989 | BUG_ON(sd->s_dependent_count < 1); | ||
990 | |||
991 | sd->s_dependent_count -= 1; | ||
992 | |||
993 | /* | ||
994 | * After this unlock, we cannot trust the item to stay alive! | ||
995 | * DO NOT REFERENCE item after this unlock. | ||
996 | */ | ||
997 | mutex_unlock(&target->ci_dentry->d_inode->i_mutex); | ||
998 | } | ||
999 | EXPORT_SYMBOL(configfs_undepend_item); | ||
741 | 1000 | ||
742 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 1001 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
743 | { | 1002 | { |
@@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
783 | 1042 | ||
784 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); | 1043 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); |
785 | 1044 | ||
786 | down(&subsys->su_sem); | 1045 | mutex_lock(&subsys->su_mutex); |
787 | group = NULL; | 1046 | group = NULL; |
788 | item = NULL; | 1047 | item = NULL; |
789 | if (type->ct_group_ops->make_group) { | 1048 | if (type->ct_group_ops->make_group) { |
@@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
797 | if (item) | 1056 | if (item) |
798 | link_obj(parent_item, item); | 1057 | link_obj(parent_item, item); |
799 | } | 1058 | } |
800 | up(&subsys->su_sem); | 1059 | mutex_unlock(&subsys->su_mutex); |
801 | 1060 | ||
802 | kfree(name); | 1061 | kfree(name); |
803 | if (!item) { | 1062 | if (!item) { |
@@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
841 | out_unlink: | 1100 | out_unlink: |
842 | if (ret) { | 1101 | if (ret) { |
843 | /* Tear down everything we built up */ | 1102 | /* Tear down everything we built up */ |
844 | down(&subsys->su_sem); | 1103 | mutex_lock(&subsys->su_mutex); |
1104 | |||
1105 | client_disconnect_notify(parent_item, item); | ||
845 | if (group) | 1106 | if (group) |
846 | unlink_group(group); | 1107 | unlink_group(group); |
847 | else | 1108 | else |
848 | unlink_obj(item); | 1109 | unlink_obj(item); |
849 | client_drop_item(parent_item, item); | 1110 | client_drop_item(parent_item, item); |
850 | up(&subsys->su_sem); | 1111 | |
1112 | mutex_unlock(&subsys->su_mutex); | ||
851 | 1113 | ||
852 | if (module_got) | 1114 | if (module_got) |
853 | module_put(owner); | 1115 | module_put(owner); |
@@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
881 | if (sd->s_type & CONFIGFS_USET_DEFAULT) | 1143 | if (sd->s_type & CONFIGFS_USET_DEFAULT) |
882 | return -EPERM; | 1144 | return -EPERM; |
883 | 1145 | ||
1146 | /* | ||
1147 | * Here's where we check for dependents. We're protected by | ||
1148 | * i_mutex. | ||
1149 | */ | ||
1150 | if (sd->s_dependent_count) | ||
1151 | return -EBUSY; | ||
1152 | |||
884 | /* Get a working ref until we have the child */ | 1153 | /* Get a working ref until we have the child */ |
885 | parent_item = configfs_get_config_item(dentry->d_parent); | 1154 | parent_item = configfs_get_config_item(dentry->d_parent); |
886 | subsys = to_config_group(parent_item)->cg_subsys; | 1155 | subsys = to_config_group(parent_item)->cg_subsys; |
@@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
910 | if (sd->s_type & CONFIGFS_USET_DIR) { | 1179 | if (sd->s_type & CONFIGFS_USET_DIR) { |
911 | configfs_detach_group(item); | 1180 | configfs_detach_group(item); |
912 | 1181 | ||
913 | down(&subsys->su_sem); | 1182 | mutex_lock(&subsys->su_mutex); |
1183 | client_disconnect_notify(parent_item, item); | ||
914 | unlink_group(to_config_group(item)); | 1184 | unlink_group(to_config_group(item)); |
915 | } else { | 1185 | } else { |
916 | configfs_detach_item(item); | 1186 | configfs_detach_item(item); |
917 | 1187 | ||
918 | down(&subsys->su_sem); | 1188 | mutex_lock(&subsys->su_mutex); |
1189 | client_disconnect_notify(parent_item, item); | ||
919 | unlink_obj(item); | 1190 | unlink_obj(item); |
920 | } | 1191 | } |
921 | 1192 | ||
922 | client_drop_item(parent_item, item); | 1193 | client_drop_item(parent_item, item); |
923 | up(&subsys->su_sem); | 1194 | mutex_unlock(&subsys->su_mutex); |
924 | 1195 | ||
925 | /* Drop our reference from above */ | 1196 | /* Drop our reference from above */ |
926 | config_item_put(item); | 1197 | config_item_put(item); |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 3527c7c6def8..a3658f9a082c 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -27,19 +27,26 @@ | |||
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/mutex.h> | ||
30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
31 | #include <asm/semaphore.h> | ||
32 | 32 | ||
33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
34 | #include "configfs_internal.h" | 34 | #include "configfs_internal.h" |
35 | 35 | ||
36 | /* | ||
37 | * A simple attribute can only be 4096 characters. Why 4k? Because the | ||
38 | * original code limited it to PAGE_SIZE. That's a bad idea, though, | ||
39 | * because an attribute of 16k on ia64 won't work on x86. So we limit to | ||
40 | * 4k, our minimum common page size. | ||
41 | */ | ||
42 | #define SIMPLE_ATTR_SIZE 4096 | ||
36 | 43 | ||
37 | struct configfs_buffer { | 44 | struct configfs_buffer { |
38 | size_t count; | 45 | size_t count; |
39 | loff_t pos; | 46 | loff_t pos; |
40 | char * page; | 47 | char * page; |
41 | struct configfs_item_operations * ops; | 48 | struct configfs_item_operations * ops; |
42 | struct semaphore sem; | 49 | struct mutex mutex; |
43 | int needs_read_fill; | 50 | int needs_read_fill; |
44 | }; | 51 | }; |
45 | 52 | ||
@@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf | |||
69 | 76 | ||
70 | count = ops->show_attribute(item,attr,buffer->page); | 77 | count = ops->show_attribute(item,attr,buffer->page); |
71 | buffer->needs_read_fill = 0; | 78 | buffer->needs_read_fill = 0; |
72 | BUG_ON(count > (ssize_t)PAGE_SIZE); | 79 | BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); |
73 | if (count >= 0) | 80 | if (count >= 0) |
74 | buffer->count = count; | 81 | buffer->count = count; |
75 | else | 82 | else |
@@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
102 | struct configfs_buffer * buffer = file->private_data; | 109 | struct configfs_buffer * buffer = file->private_data; |
103 | ssize_t retval = 0; | 110 | ssize_t retval = 0; |
104 | 111 | ||
105 | down(&buffer->sem); | 112 | mutex_lock(&buffer->mutex); |
106 | if (buffer->needs_read_fill) { | 113 | if (buffer->needs_read_fill) { |
107 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) | 114 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) |
108 | goto out; | 115 | goto out; |
@@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
112 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, | 119 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, |
113 | buffer->count); | 120 | buffer->count); |
114 | out: | 121 | out: |
115 | up(&buffer->sem); | 122 | mutex_unlock(&buffer->mutex); |
116 | return retval; | 123 | return retval; |
117 | } | 124 | } |
118 | 125 | ||
@@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size | |||
137 | if (!buffer->page) | 144 | if (!buffer->page) |
138 | return -ENOMEM; | 145 | return -ENOMEM; |
139 | 146 | ||
140 | if (count >= PAGE_SIZE) | 147 | if (count >= SIMPLE_ATTR_SIZE) |
141 | count = PAGE_SIZE - 1; | 148 | count = SIMPLE_ATTR_SIZE - 1; |
142 | error = copy_from_user(buffer->page,buf,count); | 149 | error = copy_from_user(buffer->page,buf,count); |
143 | buffer->needs_read_fill = 1; | 150 | buffer->needs_read_fill = 1; |
144 | /* if buf is assumed to contain a string, terminate it by \0, | 151 | /* if buf is assumed to contain a string, terminate it by \0, |
@@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof | |||
193 | struct configfs_buffer * buffer = file->private_data; | 200 | struct configfs_buffer * buffer = file->private_data; |
194 | ssize_t len; | 201 | ssize_t len; |
195 | 202 | ||
196 | down(&buffer->sem); | 203 | mutex_lock(&buffer->mutex); |
197 | len = fill_write_buffer(buffer, buf, count); | 204 | len = fill_write_buffer(buffer, buf, count); |
198 | if (len > 0) | 205 | if (len > 0) |
199 | len = flush_write_buffer(file->f_path.dentry, buffer, count); | 206 | len = flush_write_buffer(file->f_path.dentry, buffer, count); |
200 | if (len > 0) | 207 | if (len > 0) |
201 | *ppos += len; | 208 | *ppos += len; |
202 | up(&buffer->sem); | 209 | mutex_unlock(&buffer->mutex); |
203 | return len; | 210 | return len; |
204 | } | 211 | } |
205 | 212 | ||
@@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file) | |||
253 | error = -ENOMEM; | 260 | error = -ENOMEM; |
254 | goto Enomem; | 261 | goto Enomem; |
255 | } | 262 | } |
256 | init_MUTEX(&buffer->sem); | 263 | mutex_init(&buffer->mutex); |
257 | buffer->needs_read_fill = 1; | 264 | buffer->needs_read_fill = 1; |
258 | buffer->ops = ops; | 265 | buffer->ops = ops; |
259 | file->private_data = buffer; | 266 | file->private_data = buffer; |
@@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp) | |||
292 | if (buffer) { | 299 | if (buffer) { |
293 | if (buffer->page) | 300 | if (buffer->page) |
294 | free_page((unsigned long)buffer->page); | 301 | free_page((unsigned long)buffer->page); |
302 | mutex_destroy(&buffer->mutex); | ||
295 | kfree(buffer); | 303 | kfree(buffer); |
296 | } | 304 | } |
297 | return 0; | 305 | return 0; |
diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 24421209f854..76dc4c3e5d51 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c | |||
@@ -62,7 +62,6 @@ void config_item_init(struct config_item * item) | |||
62 | * dynamically allocated string that @item->ci_name points to. | 62 | * dynamically allocated string that @item->ci_name points to. |
63 | * Otherwise, use the static @item->ci_namebuf array. | 63 | * Otherwise, use the static @item->ci_namebuf array. |
64 | */ | 64 | */ |
65 | |||
66 | int config_item_set_name(struct config_item * item, const char * fmt, ...) | 65 | int config_item_set_name(struct config_item * item, const char * fmt, ...) |
67 | { | 66 | { |
68 | int error = 0; | 67 | int error = 0; |
@@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item) | |||
139 | return item; | 138 | return item; |
140 | } | 139 | } |
141 | 140 | ||
142 | /** | 141 | static void config_item_cleanup(struct config_item * item) |
143 | * config_item_cleanup - free config_item resources. | ||
144 | * @item: item. | ||
145 | */ | ||
146 | |||
147 | void config_item_cleanup(struct config_item * item) | ||
148 | { | 142 | { |
149 | struct config_item_type * t = item->ci_type; | 143 | struct config_item_type * t = item->ci_type; |
150 | struct config_group * s = item->ci_group; | 144 | struct config_group * s = item->ci_group; |
@@ -179,39 +173,35 @@ void config_item_put(struct config_item * item) | |||
179 | kref_put(&item->ci_kref, config_item_release); | 173 | kref_put(&item->ci_kref, config_item_release); |
180 | } | 174 | } |
181 | 175 | ||
182 | |||
183 | /** | 176 | /** |
184 | * config_group_init - initialize a group for use | 177 | * config_group_init - initialize a group for use |
185 | * @k: group | 178 | * @k: group |
186 | */ | 179 | */ |
187 | |||
188 | void config_group_init(struct config_group *group) | 180 | void config_group_init(struct config_group *group) |
189 | { | 181 | { |
190 | config_item_init(&group->cg_item); | 182 | config_item_init(&group->cg_item); |
191 | INIT_LIST_HEAD(&group->cg_children); | 183 | INIT_LIST_HEAD(&group->cg_children); |
192 | } | 184 | } |
193 | 185 | ||
194 | |||
195 | /** | 186 | /** |
196 | * config_group_find_obj - search for item in group. | 187 | * config_group_find_item - search for item in group. |
197 | * @group: group we're looking in. | 188 | * @group: group we're looking in. |
198 | * @name: item's name. | 189 | * @name: item's name. |
199 | * | 190 | * |
200 | * Lock group via @group->cg_subsys, and iterate over @group->cg_list, | 191 | * Iterate over @group->cg_list, looking for a matching config_item. |
201 | * looking for a matching config_item. If matching item is found | 192 | * If matching item is found take a reference and return the item. |
202 | * take a reference and return the item. | 193 | * Caller must have locked group via @group->cg_subsys->su_mtx. |
203 | */ | 194 | */ |
204 | 195 | struct config_item *config_group_find_item(struct config_group *group, | |
205 | struct config_item * config_group_find_obj(struct config_group * group, const char * name) | 196 | const char *name) |
206 | { | 197 | { |
207 | struct list_head * entry; | 198 | struct list_head * entry; |
208 | struct config_item * ret = NULL; | 199 | struct config_item * ret = NULL; |
209 | 200 | ||
210 | /* XXX LOCKING! */ | ||
211 | list_for_each(entry,&group->cg_children) { | 201 | list_for_each(entry,&group->cg_children) { |
212 | struct config_item * item = to_item(entry); | 202 | struct config_item * item = to_item(entry); |
213 | if (config_item_name(item) && | 203 | if (config_item_name(item) && |
214 | !strcmp(config_item_name(item), name)) { | 204 | !strcmp(config_item_name(item), name)) { |
215 | ret = config_item_get(item); | 205 | ret = config_item_get(item); |
216 | break; | 206 | break; |
217 | } | 207 | } |
@@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch | |||
219 | return ret; | 209 | return ret; |
220 | } | 210 | } |
221 | 211 | ||
222 | |||
223 | EXPORT_SYMBOL(config_item_init); | 212 | EXPORT_SYMBOL(config_item_init); |
224 | EXPORT_SYMBOL(config_group_init); | 213 | EXPORT_SYMBOL(config_group_init); |
225 | EXPORT_SYMBOL(config_item_get); | 214 | EXPORT_SYMBOL(config_item_get); |
226 | EXPORT_SYMBOL(config_item_put); | 215 | EXPORT_SYMBOL(config_item_put); |
227 | EXPORT_SYMBOL(config_group_find_obj); | 216 | EXPORT_SYMBOL(config_group_find_item); |
diff --git a/fs/dcache.c b/fs/dcache.c index 0e73aa0a0e8b..cb9d05056b54 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -883,6 +883,11 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) | |||
883 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 883 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; |
884 | } | 884 | } |
885 | 885 | ||
886 | static struct shrinker dcache_shrinker = { | ||
887 | .shrink = shrink_dcache_memory, | ||
888 | .seeks = DEFAULT_SEEKS, | ||
889 | }; | ||
890 | |||
886 | /** | 891 | /** |
887 | * d_alloc - allocate a dcache entry | 892 | * d_alloc - allocate a dcache entry |
888 | * @parent: parent of entry to allocate | 893 | * @parent: parent of entry to allocate |
@@ -2115,7 +2120,7 @@ static void __init dcache_init(unsigned long mempages) | |||
2115 | dentry_cache = KMEM_CACHE(dentry, | 2120 | dentry_cache = KMEM_CACHE(dentry, |
2116 | SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); | 2121 | SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); |
2117 | 2122 | ||
2118 | set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); | 2123 | register_shrinker(&dcache_shrinker); |
2119 | 2124 | ||
2120 | /* Hash may have been set up in dcache_init_early */ | 2125 | /* Hash may have been set up in dcache_init_early */ |
2121 | if (!hashdist) | 2126 | if (!hashdist) |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 5069b2cb5a1f..2f8e3c81bc19 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | |||
133 | return len; | 133 | return len; |
134 | } | 134 | } |
135 | 135 | ||
136 | #define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ | ||
137 | .attr = { .ca_name = __stringify(_name), \ | ||
138 | .ca_mode = _mode, \ | ||
139 | .ca_owner = THIS_MODULE }, \ | ||
140 | .show = _read, \ | ||
141 | .store = _write, \ | ||
142 | } | ||
143 | |||
144 | #define CLUSTER_ATTR(name, check_zero) \ | 136 | #define CLUSTER_ATTR(name, check_zero) \ |
145 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ | 137 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ |
146 | { \ | 138 | { \ |
@@ -615,7 +607,7 @@ static struct clusters clusters_root = { | |||
615 | int dlm_config_init(void) | 607 | int dlm_config_init(void) |
616 | { | 608 | { |
617 | config_group_init(&clusters_root.subsys.su_group); | 609 | config_group_init(&clusters_root.subsys.su_group); |
618 | init_MUTEX(&clusters_root.subsys.su_sem); | 610 | mutex_init(&clusters_root.subsys.su_mutex); |
619 | return configfs_register_subsystem(&clusters_root.subsys); | 611 | return configfs_register_subsystem(&clusters_root.subsys); |
620 | } | 612 | } |
621 | 613 | ||
@@ -759,9 +751,9 @@ static struct space *get_space(char *name) | |||
759 | if (!space_list) | 751 | if (!space_list) |
760 | return NULL; | 752 | return NULL; |
761 | 753 | ||
762 | down(&space_list->cg_subsys->su_sem); | 754 | mutex_lock(&space_list->cg_subsys->su_mutex); |
763 | i = config_group_find_obj(space_list, name); | 755 | i = config_group_find_item(space_list, name); |
764 | up(&space_list->cg_subsys->su_sem); | 756 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
765 | 757 | ||
766 | return to_space(i); | 758 | return to_space(i); |
767 | } | 759 | } |
@@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
780 | if (!comm_list) | 772 | if (!comm_list) |
781 | return NULL; | 773 | return NULL; |
782 | 774 | ||
783 | down(&clusters_root.subsys.su_sem); | 775 | mutex_lock(&clusters_root.subsys.su_mutex); |
784 | 776 | ||
785 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 777 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
786 | cm = to_comm(i); | 778 | cm = to_comm(i); |
@@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
800 | break; | 792 | break; |
801 | } | 793 | } |
802 | } | 794 | } |
803 | up(&clusters_root.subsys.su_sem); | 795 | mutex_unlock(&clusters_root.subsys.su_mutex); |
804 | 796 | ||
805 | if (!found) | 797 | if (!found) |
806 | cm = NULL; | 798 | cm = NULL; |
diff --git a/fs/dquot.c b/fs/dquot.c index 8819d281500c..7e273151f589 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -538,6 +538,11 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | |||
538 | return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; | 538 | return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; |
539 | } | 539 | } |
540 | 540 | ||
541 | static struct shrinker dqcache_shrinker = { | ||
542 | .shrink = shrink_dqcache_memory, | ||
543 | .seeks = DEFAULT_SEEKS, | ||
544 | }; | ||
545 | |||
541 | /* | 546 | /* |
542 | * Put reference to dquot | 547 | * Put reference to dquot |
543 | * NOTE: If you change this function please check whether dqput_blocks() works right... | 548 | * NOTE: If you change this function please check whether dqput_blocks() works right... |
@@ -1870,7 +1875,7 @@ static int __init dquot_init(void) | |||
1870 | printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", | 1875 | printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", |
1871 | nr_hash, order, (PAGE_SIZE << order)); | 1876 | nr_hash, order, (PAGE_SIZE << order)); |
1872 | 1877 | ||
1873 | set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory); | 1878 | register_shrinker(&dqcache_shrinker); |
1874 | 1879 | ||
1875 | return 0; | 1880 | return 0; |
1876 | } | 1881 | } |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 03ea7696fe39..59375efcf39d 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb) | |||
20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) | 21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) |
22 | continue; | 22 | continue; |
23 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 23 | __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); |
24 | } | 24 | } |
25 | spin_unlock(&inode_lock); | 25 | spin_unlock(&inode_lock); |
26 | } | 26 | } |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 83e94fedd4e9..e77a2ec71aa5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -282,7 +282,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, | |||
282 | struct dentry *lower_dentry; | 282 | struct dentry *lower_dentry; |
283 | struct vfsmount *lower_mnt; | 283 | struct vfsmount *lower_mnt; |
284 | char *encoded_name; | 284 | char *encoded_name; |
285 | unsigned int encoded_namelen; | 285 | int encoded_namelen; |
286 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | 286 | struct ecryptfs_crypt_stat *crypt_stat = NULL; |
287 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; | 287 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; |
288 | char *page_virt = NULL; | 288 | char *page_virt = NULL; |
@@ -473,7 +473,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, | |||
473 | struct dentry *lower_dir_dentry; | 473 | struct dentry *lower_dir_dentry; |
474 | umode_t mode; | 474 | umode_t mode; |
475 | char *encoded_symname; | 475 | char *encoded_symname; |
476 | unsigned int encoded_symlen; | 476 | int encoded_symlen; |
477 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | 477 | struct ecryptfs_crypt_stat *crypt_stat = NULL; |
478 | 478 | ||
479 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 479 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index ed4a207fe22a..5276b19423c1 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -75,6 +75,38 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
75 | return NULL; | 75 | return NULL; |
76 | } | 76 | } |
77 | 77 | ||
78 | struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | ||
79 | { | ||
80 | __u32 *objp = vobjp; | ||
81 | unsigned long ino = objp[0]; | ||
82 | __u32 generation = objp[1]; | ||
83 | struct inode *inode; | ||
84 | struct dentry *result; | ||
85 | |||
86 | if (ino == 0) | ||
87 | return ERR_PTR(-ESTALE); | ||
88 | inode = iget(sb, ino); | ||
89 | if (inode == NULL) | ||
90 | return ERR_PTR(-ENOMEM); | ||
91 | |||
92 | if (is_bad_inode(inode) || | ||
93 | (generation && inode->i_generation != generation)) { | ||
94 | result = ERR_PTR(-ESTALE); | ||
95 | goto out_iput; | ||
96 | } | ||
97 | |||
98 | result = d_alloc_anon(inode); | ||
99 | if (!result) { | ||
100 | result = ERR_PTR(-ENOMEM); | ||
101 | goto out_iput; | ||
102 | } | ||
103 | return result; | ||
104 | |||
105 | out_iput: | ||
106 | iput(inode); | ||
107 | return result; | ||
108 | } | ||
109 | |||
78 | struct dentry *efs_get_parent(struct dentry *child) | 110 | struct dentry *efs_get_parent(struct dentry *child) |
79 | { | 111 | { |
80 | struct dentry *parent; | 112 | struct dentry *parent; |
diff --git a/fs/efs/super.c b/fs/efs/super.c index e0a6839e68ae..d360c81f3a72 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/efs_fs.h> | 11 | #include <linux/efs_fs.h> |
12 | #include <linux/efs_vh.h> | 12 | #include <linux/efs_vh.h> |
13 | #include <linux/efs_fs_sb.h> | 13 | #include <linux/efs_fs_sb.h> |
14 | #include <linux/exportfs.h> | ||
14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
15 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
16 | #include <linux/vfs.h> | 17 | #include <linux/vfs.h> |
@@ -113,6 +114,7 @@ static const struct super_operations efs_superblock_operations = { | |||
113 | }; | 114 | }; |
114 | 115 | ||
115 | static struct export_operations efs_export_ops = { | 116 | static struct export_operations efs_export_ops = { |
117 | .get_dentry = efs_get_dentry, | ||
116 | .get_parent = efs_get_parent, | 118 | .get_parent = efs_get_parent, |
117 | }; | 119 | }; |
118 | 120 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index e98f6cd7200c..8adb32a9387a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -1,15 +1,45 @@ | |||
1 | 1 | ||
2 | #include <linux/exportfs.h> | ||
2 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
3 | #include <linux/file.h> | 4 | #include <linux/file.h> |
4 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/mount.h> | ||
5 | #include <linux/namei.h> | 7 | #include <linux/namei.h> |
6 | 8 | ||
7 | struct export_operations export_op_default; | 9 | #define dprintk(fmt, args...) do{}while(0) |
8 | 10 | ||
9 | #define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun) | ||
10 | 11 | ||
11 | #define dprintk(fmt, args...) do{}while(0) | 12 | static int get_name(struct dentry *dentry, char *name, |
13 | struct dentry *child); | ||
14 | |||
15 | |||
16 | static struct dentry *exportfs_get_dentry(struct super_block *sb, void *obj) | ||
17 | { | ||
18 | struct dentry *result = ERR_PTR(-ESTALE); | ||
19 | |||
20 | if (sb->s_export_op->get_dentry) { | ||
21 | result = sb->s_export_op->get_dentry(sb, obj); | ||
22 | if (!result) | ||
23 | result = ERR_PTR(-ESTALE); | ||
24 | } | ||
25 | |||
26 | return result; | ||
27 | } | ||
28 | |||
29 | static int exportfs_get_name(struct dentry *dir, char *name, | ||
30 | struct dentry *child) | ||
31 | { | ||
32 | struct export_operations *nop = dir->d_sb->s_export_op; | ||
12 | 33 | ||
34 | if (nop->get_name) | ||
35 | return nop->get_name(dir, name, child); | ||
36 | else | ||
37 | return get_name(dir, name, child); | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Check if the dentry or any of it's aliases is acceptable. | ||
42 | */ | ||
13 | static struct dentry * | 43 | static struct dentry * |
14 | find_acceptable_alias(struct dentry *result, | 44 | find_acceptable_alias(struct dentry *result, |
15 | int (*acceptable)(void *context, struct dentry *dentry), | 45 | int (*acceptable)(void *context, struct dentry *dentry), |
@@ -17,6 +47,9 @@ find_acceptable_alias(struct dentry *result, | |||
17 | { | 47 | { |
18 | struct dentry *dentry, *toput = NULL; | 48 | struct dentry *dentry, *toput = NULL; |
19 | 49 | ||
50 | if (acceptable(context, result)) | ||
51 | return result; | ||
52 | |||
20 | spin_lock(&dcache_lock); | 53 | spin_lock(&dcache_lock); |
21 | list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { | 54 | list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { |
22 | dget_locked(dentry); | 55 | dget_locked(dentry); |
@@ -37,130 +70,50 @@ find_acceptable_alias(struct dentry *result, | |||
37 | return NULL; | 70 | return NULL; |
38 | } | 71 | } |
39 | 72 | ||
40 | /** | 73 | /* |
41 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | 74 | * Find root of a disconnected subtree and return a reference to it. |
42 | * @sb: The &super_block identifying the filesystem | ||
43 | * @obj: An opaque identifier of the object to be found - passed to | ||
44 | * get_inode | ||
45 | * @parent: An optional opqaue identifier of the parent of the object. | ||
46 | * @acceptable: A function used to test possible &dentries to see if they are | ||
47 | * acceptable | ||
48 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
49 | * judge. | ||
50 | * | ||
51 | * find_exported_dentry is the central helper routine to enable file systems | ||
52 | * to provide the decode_fh() export_operation. It's main task is to take | ||
53 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
54 | * splice this into the dcache in the correct place. | ||
55 | * | ||
56 | * The decode_fh() operation provided by the filesystem should call | ||
57 | * find_exported_dentry() with the same parameters that it received except | ||
58 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
59 | * for the object and optionally its parent are passed. The default decode_fh | ||
60 | * routine passes one pointer to the start of the filehandle fragment, and | ||
61 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
62 | * take this approach, though the offset to the parent identifier may well be | ||
63 | * different. | ||
64 | * | ||
65 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
66 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
67 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
68 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
69 | * appropriate place. | ||
70 | */ | 75 | */ |
71 | 76 | static struct dentry * | |
72 | struct dentry * | 77 | find_disconnected_root(struct dentry *dentry) |
73 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
74 | int (*acceptable)(void *context, struct dentry *de), | ||
75 | void *context) | ||
76 | { | 78 | { |
77 | struct dentry *result = NULL; | 79 | dget(dentry); |
78 | struct dentry *target_dir; | 80 | spin_lock(&dentry->d_lock); |
79 | int err; | 81 | while (!IS_ROOT(dentry) && |
80 | struct export_operations *nops = sb->s_export_op; | 82 | (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { |
81 | struct dentry *alias; | 83 | struct dentry *parent = dentry->d_parent; |
82 | int noprogress; | 84 | dget(parent); |
83 | char nbuf[NAME_MAX+1]; | 85 | spin_unlock(&dentry->d_lock); |
84 | 86 | dput(dentry); | |
85 | /* | 87 | dentry = parent; |
86 | * Attempt to find the inode. | 88 | spin_lock(&dentry->d_lock); |
87 | */ | ||
88 | result = CALL(sb->s_export_op,get_dentry)(sb,obj); | ||
89 | err = -ESTALE; | ||
90 | if (result == NULL) | ||
91 | goto err_out; | ||
92 | if (IS_ERR(result)) { | ||
93 | err = PTR_ERR(result); | ||
94 | goto err_out; | ||
95 | } | 89 | } |
96 | if (S_ISDIR(result->d_inode->i_mode) && | 90 | spin_unlock(&dentry->d_lock); |
97 | (result->d_flags & DCACHE_DISCONNECTED)) { | 91 | return dentry; |
98 | /* it is an unconnected directory, we must connect it */ | 92 | } |
99 | ; | ||
100 | } else { | ||
101 | if (acceptable(context, result)) | ||
102 | return result; | ||
103 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
104 | err = -EACCES; | ||
105 | goto err_result; | ||
106 | } | ||
107 | 93 | ||
108 | alias = find_acceptable_alias(result, acceptable, context); | ||
109 | if (alias) | ||
110 | return alias; | ||
111 | } | ||
112 | |||
113 | /* It's a directory, or we are required to confirm the file's | ||
114 | * location in the tree based on the parent information | ||
115 | */ | ||
116 | dprintk("find_exported_dentry: need to look harder for %s/%d\n",sb->s_id,*(int*)obj); | ||
117 | if (S_ISDIR(result->d_inode->i_mode)) | ||
118 | target_dir = dget(result); | ||
119 | else { | ||
120 | if (parent == NULL) | ||
121 | goto err_result; | ||
122 | 94 | ||
123 | target_dir = CALL(sb->s_export_op,get_dentry)(sb,parent); | 95 | /* |
124 | if (IS_ERR(target_dir)) | 96 | * Make sure target_dir is fully connected to the dentry tree. |
125 | err = PTR_ERR(target_dir); | 97 | * |
126 | if (target_dir == NULL || IS_ERR(target_dir)) | 98 | * It may already be, as the flag isn't always updated when connection happens. |
127 | goto err_result; | 99 | */ |
128 | } | 100 | static int |
129 | /* | 101 | reconnect_path(struct super_block *sb, struct dentry *target_dir) |
130 | * Now we need to make sure that target_dir is properly connected. | 102 | { |
131 | * It may already be, as the flag isn't always updated when connection | 103 | char nbuf[NAME_MAX+1]; |
132 | * happens. | 104 | int noprogress = 0; |
133 | * So, we walk up parent links until we find a connected directory, | 105 | int err = -ESTALE; |
134 | * or we run out of directories. Then we find the parent, find | ||
135 | * the name of the child in that parent, and do a lookup. | ||
136 | * This should connect the child into the parent | ||
137 | * We then repeat. | ||
138 | */ | ||
139 | 106 | ||
140 | /* it is possible that a confused file system might not let us complete | 107 | /* |
108 | * It is possible that a confused file system might not let us complete | ||
141 | * the path to the root. For example, if get_parent returns a directory | 109 | * the path to the root. For example, if get_parent returns a directory |
142 | * in which we cannot find a name for the child. While this implies a | 110 | * in which we cannot find a name for the child. While this implies a |
143 | * very sick filesystem we don't want it to cause knfsd to spin. Hence | 111 | * very sick filesystem we don't want it to cause knfsd to spin. Hence |
144 | * the noprogress counter. If we go through the loop 10 times (2 is | 112 | * the noprogress counter. If we go through the loop 10 times (2 is |
145 | * probably enough) without getting anywhere, we just give up | 113 | * probably enough) without getting anywhere, we just give up |
146 | */ | 114 | */ |
147 | noprogress= 0; | ||
148 | while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { | 115 | while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { |
149 | struct dentry *pd = target_dir; | 116 | struct dentry *pd = find_disconnected_root(target_dir); |
150 | |||
151 | dget(pd); | ||
152 | spin_lock(&pd->d_lock); | ||
153 | while (!IS_ROOT(pd) && | ||
154 | (pd->d_parent->d_flags&DCACHE_DISCONNECTED)) { | ||
155 | struct dentry *parent = pd->d_parent; | ||
156 | |||
157 | dget(parent); | ||
158 | spin_unlock(&pd->d_lock); | ||
159 | dput(pd); | ||
160 | pd = parent; | ||
161 | spin_lock(&pd->d_lock); | ||
162 | } | ||
163 | spin_unlock(&pd->d_lock); | ||
164 | 117 | ||
165 | if (!IS_ROOT(pd)) { | 118 | if (!IS_ROOT(pd)) { |
166 | /* must have found a connected parent - great */ | 119 | /* must have found a connected parent - great */ |
@@ -175,29 +128,40 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
175 | spin_unlock(&pd->d_lock); | 128 | spin_unlock(&pd->d_lock); |
176 | noprogress = 0; | 129 | noprogress = 0; |
177 | } else { | 130 | } else { |
178 | /* we have hit the top of a disconnected path. Try | 131 | /* |
179 | * to find parent and connect | 132 | * We have hit the top of a disconnected path, try to |
180 | * note: racing with some other process renaming a | 133 | * find parent and connect. |
181 | * directory isn't much of a problem here. If someone | 134 | * |
182 | * renames the directory, it will end up properly | 135 | * Racing with some other process renaming a directory |
183 | * connected, which is what we want | 136 | * isn't much of a problem here. If someone renames |
137 | * the directory, it will end up properly connected, | ||
138 | * which is what we want | ||
139 | * | ||
140 | * Getting the parent can't be supported generically, | ||
141 | * the locking is too icky. | ||
142 | * | ||
143 | * Instead we just return EACCES. If server reboots | ||
144 | * or inodes get flushed, you lose | ||
184 | */ | 145 | */ |
185 | struct dentry *ppd; | 146 | struct dentry *ppd = ERR_PTR(-EACCES); |
186 | struct dentry *npd; | 147 | struct dentry *npd; |
187 | 148 | ||
188 | mutex_lock(&pd->d_inode->i_mutex); | 149 | mutex_lock(&pd->d_inode->i_mutex); |
189 | ppd = CALL(nops,get_parent)(pd); | 150 | if (sb->s_export_op->get_parent) |
151 | ppd = sb->s_export_op->get_parent(pd); | ||
190 | mutex_unlock(&pd->d_inode->i_mutex); | 152 | mutex_unlock(&pd->d_inode->i_mutex); |
191 | 153 | ||
192 | if (IS_ERR(ppd)) { | 154 | if (IS_ERR(ppd)) { |
193 | err = PTR_ERR(ppd); | 155 | err = PTR_ERR(ppd); |
194 | dprintk("find_exported_dentry: get_parent of %ld failed, err %d\n", | 156 | dprintk("%s: get_parent of %ld failed, err %d\n", |
195 | pd->d_inode->i_ino, err); | 157 | __FUNCTION__, pd->d_inode->i_ino, err); |
196 | dput(pd); | 158 | dput(pd); |
197 | break; | 159 | break; |
198 | } | 160 | } |
199 | dprintk("find_exported_dentry: find name of %lu in %lu\n", pd->d_inode->i_ino, ppd->d_inode->i_ino); | 161 | |
200 | err = CALL(nops,get_name)(ppd, nbuf, pd); | 162 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, |
163 | pd->d_inode->i_ino, ppd->d_inode->i_ino); | ||
164 | err = exportfs_get_name(ppd, nbuf, pd); | ||
201 | if (err) { | 165 | if (err) { |
202 | dput(ppd); | 166 | dput(ppd); |
203 | dput(pd); | 167 | dput(pd); |
@@ -208,13 +172,14 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
208 | continue; | 172 | continue; |
209 | break; | 173 | break; |
210 | } | 174 | } |
211 | dprintk("find_exported_dentry: found name: %s\n", nbuf); | 175 | dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); |
212 | mutex_lock(&ppd->d_inode->i_mutex); | 176 | mutex_lock(&ppd->d_inode->i_mutex); |
213 | npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); | 177 | npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); |
214 | mutex_unlock(&ppd->d_inode->i_mutex); | 178 | mutex_unlock(&ppd->d_inode->i_mutex); |
215 | if (IS_ERR(npd)) { | 179 | if (IS_ERR(npd)) { |
216 | err = PTR_ERR(npd); | 180 | err = PTR_ERR(npd); |
217 | dprintk("find_exported_dentry: lookup failed: %d\n", err); | 181 | dprintk("%s: lookup failed: %d\n", |
182 | __FUNCTION__, err); | ||
218 | dput(ppd); | 183 | dput(ppd); |
219 | dput(pd); | 184 | dput(pd); |
220 | break; | 185 | break; |
@@ -227,7 +192,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
227 | if (npd == pd) | 192 | if (npd == pd) |
228 | noprogress = 0; | 193 | noprogress = 0; |
229 | else | 194 | else |
230 | printk("find_exported_dentry: npd != pd\n"); | 195 | printk("%s: npd != pd\n", __FUNCTION__); |
231 | dput(npd); | 196 | dput(npd); |
232 | dput(ppd); | 197 | dput(ppd); |
233 | if (IS_ROOT(pd)) { | 198 | if (IS_ROOT(pd)) { |
@@ -243,15 +208,101 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
243 | /* something went wrong - oh-well */ | 208 | /* something went wrong - oh-well */ |
244 | if (!err) | 209 | if (!err) |
245 | err = -ESTALE; | 210 | err = -ESTALE; |
246 | goto err_target; | 211 | return err; |
247 | } | 212 | } |
248 | /* if we weren't after a directory, have one more step to go */ | 213 | |
249 | if (result != target_dir) { | 214 | return 0; |
250 | struct dentry *nresult; | 215 | } |
251 | err = CALL(nops,get_name)(target_dir, nbuf, result); | 216 | |
217 | /** | ||
218 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | ||
219 | * @sb: The &super_block identifying the filesystem | ||
220 | * @obj: An opaque identifier of the object to be found - passed to | ||
221 | * get_inode | ||
222 | * @parent: An optional opqaue identifier of the parent of the object. | ||
223 | * @acceptable: A function used to test possible &dentries to see if they are | ||
224 | * acceptable | ||
225 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
226 | * judge. | ||
227 | * | ||
228 | * find_exported_dentry is the central helper routine to enable file systems | ||
229 | * to provide the decode_fh() export_operation. It's main task is to take | ||
230 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
231 | * splice this into the dcache in the correct place. | ||
232 | * | ||
233 | * The decode_fh() operation provided by the filesystem should call | ||
234 | * find_exported_dentry() with the same parameters that it received except | ||
235 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
236 | * for the object and optionally its parent are passed. The default decode_fh | ||
237 | * routine passes one pointer to the start of the filehandle fragment, and | ||
238 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
239 | * take this approach, though the offset to the parent identifier may well be | ||
240 | * different. | ||
241 | * | ||
242 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
243 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
244 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
245 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
246 | * appropriate place. | ||
247 | */ | ||
248 | |||
249 | struct dentry * | ||
250 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
251 | int (*acceptable)(void *context, struct dentry *de), | ||
252 | void *context) | ||
253 | { | ||
254 | struct dentry *result, *alias; | ||
255 | int err = -ESTALE; | ||
256 | |||
257 | /* | ||
258 | * Attempt to find the inode. | ||
259 | */ | ||
260 | result = exportfs_get_dentry(sb, obj); | ||
261 | if (IS_ERR(result)) | ||
262 | return result; | ||
263 | |||
264 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
265 | if (!(result->d_flags & DCACHE_DISCONNECTED)) { | ||
266 | if (acceptable(context, result)) | ||
267 | return result; | ||
268 | err = -EACCES; | ||
269 | goto err_result; | ||
270 | } | ||
271 | |||
272 | err = reconnect_path(sb, result); | ||
273 | if (err) | ||
274 | goto err_result; | ||
275 | } else { | ||
276 | struct dentry *target_dir, *nresult; | ||
277 | char nbuf[NAME_MAX+1]; | ||
278 | |||
279 | alias = find_acceptable_alias(result, acceptable, context); | ||
280 | if (alias) | ||
281 | return alias; | ||
282 | |||
283 | if (parent == NULL) | ||
284 | goto err_result; | ||
285 | |||
286 | target_dir = exportfs_get_dentry(sb,parent); | ||
287 | if (IS_ERR(target_dir)) { | ||
288 | err = PTR_ERR(target_dir); | ||
289 | goto err_result; | ||
290 | } | ||
291 | |||
292 | err = reconnect_path(sb, target_dir); | ||
293 | if (err) { | ||
294 | dput(target_dir); | ||
295 | goto err_result; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * As we weren't after a directory, have one more step to go. | ||
300 | */ | ||
301 | err = exportfs_get_name(target_dir, nbuf, result); | ||
252 | if (!err) { | 302 | if (!err) { |
253 | mutex_lock(&target_dir->d_inode->i_mutex); | 303 | mutex_lock(&target_dir->d_inode->i_mutex); |
254 | nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); | 304 | nresult = lookup_one_len(nbuf, target_dir, |
305 | strlen(nbuf)); | ||
255 | mutex_unlock(&target_dir->d_inode->i_mutex); | 306 | mutex_unlock(&target_dir->d_inode->i_mutex); |
256 | if (!IS_ERR(nresult)) { | 307 | if (!IS_ERR(nresult)) { |
257 | if (nresult->d_inode) { | 308 | if (nresult->d_inode) { |
@@ -261,11 +312,8 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
261 | dput(nresult); | 312 | dput(nresult); |
262 | } | 313 | } |
263 | } | 314 | } |
315 | dput(target_dir); | ||
264 | } | 316 | } |
265 | dput(target_dir); | ||
266 | /* now result is properly connected, it is our best bet */ | ||
267 | if (acceptable(context, result)) | ||
268 | return result; | ||
269 | 317 | ||
270 | alias = find_acceptable_alias(result, acceptable, context); | 318 | alias = find_acceptable_alias(result, acceptable, context); |
271 | if (alias) | 319 | if (alias) |
@@ -275,32 +323,16 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
275 | dput(result); | 323 | dput(result); |
276 | /* It might be justifiable to return ESTALE here, | 324 | /* It might be justifiable to return ESTALE here, |
277 | * but the filehandle at-least looks reasonable good | 325 | * but the filehandle at-least looks reasonable good |
278 | * and it just be a permission problem, so returning | 326 | * and it may just be a permission problem, so returning |
279 | * -EACCESS is safer | 327 | * -EACCESS is safer |
280 | */ | 328 | */ |
281 | return ERR_PTR(-EACCES); | 329 | return ERR_PTR(-EACCES); |
282 | 330 | ||
283 | err_target: | ||
284 | dput(target_dir); | ||
285 | err_result: | 331 | err_result: |
286 | dput(result); | 332 | dput(result); |
287 | err_out: | ||
288 | return ERR_PTR(err); | 333 | return ERR_PTR(err); |
289 | } | 334 | } |
290 | 335 | ||
291 | |||
292 | |||
293 | static struct dentry *get_parent(struct dentry *child) | ||
294 | { | ||
295 | /* get_parent cannot be supported generically, the locking | ||
296 | * is too icky. | ||
297 | * instead, we just return EACCES. If server reboots or inodes | ||
298 | * get flushed, you lose | ||
299 | */ | ||
300 | return ERR_PTR(-EACCES); | ||
301 | } | ||
302 | |||
303 | |||
304 | struct getdents_callback { | 336 | struct getdents_callback { |
305 | char *name; /* name that was found. It already points to a | 337 | char *name; /* name that was found. It already points to a |
306 | buffer NAME_MAX+1 is size */ | 338 | buffer NAME_MAX+1 is size */ |
@@ -390,61 +422,6 @@ out: | |||
390 | return error; | 422 | return error; |
391 | } | 423 | } |
392 | 424 | ||
393 | |||
394 | static struct dentry *export_iget(struct super_block *sb, unsigned long ino, __u32 generation) | ||
395 | { | ||
396 | |||
397 | /* iget isn't really right if the inode is currently unallocated!! | ||
398 | * This should really all be done inside each filesystem | ||
399 | * | ||
400 | * ext2fs' read_inode has been strengthed to return a bad_inode if | ||
401 | * the inode had been deleted. | ||
402 | * | ||
403 | * Currently we don't know the generation for parent directory, so | ||
404 | * a generation of 0 means "accept any" | ||
405 | */ | ||
406 | struct inode *inode; | ||
407 | struct dentry *result; | ||
408 | if (ino == 0) | ||
409 | return ERR_PTR(-ESTALE); | ||
410 | inode = iget(sb, ino); | ||
411 | if (inode == NULL) | ||
412 | return ERR_PTR(-ENOMEM); | ||
413 | if (is_bad_inode(inode) | ||
414 | || (generation && inode->i_generation != generation) | ||
415 | ) { | ||
416 | /* we didn't find the right inode.. */ | ||
417 | dprintk("fh_verify: Inode %lu, Bad count: %d %d or version %u %u\n", | ||
418 | inode->i_ino, | ||
419 | inode->i_nlink, atomic_read(&inode->i_count), | ||
420 | inode->i_generation, | ||
421 | generation); | ||
422 | |||
423 | iput(inode); | ||
424 | return ERR_PTR(-ESTALE); | ||
425 | } | ||
426 | /* now to find a dentry. | ||
427 | * If possible, get a well-connected one | ||
428 | */ | ||
429 | result = d_alloc_anon(inode); | ||
430 | if (!result) { | ||
431 | iput(inode); | ||
432 | return ERR_PTR(-ENOMEM); | ||
433 | } | ||
434 | return result; | ||
435 | } | ||
436 | |||
437 | |||
438 | static struct dentry *get_object(struct super_block *sb, void *vobjp) | ||
439 | { | ||
440 | __u32 *objp = vobjp; | ||
441 | unsigned long ino = objp[0]; | ||
442 | __u32 generation = objp[1]; | ||
443 | |||
444 | return export_iget(sb, ino, generation); | ||
445 | } | ||
446 | |||
447 | |||
448 | /** | 425 | /** |
449 | * export_encode_fh - default export_operations->encode_fh function | 426 | * export_encode_fh - default export_operations->encode_fh function |
450 | * @dentry: the dentry to encode | 427 | * @dentry: the dentry to encode |
@@ -517,16 +494,40 @@ static struct dentry *export_decode_fh(struct super_block *sb, __u32 *fh, int fh | |||
517 | acceptable, context); | 494 | acceptable, context); |
518 | } | 495 | } |
519 | 496 | ||
520 | struct export_operations export_op_default = { | 497 | int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, |
521 | .decode_fh = export_decode_fh, | 498 | int connectable) |
522 | .encode_fh = export_encode_fh, | 499 | { |
500 | struct export_operations *nop = dentry->d_sb->s_export_op; | ||
501 | int error; | ||
502 | |||
503 | if (nop->encode_fh) | ||
504 | error = nop->encode_fh(dentry, fh, max_len, connectable); | ||
505 | else | ||
506 | error = export_encode_fh(dentry, fh, max_len, connectable); | ||
523 | 507 | ||
524 | .get_name = get_name, | 508 | return error; |
525 | .get_parent = get_parent, | 509 | } |
526 | .get_dentry = get_object, | 510 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); |
527 | }; | 511 | |
512 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, int fh_len, | ||
513 | int fileid_type, int (*acceptable)(void *, struct dentry *), | ||
514 | void *context) | ||
515 | { | ||
516 | struct export_operations *nop = mnt->mnt_sb->s_export_op; | ||
517 | struct dentry *result; | ||
518 | |||
519 | if (nop->decode_fh) { | ||
520 | result = nop->decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | ||
521 | acceptable, context); | ||
522 | } else { | ||
523 | result = export_decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | ||
524 | acceptable, context); | ||
525 | } | ||
526 | |||
527 | return result; | ||
528 | } | ||
529 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); | ||
528 | 530 | ||
529 | EXPORT_SYMBOL(export_op_default); | ||
530 | EXPORT_SYMBOL(find_exported_dentry); | 531 | EXPORT_SYMBOL(find_exported_dentry); |
531 | 532 | ||
532 | MODULE_LICENSE("GPL"); | 533 | MODULE_LICENSE("GPL"); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 04afeecaaef3..ab7961260c49 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -24,9 +24,9 @@ | |||
24 | #include "acl.h" | 24 | #include "acl.h" |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Called when an inode is released. Note that this is different | 27 | * Called when filp is released. This happens when all file descriptors |
28 | * from ext2_open_file: open gets called at every open, but release | 28 | * for a single struct file are closed. Note that different open() calls |
29 | * gets called only when /all/ the files are closed. | 29 | * for the same file yield different struct file structures. |
30 | */ | 30 | */ |
31 | static int ext2_release_file (struct inode * inode, struct file * filp) | 31 | static int ext2_release_file (struct inode * inode, struct file * filp) |
32 | { | 32 | { |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5de5061eb331..3eefa97fe204 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/parser.h> | 25 | #include <linux/parser.h> |
26 | #include <linux/random.h> | 26 | #include <linux/random.h> |
27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
28 | #include <linux/exportfs.h> | ||
28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
29 | #include <linux/vfs.h> | 30 | #include <linux/vfs.h> |
30 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
@@ -1099,15 +1100,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
1099 | struct super_block *sb = dentry->d_sb; | 1100 | struct super_block *sb = dentry->d_sb; |
1100 | struct ext2_sb_info *sbi = EXT2_SB(sb); | 1101 | struct ext2_sb_info *sbi = EXT2_SB(sb); |
1101 | struct ext2_super_block *es = sbi->s_es; | 1102 | struct ext2_super_block *es = sbi->s_es; |
1102 | unsigned long overhead; | ||
1103 | int i; | ||
1104 | u64 fsid; | 1103 | u64 fsid; |
1105 | 1104 | ||
1106 | if (test_opt (sb, MINIX_DF)) | 1105 | if (test_opt (sb, MINIX_DF)) |
1107 | overhead = 0; | 1106 | sbi->s_overhead_last = 0; |
1108 | else { | 1107 | else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
1108 | unsigned long i, overhead = 0; | ||
1109 | smp_rmb(); | ||
1110 | |||
1109 | /* | 1111 | /* |
1110 | * Compute the overhead (FS structures) | 1112 | * Compute the overhead (FS structures). This is constant |
1113 | * for a given filesystem unless the number of block groups | ||
1114 | * changes so we cache the previous value until it does. | ||
1111 | */ | 1115 | */ |
1112 | 1116 | ||
1113 | /* | 1117 | /* |
@@ -1131,17 +1135,22 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
1131 | */ | 1135 | */ |
1132 | overhead += (sbi->s_groups_count * | 1136 | overhead += (sbi->s_groups_count * |
1133 | (2 + sbi->s_itb_per_group)); | 1137 | (2 + sbi->s_itb_per_group)); |
1138 | sbi->s_overhead_last = overhead; | ||
1139 | smp_wmb(); | ||
1140 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
1134 | } | 1141 | } |
1135 | 1142 | ||
1136 | buf->f_type = EXT2_SUPER_MAGIC; | 1143 | buf->f_type = EXT2_SUPER_MAGIC; |
1137 | buf->f_bsize = sb->s_blocksize; | 1144 | buf->f_bsize = sb->s_blocksize; |
1138 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; | 1145 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; |
1139 | buf->f_bfree = ext2_count_free_blocks(sb); | 1146 | buf->f_bfree = ext2_count_free_blocks(sb); |
1147 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
1140 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); | 1148 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); |
1141 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) | 1149 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) |
1142 | buf->f_bavail = 0; | 1150 | buf->f_bavail = 0; |
1143 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 1151 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
1144 | buf->f_ffree = ext2_count_free_inodes(sb); | 1152 | buf->f_ffree = ext2_count_free_inodes(sb); |
1153 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
1145 | buf->f_namelen = EXT2_NAME_LEN; | 1154 | buf->f_namelen = EXT2_NAME_LEN; |
1146 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 1155 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
1147 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 1156 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2a85ddee4740..de4e3161e479 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -3195,7 +3195,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val) | |||
3195 | */ | 3195 | */ |
3196 | 3196 | ||
3197 | journal = EXT3_JOURNAL(inode); | 3197 | journal = EXT3_JOURNAL(inode); |
3198 | if (is_journal_aborted(journal) || IS_RDONLY(inode)) | 3198 | if (is_journal_aborted(journal)) |
3199 | return -EROFS; | 3199 | return -EROFS; |
3200 | 3200 | ||
3201 | journal_lock_updates(journal); | 3201 | journal_lock_updates(journal); |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 9bb046df827a..1586807b8177 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -1019,6 +1019,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
1019 | 1019 | ||
1020 | if (!inode) | 1020 | if (!inode) |
1021 | return ERR_PTR(-EACCES); | 1021 | return ERR_PTR(-EACCES); |
1022 | |||
1023 | if (is_bad_inode(inode)) { | ||
1024 | iput(inode); | ||
1025 | return ERR_PTR(-ENOENT); | ||
1026 | } | ||
1022 | } | 1027 | } |
1023 | return d_splice_alias(inode, dentry); | 1028 | return d_splice_alias(inode, dentry); |
1024 | } | 1029 | } |
@@ -1054,6 +1059,11 @@ struct dentry *ext3_get_parent(struct dentry *child) | |||
1054 | if (!inode) | 1059 | if (!inode) |
1055 | return ERR_PTR(-EACCES); | 1060 | return ERR_PTR(-EACCES); |
1056 | 1061 | ||
1062 | if (is_bad_inode(inode)) { | ||
1063 | iput(inode); | ||
1064 | return ERR_PTR(-ENOENT); | ||
1065 | } | ||
1066 | |||
1057 | parent = d_alloc_anon(inode); | 1067 | parent = d_alloc_anon(inode); |
1058 | if (!parent) { | 1068 | if (!parent) { |
1059 | iput(inode); | 1069 | iput(inode); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6e3062913a92..4f84dc86628a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -29,12 +29,14 @@ | |||
29 | #include <linux/parser.h> | 29 | #include <linux/parser.h> |
30 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
31 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
32 | #include <linux/exportfs.h> | ||
32 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
33 | #include <linux/random.h> | 34 | #include <linux/random.h> |
34 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
35 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
36 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
37 | #include <linux/seq_file.h> | 38 | #include <linux/seq_file.h> |
39 | #include <linux/log2.h> | ||
38 | 40 | ||
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
40 | 42 | ||
@@ -459,6 +461,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) | |||
459 | 461 | ||
460 | static void ext3_destroy_inode(struct inode *inode) | 462 | static void ext3_destroy_inode(struct inode *inode) |
461 | { | 463 | { |
464 | if (!list_empty(&(EXT3_I(inode)->i_orphan))) { | ||
465 | printk("EXT3 Inode %p: orphan list check failed!\n", | ||
466 | EXT3_I(inode)); | ||
467 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | ||
468 | EXT3_I(inode), sizeof(struct ext3_inode_info), | ||
469 | false); | ||
470 | dump_stack(); | ||
471 | } | ||
462 | kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); | 472 | kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); |
463 | } | 473 | } |
464 | 474 | ||
@@ -1566,7 +1576,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1566 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); | 1576 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); |
1567 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); | 1577 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); |
1568 | if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || | 1578 | if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || |
1569 | (sbi->s_inode_size & (sbi->s_inode_size - 1)) || | 1579 | (!is_power_of_2(sbi->s_inode_size)) || |
1570 | (sbi->s_inode_size > blocksize)) { | 1580 | (sbi->s_inode_size > blocksize)) { |
1571 | printk (KERN_ERR | 1581 | printk (KERN_ERR |
1572 | "EXT3-fs: unsupported inode size: %d\n", | 1582 | "EXT3-fs: unsupported inode size: %d\n", |
@@ -2075,6 +2085,7 @@ static int ext3_create_journal(struct super_block * sb, | |||
2075 | unsigned int journal_inum) | 2085 | unsigned int journal_inum) |
2076 | { | 2086 | { |
2077 | journal_t *journal; | 2087 | journal_t *journal; |
2088 | int err; | ||
2078 | 2089 | ||
2079 | if (sb->s_flags & MS_RDONLY) { | 2090 | if (sb->s_flags & MS_RDONLY) { |
2080 | printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " | 2091 | printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " |
@@ -2082,13 +2093,15 @@ static int ext3_create_journal(struct super_block * sb, | |||
2082 | return -EROFS; | 2093 | return -EROFS; |
2083 | } | 2094 | } |
2084 | 2095 | ||
2085 | if (!(journal = ext3_get_journal(sb, journal_inum))) | 2096 | journal = ext3_get_journal(sb, journal_inum); |
2097 | if (!journal) | ||
2086 | return -EINVAL; | 2098 | return -EINVAL; |
2087 | 2099 | ||
2088 | printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", | 2100 | printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", |
2089 | journal_inum); | 2101 | journal_inum); |
2090 | 2102 | ||
2091 | if (journal_create(journal)) { | 2103 | err = journal_create(journal); |
2104 | if (err) { | ||
2092 | printk(KERN_ERR "EXT3-fs: error creating journal.\n"); | 2105 | printk(KERN_ERR "EXT3-fs: error creating journal.\n"); |
2093 | journal_destroy(journal); | 2106 | journal_destroy(journal); |
2094 | return -EIO; | 2107 | return -EIO; |
@@ -2139,12 +2152,14 @@ static void ext3_mark_recovery_complete(struct super_block * sb, | |||
2139 | 2152 | ||
2140 | journal_lock_updates(journal); | 2153 | journal_lock_updates(journal); |
2141 | journal_flush(journal); | 2154 | journal_flush(journal); |
2155 | lock_super(sb); | ||
2142 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && | 2156 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && |
2143 | sb->s_flags & MS_RDONLY) { | 2157 | sb->s_flags & MS_RDONLY) { |
2144 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 2158 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
2145 | sb->s_dirt = 0; | 2159 | sb->s_dirt = 0; |
2146 | ext3_commit_super(sb, es, 1); | 2160 | ext3_commit_super(sb, es, 1); |
2147 | } | 2161 | } |
2162 | unlock_super(sb); | ||
2148 | journal_unlock_updates(journal); | 2163 | journal_unlock_updates(journal); |
2149 | } | 2164 | } |
2150 | 2165 | ||
@@ -2333,7 +2348,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2333 | (sbi->s_mount_state & EXT3_VALID_FS)) | 2348 | (sbi->s_mount_state & EXT3_VALID_FS)) |
2334 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 2349 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
2335 | 2350 | ||
2351 | /* | ||
2352 | * We have to unlock super so that we can wait for | ||
2353 | * transactions. | ||
2354 | */ | ||
2355 | unlock_super(sb); | ||
2336 | ext3_mark_recovery_complete(sb, es); | 2356 | ext3_mark_recovery_complete(sb, es); |
2357 | lock_super(sb); | ||
2337 | } else { | 2358 | } else { |
2338 | __le32 ret; | 2359 | __le32 ret; |
2339 | if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, | 2360 | if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, |
@@ -2406,19 +2427,19 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2406 | struct super_block *sb = dentry->d_sb; | 2427 | struct super_block *sb = dentry->d_sb; |
2407 | struct ext3_sb_info *sbi = EXT3_SB(sb); | 2428 | struct ext3_sb_info *sbi = EXT3_SB(sb); |
2408 | struct ext3_super_block *es = sbi->s_es; | 2429 | struct ext3_super_block *es = sbi->s_es; |
2409 | ext3_fsblk_t overhead; | ||
2410 | int i; | ||
2411 | u64 fsid; | 2430 | u64 fsid; |
2412 | 2431 | ||
2413 | if (test_opt (sb, MINIX_DF)) | 2432 | if (test_opt(sb, MINIX_DF)) { |
2414 | overhead = 0; | 2433 | sbi->s_overhead_last = 0; |
2415 | else { | 2434 | } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
2416 | unsigned long ngroups; | 2435 | unsigned long ngroups = sbi->s_groups_count, i; |
2417 | ngroups = EXT3_SB(sb)->s_groups_count; | 2436 | ext3_fsblk_t overhead = 0; |
2418 | smp_rmb(); | 2437 | smp_rmb(); |
2419 | 2438 | ||
2420 | /* | 2439 | /* |
2421 | * Compute the overhead (FS structures) | 2440 | * Compute the overhead (FS structures). This is constant |
2441 | * for a given filesystem unless the number of block groups | ||
2442 | * changes so we cache the previous value until it does. | ||
2422 | */ | 2443 | */ |
2423 | 2444 | ||
2424 | /* | 2445 | /* |
@@ -2442,18 +2463,23 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2442 | * Every block group has an inode bitmap, a block | 2463 | * Every block group has an inode bitmap, a block |
2443 | * bitmap, and an inode table. | 2464 | * bitmap, and an inode table. |
2444 | */ | 2465 | */ |
2445 | overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); | 2466 | overhead += ngroups * (2 + sbi->s_itb_per_group); |
2467 | sbi->s_overhead_last = overhead; | ||
2468 | smp_wmb(); | ||
2469 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
2446 | } | 2470 | } |
2447 | 2471 | ||
2448 | buf->f_type = EXT3_SUPER_MAGIC; | 2472 | buf->f_type = EXT3_SUPER_MAGIC; |
2449 | buf->f_bsize = sb->s_blocksize; | 2473 | buf->f_bsize = sb->s_blocksize; |
2450 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; | 2474 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; |
2451 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); | 2475 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); |
2476 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
2452 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); | 2477 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); |
2453 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) | 2478 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) |
2454 | buf->f_bavail = 0; | 2479 | buf->f_bavail = 0; |
2455 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 2480 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
2456 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); | 2481 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); |
2482 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
2457 | buf->f_namelen = EXT3_NAME_LEN; | 2483 | buf->f_namelen = EXT3_NAME_LEN; |
2458 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 2484 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
2459 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 2485 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3b64bb16c727..9de54ae48dee 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -1585,7 +1585,7 @@ allocated: | |||
1585 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | 1585 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); |
1586 | 1586 | ||
1587 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | 1587 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || |
1588 | in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | 1588 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || |
1589 | in_range(ret_block, ext4_inode_table(sb, gdp), | 1589 | in_range(ret_block, ext4_inode_table(sb, gdp), |
1590 | EXT4_SB(sb)->s_itb_per_group) || | 1590 | EXT4_SB(sb)->s_itb_per_group) || |
1591 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | 1591 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2811e5720ad0..2de339dd7554 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1017,6 +1017,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str | |||
1017 | 1017 | ||
1018 | if (!inode) | 1018 | if (!inode) |
1019 | return ERR_PTR(-EACCES); | 1019 | return ERR_PTR(-EACCES); |
1020 | |||
1021 | if (is_bad_inode(inode)) { | ||
1022 | iput(inode); | ||
1023 | return ERR_PTR(-ENOENT); | ||
1024 | } | ||
1020 | } | 1025 | } |
1021 | return d_splice_alias(inode, dentry); | 1026 | return d_splice_alias(inode, dentry); |
1022 | } | 1027 | } |
@@ -1052,6 +1057,11 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1052 | if (!inode) | 1057 | if (!inode) |
1053 | return ERR_PTR(-EACCES); | 1058 | return ERR_PTR(-EACCES); |
1054 | 1059 | ||
1060 | if (is_bad_inode(inode)) { | ||
1061 | iput(inode); | ||
1062 | return ERR_PTR(-ENOENT); | ||
1063 | } | ||
1064 | |||
1055 | parent = d_alloc_anon(inode); | 1065 | parent = d_alloc_anon(inode); |
1056 | if (!parent) { | 1066 | if (!parent) { |
1057 | iput(inode); | 1067 | iput(inode); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 175b68c60968..b806e689c4aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/parser.h> | 29 | #include <linux/parser.h> |
30 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
31 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
32 | #include <linux/exportfs.h> | ||
32 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
33 | #include <linux/random.h> | 34 | #include <linux/random.h> |
34 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
@@ -510,6 +511,14 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
510 | 511 | ||
511 | static void ext4_destroy_inode(struct inode *inode) | 512 | static void ext4_destroy_inode(struct inode *inode) |
512 | { | 513 | { |
514 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | ||
515 | printk("EXT4 Inode %p: orphan list check failed!\n", | ||
516 | EXT4_I(inode)); | ||
517 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | ||
518 | EXT4_I(inode), sizeof(struct ext4_inode_info), | ||
519 | true); | ||
520 | dump_stack(); | ||
521 | } | ||
513 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); | 522 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); |
514 | } | 523 | } |
515 | 524 | ||
@@ -2150,6 +2159,7 @@ static int ext4_create_journal(struct super_block * sb, | |||
2150 | unsigned int journal_inum) | 2159 | unsigned int journal_inum) |
2151 | { | 2160 | { |
2152 | journal_t *journal; | 2161 | journal_t *journal; |
2162 | int err; | ||
2153 | 2163 | ||
2154 | if (sb->s_flags & MS_RDONLY) { | 2164 | if (sb->s_flags & MS_RDONLY) { |
2155 | printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " | 2165 | printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " |
@@ -2157,13 +2167,15 @@ static int ext4_create_journal(struct super_block * sb, | |||
2157 | return -EROFS; | 2167 | return -EROFS; |
2158 | } | 2168 | } |
2159 | 2169 | ||
2160 | if (!(journal = ext4_get_journal(sb, journal_inum))) | 2170 | journal = ext4_get_journal(sb, journal_inum); |
2171 | if (!journal) | ||
2161 | return -EINVAL; | 2172 | return -EINVAL; |
2162 | 2173 | ||
2163 | printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", | 2174 | printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", |
2164 | journal_inum); | 2175 | journal_inum); |
2165 | 2176 | ||
2166 | if (jbd2_journal_create(journal)) { | 2177 | err = jbd2_journal_create(journal); |
2178 | if (err) { | ||
2167 | printk(KERN_ERR "EXT4-fs: error creating journal.\n"); | 2179 | printk(KERN_ERR "EXT4-fs: error creating journal.\n"); |
2168 | jbd2_journal_destroy(journal); | 2180 | jbd2_journal_destroy(journal); |
2169 | return -EIO; | 2181 | return -EIO; |
@@ -2214,12 +2226,14 @@ static void ext4_mark_recovery_complete(struct super_block * sb, | |||
2214 | 2226 | ||
2215 | jbd2_journal_lock_updates(journal); | 2227 | jbd2_journal_lock_updates(journal); |
2216 | jbd2_journal_flush(journal); | 2228 | jbd2_journal_flush(journal); |
2229 | lock_super(sb); | ||
2217 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2230 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
2218 | sb->s_flags & MS_RDONLY) { | 2231 | sb->s_flags & MS_RDONLY) { |
2219 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2232 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
2220 | sb->s_dirt = 0; | 2233 | sb->s_dirt = 0; |
2221 | ext4_commit_super(sb, es, 1); | 2234 | ext4_commit_super(sb, es, 1); |
2222 | } | 2235 | } |
2236 | unlock_super(sb); | ||
2223 | jbd2_journal_unlock_updates(journal); | 2237 | jbd2_journal_unlock_updates(journal); |
2224 | } | 2238 | } |
2225 | 2239 | ||
@@ -2408,7 +2422,13 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) | |||
2408 | (sbi->s_mount_state & EXT4_VALID_FS)) | 2422 | (sbi->s_mount_state & EXT4_VALID_FS)) |
2409 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 2423 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
2410 | 2424 | ||
2425 | /* | ||
2426 | * We have to unlock super so that we can wait for | ||
2427 | * transactions. | ||
2428 | */ | ||
2429 | unlock_super(sb); | ||
2411 | ext4_mark_recovery_complete(sb, es); | 2430 | ext4_mark_recovery_complete(sb, es); |
2431 | lock_super(sb); | ||
2412 | } else { | 2432 | } else { |
2413 | __le32 ret; | 2433 | __le32 ret; |
2414 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2434 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -2481,19 +2501,19 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2481 | struct super_block *sb = dentry->d_sb; | 2501 | struct super_block *sb = dentry->d_sb; |
2482 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2502 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2483 | struct ext4_super_block *es = sbi->s_es; | 2503 | struct ext4_super_block *es = sbi->s_es; |
2484 | ext4_fsblk_t overhead; | ||
2485 | int i; | ||
2486 | u64 fsid; | 2504 | u64 fsid; |
2487 | 2505 | ||
2488 | if (test_opt (sb, MINIX_DF)) | 2506 | if (test_opt(sb, MINIX_DF)) { |
2489 | overhead = 0; | 2507 | sbi->s_overhead_last = 0; |
2490 | else { | 2508 | } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
2491 | unsigned long ngroups; | 2509 | unsigned long ngroups = sbi->s_groups_count, i; |
2492 | ngroups = EXT4_SB(sb)->s_groups_count; | 2510 | ext4_fsblk_t overhead = 0; |
2493 | smp_rmb(); | 2511 | smp_rmb(); |
2494 | 2512 | ||
2495 | /* | 2513 | /* |
2496 | * Compute the overhead (FS structures) | 2514 | * Compute the overhead (FS structures). This is constant |
2515 | * for a given filesystem unless the number of block groups | ||
2516 | * changes so we cache the previous value until it does. | ||
2497 | */ | 2517 | */ |
2498 | 2518 | ||
2499 | /* | 2519 | /* |
@@ -2517,18 +2537,23 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2517 | * Every block group has an inode bitmap, a block | 2537 | * Every block group has an inode bitmap, a block |
2518 | * bitmap, and an inode table. | 2538 | * bitmap, and an inode table. |
2519 | */ | 2539 | */ |
2520 | overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); | 2540 | overhead += ngroups * (2 + sbi->s_itb_per_group); |
2541 | sbi->s_overhead_last = overhead; | ||
2542 | smp_wmb(); | ||
2543 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
2521 | } | 2544 | } |
2522 | 2545 | ||
2523 | buf->f_type = EXT4_SUPER_MAGIC; | 2546 | buf->f_type = EXT4_SUPER_MAGIC; |
2524 | buf->f_bsize = sb->s_blocksize; | 2547 | buf->f_bsize = sb->s_blocksize; |
2525 | buf->f_blocks = ext4_blocks_count(es) - overhead; | 2548 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
2526 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); | 2549 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); |
2550 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
2527 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 2551 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
2528 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 2552 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
2529 | buf->f_bavail = 0; | 2553 | buf->f_bavail = 0; |
2530 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 2554 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
2531 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); | 2555 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); |
2556 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
2532 | buf->f_namelen = EXT4_NAME_LEN; | 2557 | buf->f_namelen = EXT4_NAME_LEN; |
2533 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 2558 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
2534 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 2559 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index ccf161dffb63..72cbcd61bd95 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -313,7 +313,7 @@ int fat_search_long(struct inode *inode, const unsigned char *name, | |||
313 | wchar_t bufuname[14]; | 313 | wchar_t bufuname[14]; |
314 | unsigned char xlate_len, nr_slots; | 314 | unsigned char xlate_len, nr_slots; |
315 | wchar_t *unicode = NULL; | 315 | wchar_t *unicode = NULL; |
316 | unsigned char work[8], bufname[260]; /* 256 + 4 */ | 316 | unsigned char work[MSDOS_NAME], bufname[260]; /* 256 + 4 */ |
317 | int uni_xlate = sbi->options.unicode_xlate; | 317 | int uni_xlate = sbi->options.unicode_xlate; |
318 | int utf8 = sbi->options.utf8; | 318 | int utf8 = sbi->options.utf8; |
319 | int anycase = (sbi->options.name_check != 's'); | 319 | int anycase = (sbi->options.name_check != 's'); |
@@ -351,7 +351,8 @@ parse_record: | |||
351 | if (work[0] == 0x05) | 351 | if (work[0] == 0x05) |
352 | work[0] = 0xE5; | 352 | work[0] = 0xE5; |
353 | for (i = 0, j = 0, last_u = 0; i < 8;) { | 353 | for (i = 0, j = 0, last_u = 0; i < 8;) { |
354 | if (!work[i]) break; | 354 | if (!work[i]) |
355 | break; | ||
355 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, | 356 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, |
356 | &bufuname[j++], opt_shortname, | 357 | &bufuname[j++], opt_shortname, |
357 | de->lcase & CASE_LOWER_BASE); | 358 | de->lcase & CASE_LOWER_BASE); |
@@ -365,13 +366,15 @@ parse_record: | |||
365 | } | 366 | } |
366 | j = last_u; | 367 | j = last_u; |
367 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); | 368 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); |
368 | for (i = 0; i < 3;) { | 369 | for (i = 8; i < MSDOS_NAME;) { |
369 | if (!de->ext[i]) break; | 370 | if (!work[i]) |
370 | chl = fat_shortname2uni(nls_disk, &de->ext[i], 3 - i, | 371 | break; |
372 | chl = fat_shortname2uni(nls_disk, &work[i], | ||
373 | MSDOS_NAME - i, | ||
371 | &bufuname[j++], opt_shortname, | 374 | &bufuname[j++], opt_shortname, |
372 | de->lcase & CASE_LOWER_EXT); | 375 | de->lcase & CASE_LOWER_EXT); |
373 | if (chl <= 1) { | 376 | if (chl <= 1) { |
374 | if (de->ext[i] != ' ') | 377 | if (work[i] != ' ') |
375 | last_u = j; | 378 | last_u = j; |
376 | } else { | 379 | } else { |
377 | last_u = j; | 380 | last_u = j; |
@@ -445,7 +448,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
445 | int fill_len; | 448 | int fill_len; |
446 | wchar_t bufuname[14]; | 449 | wchar_t bufuname[14]; |
447 | wchar_t *unicode = NULL; | 450 | wchar_t *unicode = NULL; |
448 | unsigned char c, work[8], bufname[56], *ptname = bufname; | 451 | unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; |
449 | unsigned long lpos, dummy, *furrfu = &lpos; | 452 | unsigned long lpos, dummy, *furrfu = &lpos; |
450 | int uni_xlate = sbi->options.unicode_xlate; | 453 | int uni_xlate = sbi->options.unicode_xlate; |
451 | int isvfat = sbi->options.isvfat; | 454 | int isvfat = sbi->options.isvfat; |
@@ -527,7 +530,8 @@ parse_record: | |||
527 | if (work[0] == 0x05) | 530 | if (work[0] == 0x05) |
528 | work[0] = 0xE5; | 531 | work[0] = 0xE5; |
529 | for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { | 532 | for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { |
530 | if (!(c = work[i])) break; | 533 | if (!(c = work[i])) |
534 | break; | ||
531 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, | 535 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, |
532 | &bufuname[j++], opt_shortname, | 536 | &bufuname[j++], opt_shortname, |
533 | de->lcase & CASE_LOWER_BASE); | 537 | de->lcase & CASE_LOWER_BASE); |
@@ -549,9 +553,10 @@ parse_record: | |||
549 | j = last_u; | 553 | j = last_u; |
550 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); | 554 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); |
551 | ptname[i++] = '.'; | 555 | ptname[i++] = '.'; |
552 | for (i2 = 0; i2 < 3;) { | 556 | for (i2 = 8; i2 < MSDOS_NAME;) { |
553 | if (!(c = de->ext[i2])) break; | 557 | if (!(c = work[i2])) |
554 | chl = fat_shortname2uni(nls_disk, &de->ext[i2], 3 - i2, | 558 | break; |
559 | chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2, | ||
555 | &bufuname[j++], opt_shortname, | 560 | &bufuname[j++], opt_shortname, |
556 | de->lcase & CASE_LOWER_EXT); | 561 | de->lcase & CASE_LOWER_EXT); |
557 | if (chl <= 1) { | 562 | if (chl <= 1) { |
@@ -563,8 +568,8 @@ parse_record: | |||
563 | } | 568 | } |
564 | } else { | 569 | } else { |
565 | last_u = j; | 570 | last_u = j; |
566 | for (chi = 0; chi < chl && i2 < 3; chi++) { | 571 | for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) { |
567 | ptname[i++] = de->ext[i2++]; | 572 | ptname[i++] = work[i2++]; |
568 | last = i; | 573 | last = i; |
569 | } | 574 | } |
570 | } | 575 | } |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index ab171ea8e869..2c1b73fb82ae 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -17,6 +17,8 @@ struct fatent_operations { | |||
17 | int (*ent_next)(struct fat_entry *); | 17 | int (*ent_next)(struct fat_entry *); |
18 | }; | 18 | }; |
19 | 19 | ||
20 | static DEFINE_SPINLOCK(fat12_entry_lock); | ||
21 | |||
20 | static void fat12_ent_blocknr(struct super_block *sb, int entry, | 22 | static void fat12_ent_blocknr(struct super_block *sb, int entry, |
21 | int *offset, sector_t *blocknr) | 23 | int *offset, sector_t *blocknr) |
22 | { | 24 | { |
@@ -116,10 +118,13 @@ static int fat12_ent_get(struct fat_entry *fatent) | |||
116 | u8 **ent12_p = fatent->u.ent12_p; | 118 | u8 **ent12_p = fatent->u.ent12_p; |
117 | int next; | 119 | int next; |
118 | 120 | ||
121 | spin_lock(&fat12_entry_lock); | ||
119 | if (fatent->entry & 1) | 122 | if (fatent->entry & 1) |
120 | next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); | 123 | next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); |
121 | else | 124 | else |
122 | next = (*ent12_p[1] << 8) | *ent12_p[0]; | 125 | next = (*ent12_p[1] << 8) | *ent12_p[0]; |
126 | spin_unlock(&fat12_entry_lock); | ||
127 | |||
123 | next &= 0x0fff; | 128 | next &= 0x0fff; |
124 | if (next >= BAD_FAT12) | 129 | if (next >= BAD_FAT12) |
125 | next = FAT_ENT_EOF; | 130 | next = FAT_ENT_EOF; |
@@ -151,6 +156,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) | |||
151 | if (new == FAT_ENT_EOF) | 156 | if (new == FAT_ENT_EOF) |
152 | new = EOF_FAT12; | 157 | new = EOF_FAT12; |
153 | 158 | ||
159 | spin_lock(&fat12_entry_lock); | ||
154 | if (fatent->entry & 1) { | 160 | if (fatent->entry & 1) { |
155 | *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); | 161 | *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); |
156 | *ent12_p[1] = new >> 4; | 162 | *ent12_p[1] = new >> 4; |
@@ -158,6 +164,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) | |||
158 | *ent12_p[0] = new & 0xff; | 164 | *ent12_p[0] = new & 0xff; |
159 | *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); | 165 | *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); |
160 | } | 166 | } |
167 | spin_unlock(&fat12_entry_lock); | ||
161 | 168 | ||
162 | mark_buffer_dirty(fatent->bhs[0]); | 169 | mark_buffer_dirty(fatent->bhs[0]); |
163 | if (fatent->nr_bhs == 2) | 170 | if (fatent->nr_bhs == 2) |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 479722d89667..0a7ddb39a593 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/mpage.h> | 21 | #include <linux/mpage.h> |
22 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
23 | #include <linux/exportfs.h> | ||
23 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
24 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
25 | #include <linux/parser.h> | 26 | #include <linux/parser.h> |
@@ -354,8 +355,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) | |||
354 | } else { /* not a directory */ | 355 | } else { /* not a directory */ |
355 | inode->i_generation |= 1; | 356 | inode->i_generation |= 1; |
356 | inode->i_mode = MSDOS_MKMODE(de->attr, | 357 | inode->i_mode = MSDOS_MKMODE(de->attr, |
357 | ((sbi->options.showexec && | 358 | ((sbi->options.showexec && !is_exec(de->name + 8)) |
358 | !is_exec(de->ext)) | ||
359 | ? S_IRUGO|S_IWUGO : S_IRWXUGO) | 359 | ? S_IRUGO|S_IWUGO : S_IRWXUGO) |
360 | & ~sbi->options.fs_fmask) | S_IFREG; | 360 | & ~sbi->options.fs_fmask) | S_IFREG; |
361 | MSDOS_I(inode)->i_start = le16_to_cpu(de->start); | 361 | MSDOS_I(inode)->i_start = le16_to_cpu(de->start); |
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h index 8a4dfef1ddad..3c96d6e63978 100644 --- a/fs/freevxfs/vxfs_dir.h +++ b/fs/freevxfs/vxfs_dir.h | |||
@@ -80,7 +80,7 @@ struct vxfs_direct { | |||
80 | * a d_name with size len. | 80 | * a d_name with size len. |
81 | */ | 81 | */ |
82 | #define VXFS_DIRPAD 4 | 82 | #define VXFS_DIRPAD 4 |
83 | #define VXFS_NAMEMIN ((int)((struct vxfs_direct *)0)->d_name) | 83 | #define VXFS_NAMEMIN offsetof(struct vxfs_direct, d_name) |
84 | #define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1)) | 84 | #define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1)) |
85 | #define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len))) | 85 | #define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len))) |
86 | 86 | ||
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index c1f44009853f..1ab3e9d73886 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/capability.h> | ||
14 | #include <linux/xattr.h> | 15 | #include <linux/xattr.h> |
15 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
16 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 99ea5659bc2c..b8312edee0e4 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/exportfs.h> | ||
14 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
15 | #include <linux/crc32.h> | 16 | #include <linux/crc32.h> |
16 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 90ebab753d30..050d29c0a5b5 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
@@ -62,8 +62,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
62 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 62 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && |
63 | (head->key_type == HFSPLUS_KEY_BINARY)) | 63 | (head->key_type == HFSPLUS_KEY_BINARY)) |
64 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 64 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
65 | else | 65 | else { |
66 | tree->keycmp = hfsplus_cat_case_cmp_key; | 66 | tree->keycmp = hfsplus_cat_case_cmp_key; |
67 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | ||
68 | } | ||
67 | } else { | 69 | } else { |
68 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 70 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
69 | goto fail_page; | 71 | goto fail_page; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 80b5682a2273..1955ee61251c 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -36,6 +36,8 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
36 | u16 type; | 36 | u16 type; |
37 | 37 | ||
38 | sb = dir->i_sb; | 38 | sb = dir->i_sb; |
39 | |||
40 | dentry->d_op = &hfsplus_dentry_operations; | ||
39 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
40 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); |
41 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 3915635b4470..d9f5eda6d039 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -150,6 +150,7 @@ struct hfsplus_sb_info { | |||
150 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 150 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 |
151 | #define HFSPLUS_SB_FORCE 0x0004 | 151 | #define HFSPLUS_SB_FORCE 0x0004 |
152 | #define HFSPLUS_SB_HFSX 0x0008 | 152 | #define HFSPLUS_SB_HFSX 0x0008 |
153 | #define HFSPLUS_SB_CASEFOLD 0x0010 | ||
153 | 154 | ||
154 | 155 | ||
155 | struct hfsplus_inode_info { | 156 | struct hfsplus_inode_info { |
@@ -321,6 +322,7 @@ void hfsplus_file_truncate(struct inode *); | |||
321 | /* inode.c */ | 322 | /* inode.c */ |
322 | extern const struct address_space_operations hfsplus_aops; | 323 | extern const struct address_space_operations hfsplus_aops; |
323 | extern const struct address_space_operations hfsplus_btree_aops; | 324 | extern const struct address_space_operations hfsplus_btree_aops; |
325 | extern struct dentry_operations hfsplus_dentry_operations; | ||
324 | 326 | ||
325 | void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); | 327 | void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); |
326 | void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); | 328 | void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); |
@@ -353,6 +355,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist | |||
353 | int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); | 355 | int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); |
354 | int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); | 356 | int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); |
355 | int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); | 357 | int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); |
358 | int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); | ||
359 | int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); | ||
356 | 360 | ||
357 | /* wrapper.c */ | 361 | /* wrapper.c */ |
358 | int hfsplus_read_wrapper(struct super_block *); | 362 | int hfsplus_read_wrapper(struct super_block *); |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 409ce5429c91..6f7c662174db 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -131,6 +131,11 @@ const struct address_space_operations hfsplus_aops = { | |||
131 | .writepages = hfsplus_writepages, | 131 | .writepages = hfsplus_writepages, |
132 | }; | 132 | }; |
133 | 133 | ||
134 | struct dentry_operations hfsplus_dentry_operations = { | ||
135 | .d_hash = hfsplus_hash_dentry, | ||
136 | .d_compare = hfsplus_compare_dentry, | ||
137 | }; | ||
138 | |||
134 | static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, | 139 | static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, |
135 | struct nameidata *nd) | 140 | struct nameidata *nd) |
136 | { | 141 | { |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ebd1b380cbbc..6d87a2a9534d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -283,11 +283,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
283 | struct nls_table *nls = NULL; | 283 | struct nls_table *nls = NULL; |
284 | int err = -EINVAL; | 284 | int err = -EINVAL; |
285 | 285 | ||
286 | sbi = kmalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); | 286 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
287 | if (!sbi) | 287 | if (!sbi) |
288 | return -ENOMEM; | 288 | return -ENOMEM; |
289 | 289 | ||
290 | memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); | ||
291 | sb->s_fs_info = sbi; | 290 | sb->s_fs_info = sbi; |
292 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 291 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); |
293 | hfsplus_fill_defaults(sbi); | 292 | hfsplus_fill_defaults(sbi); |
@@ -381,6 +380,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
381 | iput(root); | 380 | iput(root); |
382 | goto cleanup; | 381 | goto cleanup; |
383 | } | 382 | } |
383 | sb->s_root->d_op = &hfsplus_dentry_operations; | ||
384 | 384 | ||
385 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 385 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
386 | str.name = HFSP_HIDDENDIR_NAME; | 386 | str.name = HFSP_HIDDENDIR_NAME; |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 689c8bd721fb..9e10f9444b64 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
@@ -239,61 +239,201 @@ out: | |||
239 | return res; | 239 | return res; |
240 | } | 240 | } |
241 | 241 | ||
242 | int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len) | 242 | /* |
243 | * Convert one or more ASCII characters into a single unicode character. | ||
244 | * Returns the number of ASCII characters corresponding to the unicode char. | ||
245 | */ | ||
246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | ||
247 | wchar_t *uc) | ||
243 | { | 248 | { |
244 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); |
245 | int size, off, decompose; | 250 | if (size <= 0) { |
251 | *uc = '?'; | ||
252 | size = 1; | ||
253 | } | ||
254 | switch (*uc) { | ||
255 | case 0x2400: | ||
256 | *uc = 0; | ||
257 | break; | ||
258 | case ':': | ||
259 | *uc = '/'; | ||
260 | break; | ||
261 | } | ||
262 | return size; | ||
263 | } | ||
264 | |||
265 | /* Decomposes a single unicode character. */ | ||
266 | static inline u16 *decompose_unichar(wchar_t uc, int *size) | ||
267 | { | ||
268 | int off; | ||
269 | |||
270 | off = hfsplus_decompose_table[(uc >> 12) & 0xf]; | ||
271 | if (off == 0 || off == 0xffff) | ||
272 | return NULL; | ||
273 | |||
274 | off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; | ||
275 | if (!off) | ||
276 | return NULL; | ||
277 | |||
278 | off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; | ||
279 | if (!off) | ||
280 | return NULL; | ||
281 | |||
282 | off = hfsplus_decompose_table[off + (uc & 0xf)]; | ||
283 | *size = off & 3; | ||
284 | if (*size == 0) | ||
285 | return NULL; | ||
286 | return hfsplus_decompose_table + (off / 4); | ||
287 | } | ||
288 | |||
289 | int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | ||
290 | const char *astr, int len) | ||
291 | { | ||
292 | int size, dsize, decompose; | ||
293 | u16 *dstr, outlen = 0; | ||
246 | wchar_t c; | 294 | wchar_t c; |
247 | u16 outlen = 0; | ||
248 | 295 | ||
249 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); |
250 | |||
251 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
252 | size = nls->char2uni(astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
253 | if (size <= 0) { | 299 | |
254 | c = '?'; | 300 | if (decompose && (dstr = decompose_unichar(c, &dsize))) { |
255 | size = 1; | 301 | if (outlen + dsize > HFSPLUS_MAX_STRLEN) |
256 | } | ||
257 | astr += size; | ||
258 | len -= size; | ||
259 | switch (c) { | ||
260 | case 0x2400: | ||
261 | c = 0; | ||
262 | break; | ||
263 | case ':': | ||
264 | c = '/'; | ||
265 | break; | ||
266 | } | ||
267 | if (c >= 0xc0 && decompose) { | ||
268 | off = hfsplus_decompose_table[(c >> 12) & 0xf]; | ||
269 | if (!off) | ||
270 | goto done; | ||
271 | if (off == 0xffff) { | ||
272 | goto done; | ||
273 | } | ||
274 | off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)]; | ||
275 | if (!off) | ||
276 | goto done; | ||
277 | off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)]; | ||
278 | if (!off) | ||
279 | goto done; | ||
280 | off = hfsplus_decompose_table[off + (c & 0xf)]; | ||
281 | size = off & 3; | ||
282 | if (!size) | ||
283 | goto done; | ||
284 | off /= 4; | ||
285 | if (outlen + size > HFSPLUS_MAX_STRLEN) | ||
286 | break; | 302 | break; |
287 | do { | 303 | do { |
288 | ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]); | 304 | ustr->unicode[outlen++] = cpu_to_be16(*dstr++); |
289 | } while (--size > 0); | 305 | } while (--dsize > 0); |
290 | continue; | 306 | } else |
291 | } | 307 | ustr->unicode[outlen++] = cpu_to_be16(c); |
292 | done: | 308 | |
293 | ustr->unicode[outlen++] = cpu_to_be16(c); | 309 | astr += size; |
310 | len -= size; | ||
294 | } | 311 | } |
295 | ustr->length = cpu_to_be16(outlen); | 312 | ustr->length = cpu_to_be16(outlen); |
296 | if (len > 0) | 313 | if (len > 0) |
297 | return -ENAMETOOLONG; | 314 | return -ENAMETOOLONG; |
298 | return 0; | 315 | return 0; |
299 | } | 316 | } |
317 | |||
318 | /* | ||
319 | * Hash a string to an integer as appropriate for the HFS+ filesystem. | ||
320 | * Composed unicode characters are decomposed and case-folding is performed | ||
321 | * if the appropriate bits are (un)set on the superblock. | ||
322 | */ | ||
323 | int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | ||
324 | { | ||
325 | struct super_block *sb = dentry->d_sb; | ||
326 | const char *astr; | ||
327 | const u16 *dstr; | ||
328 | int casefold, decompose, size, dsize, len; | ||
329 | unsigned long hash; | ||
330 | wchar_t c; | ||
331 | u16 c2; | ||
332 | |||
333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | ||
334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | ||
335 | hash = init_name_hash(); | ||
336 | astr = str->name; | ||
337 | len = str->len; | ||
338 | while (len > 0) { | ||
339 | size = asc2unichar(sb, astr, len, &c); | ||
340 | astr += size; | ||
341 | len -= size; | ||
342 | |||
343 | if (decompose && (dstr = decompose_unichar(c, &dsize))) { | ||
344 | do { | ||
345 | c2 = *dstr++; | ||
346 | if (!casefold || (c2 = case_fold(c2))) | ||
347 | hash = partial_name_hash(c2, hash); | ||
348 | } while (--dsize > 0); | ||
349 | } else { | ||
350 | c2 = c; | ||
351 | if (!casefold || (c2 = case_fold(c2))) | ||
352 | hash = partial_name_hash(c2, hash); | ||
353 | } | ||
354 | } | ||
355 | str->hash = end_name_hash(hash); | ||
356 | |||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * Compare strings with HFS+ filename ordering. | ||
362 | * Composed unicode characters are decomposed and case-folding is performed | ||
363 | * if the appropriate bits are (un)set on the superblock. | ||
364 | */ | ||
365 | int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) | ||
366 | { | ||
367 | struct super_block *sb = dentry->d_sb; | ||
368 | int casefold, decompose, size; | ||
369 | int dsize1, dsize2, len1, len2; | ||
370 | const u16 *dstr1, *dstr2; | ||
371 | const char *astr1, *astr2; | ||
372 | u16 c1, c2; | ||
373 | wchar_t c; | ||
374 | |||
375 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | ||
376 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | ||
377 | astr1 = s1->name; | ||
378 | len1 = s1->len; | ||
379 | astr2 = s2->name; | ||
380 | len2 = s2->len; | ||
381 | dsize1 = dsize2 = 0; | ||
382 | dstr1 = dstr2 = NULL; | ||
383 | |||
384 | while (len1 > 0 && len2 > 0) { | ||
385 | if (!dsize1) { | ||
386 | size = asc2unichar(sb, astr1, len1, &c); | ||
387 | astr1 += size; | ||
388 | len1 -= size; | ||
389 | |||
390 | if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) { | ||
391 | c1 = c; | ||
392 | dstr1 = &c1; | ||
393 | dsize1 = 1; | ||
394 | } | ||
395 | } | ||
396 | |||
397 | if (!dsize2) { | ||
398 | size = asc2unichar(sb, astr2, len2, &c); | ||
399 | astr2 += size; | ||
400 | len2 -= size; | ||
401 | |||
402 | if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) { | ||
403 | c2 = c; | ||
404 | dstr2 = &c2; | ||
405 | dsize2 = 1; | ||
406 | } | ||
407 | } | ||
408 | |||
409 | c1 = *dstr1; | ||
410 | c2 = *dstr2; | ||
411 | if (casefold) { | ||
412 | if (!(c1 = case_fold(c1))) { | ||
413 | dstr1++; | ||
414 | dsize1--; | ||
415 | continue; | ||
416 | } | ||
417 | if (!(c2 = case_fold(c2))) { | ||
418 | dstr2++; | ||
419 | dsize2--; | ||
420 | continue; | ||
421 | } | ||
422 | } | ||
423 | if (c1 < c2) | ||
424 | return -1; | ||
425 | else if (c1 > c2) | ||
426 | return 1; | ||
427 | |||
428 | dstr1++; | ||
429 | dsize1--; | ||
430 | dstr2++; | ||
431 | dsize2--; | ||
432 | } | ||
433 | |||
434 | if (len1 < len2) | ||
435 | return -1; | ||
436 | if (len1 > len2) | ||
437 | return 1; | ||
438 | return 0; | ||
439 | } | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e6b46b3ac2fe..d145cb79c30a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -13,15 +13,18 @@ | |||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
15 | #include <linux/file.h> | 15 | #include <linux/file.h> |
16 | #include <linux/kernel.h> | ||
16 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
17 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
18 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
19 | #include <linux/init.h> | 20 | #include <linux/init.h> |
20 | #include <linux/string.h> | 21 | #include <linux/string.h> |
21 | #include <linux/capability.h> | 22 | #include <linux/capability.h> |
23 | #include <linux/ctype.h> | ||
22 | #include <linux/backing-dev.h> | 24 | #include <linux/backing-dev.h> |
23 | #include <linux/hugetlb.h> | 25 | #include <linux/hugetlb.h> |
24 | #include <linux/pagevec.h> | 26 | #include <linux/pagevec.h> |
27 | #include <linux/parser.h> | ||
25 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
26 | #include <linux/quotaops.h> | 29 | #include <linux/quotaops.h> |
27 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
@@ -47,6 +50,21 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = { | |||
47 | 50 | ||
48 | int sysctl_hugetlb_shm_group; | 51 | int sysctl_hugetlb_shm_group; |
49 | 52 | ||
53 | enum { | ||
54 | Opt_size, Opt_nr_inodes, | ||
55 | Opt_mode, Opt_uid, Opt_gid, | ||
56 | Opt_err, | ||
57 | }; | ||
58 | |||
59 | static match_table_t tokens = { | ||
60 | {Opt_size, "size=%s"}, | ||
61 | {Opt_nr_inodes, "nr_inodes=%s"}, | ||
62 | {Opt_mode, "mode=%o"}, | ||
63 | {Opt_uid, "uid=%u"}, | ||
64 | {Opt_gid, "gid=%u"}, | ||
65 | {Opt_err, NULL}, | ||
66 | }; | ||
67 | |||
50 | static void huge_pagevec_release(struct pagevec *pvec) | 68 | static void huge_pagevec_release(struct pagevec *pvec) |
51 | { | 69 | { |
52 | int i; | 70 | int i; |
@@ -594,46 +612,73 @@ static const struct super_operations hugetlbfs_ops = { | |||
594 | static int | 612 | static int |
595 | hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | 613 | hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) |
596 | { | 614 | { |
597 | char *opt, *value, *rest; | 615 | char *p, *rest; |
616 | substring_t args[MAX_OPT_ARGS]; | ||
617 | int option; | ||
598 | 618 | ||
599 | if (!options) | 619 | if (!options) |
600 | return 0; | 620 | return 0; |
601 | while ((opt = strsep(&options, ",")) != NULL) { | 621 | |
602 | if (!*opt) | 622 | while ((p = strsep(&options, ",")) != NULL) { |
623 | int token; | ||
624 | if (!*p) | ||
603 | continue; | 625 | continue; |
604 | 626 | ||
605 | value = strchr(opt, '='); | 627 | token = match_token(p, tokens, args); |
606 | if (!value || !*value) | 628 | switch (token) { |
607 | return -EINVAL; | 629 | case Opt_uid: |
608 | else | 630 | if (match_int(&args[0], &option)) |
609 | *value++ = '\0'; | 631 | goto bad_val; |
610 | 632 | pconfig->uid = option; | |
611 | if (!strcmp(opt, "uid")) | 633 | break; |
612 | pconfig->uid = simple_strtoul(value, &value, 0); | 634 | |
613 | else if (!strcmp(opt, "gid")) | 635 | case Opt_gid: |
614 | pconfig->gid = simple_strtoul(value, &value, 0); | 636 | if (match_int(&args[0], &option)) |
615 | else if (!strcmp(opt, "mode")) | 637 | goto bad_val; |
616 | pconfig->mode = simple_strtoul(value,&value,0) & 0777U; | 638 | pconfig->gid = option; |
617 | else if (!strcmp(opt, "size")) { | 639 | break; |
618 | unsigned long long size = memparse(value, &rest); | 640 | |
641 | case Opt_mode: | ||
642 | if (match_octal(&args[0], &option)) | ||
643 | goto bad_val; | ||
644 | pconfig->mode = option & 0777U; | ||
645 | break; | ||
646 | |||
647 | case Opt_size: { | ||
648 | unsigned long long size; | ||
649 | /* memparse() will accept a K/M/G without a digit */ | ||
650 | if (!isdigit(*args[0].from)) | ||
651 | goto bad_val; | ||
652 | size = memparse(args[0].from, &rest); | ||
619 | if (*rest == '%') { | 653 | if (*rest == '%') { |
620 | size <<= HPAGE_SHIFT; | 654 | size <<= HPAGE_SHIFT; |
621 | size *= max_huge_pages; | 655 | size *= max_huge_pages; |
622 | do_div(size, 100); | 656 | do_div(size, 100); |
623 | rest++; | ||
624 | } | 657 | } |
625 | pconfig->nr_blocks = (size >> HPAGE_SHIFT); | 658 | pconfig->nr_blocks = (size >> HPAGE_SHIFT); |
626 | value = rest; | 659 | break; |
627 | } else if (!strcmp(opt,"nr_inodes")) { | 660 | } |
628 | pconfig->nr_inodes = memparse(value, &rest); | 661 | |
629 | value = rest; | 662 | case Opt_nr_inodes: |
630 | } else | 663 | /* memparse() will accept a K/M/G without a digit */ |
631 | return -EINVAL; | 664 | if (!isdigit(*args[0].from)) |
665 | goto bad_val; | ||
666 | pconfig->nr_inodes = memparse(args[0].from, &rest); | ||
667 | break; | ||
632 | 668 | ||
633 | if (*value) | 669 | default: |
670 | printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", | ||
671 | p); | ||
634 | return -EINVAL; | 672 | return -EINVAL; |
673 | break; | ||
674 | } | ||
635 | } | 675 | } |
636 | return 0; | 676 | return 0; |
677 | |||
678 | bad_val: | ||
679 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", | ||
680 | args[0].from, p); | ||
681 | return 1; | ||
637 | } | 682 | } |
638 | 683 | ||
639 | static int | 684 | static int |
@@ -651,7 +696,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | |||
651 | config.gid = current->fsgid; | 696 | config.gid = current->fsgid; |
652 | config.mode = 0755; | 697 | config.mode = 0755; |
653 | ret = hugetlbfs_parse_options(data, &config); | 698 | ret = hugetlbfs_parse_options(data, &config); |
654 | |||
655 | if (ret) | 699 | if (ret) |
656 | return ret; | 700 | return ret; |
657 | 701 | ||
diff --git a/fs/inode.c b/fs/inode.c index 9a012cc5b6cd..320e088d0b28 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -145,7 +145,7 @@ static struct inode *alloc_inode(struct super_block *sb) | |||
145 | mapping->a_ops = &empty_aops; | 145 | mapping->a_ops = &empty_aops; |
146 | mapping->host = inode; | 146 | mapping->host = inode; |
147 | mapping->flags = 0; | 147 | mapping->flags = 0; |
148 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER); | 148 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); |
149 | mapping->assoc_mapping = NULL; | 149 | mapping->assoc_mapping = NULL; |
150 | mapping->backing_dev_info = &default_backing_dev_info; | 150 | mapping->backing_dev_info = &default_backing_dev_info; |
151 | 151 | ||
@@ -462,6 +462,11 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) | |||
462 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 462 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; |
463 | } | 463 | } |
464 | 464 | ||
465 | static struct shrinker icache_shrinker = { | ||
466 | .shrink = shrink_icache_memory, | ||
467 | .seeks = DEFAULT_SEEKS, | ||
468 | }; | ||
469 | |||
465 | static void __wait_on_freeing_inode(struct inode *inode); | 470 | static void __wait_on_freeing_inode(struct inode *inode); |
466 | /* | 471 | /* |
467 | * Called with the inode lock held. | 472 | * Called with the inode lock held. |
@@ -519,7 +524,13 @@ repeat: | |||
519 | * new_inode - obtain an inode | 524 | * new_inode - obtain an inode |
520 | * @sb: superblock | 525 | * @sb: superblock |
521 | * | 526 | * |
522 | * Allocates a new inode for given superblock. | 527 | * Allocates a new inode for given superblock. The default gfp_mask |
528 | * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. | ||
529 | * If HIGHMEM pages are unsuitable or it is known that pages allocated | ||
530 | * for the page cache are not reclaimable or migratable, | ||
531 | * mapping_set_gfp_mask() must be called with suitable flags on the | ||
532 | * newly created inode's mapping | ||
533 | * | ||
523 | */ | 534 | */ |
524 | struct inode *new_inode(struct super_block *sb) | 535 | struct inode *new_inode(struct super_block *sb) |
525 | { | 536 | { |
@@ -1379,7 +1390,7 @@ void __init inode_init(unsigned long mempages) | |||
1379 | SLAB_MEM_SPREAD), | 1390 | SLAB_MEM_SPREAD), |
1380 | init_once, | 1391 | init_once, |
1381 | NULL); | 1392 | NULL); |
1382 | set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); | 1393 | register_shrinker(&icache_shrinker); |
1383 | 1394 | ||
1384 | /* Hash may have been set up in inode_init_early */ | 1395 | /* Hash may have been set up in inode_init_early */ |
1385 | if (!hashdist) | 1396 | if (!hashdist) |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 8c90cbc903fa..c2a773e8620b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kallsyms.h> | ||
16 | 15 | ||
17 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
18 | #include <asm/ioctls.h> | 17 | #include <asm/ioctls.h> |
@@ -21,7 +20,6 @@ static long do_ioctl(struct file *filp, unsigned int cmd, | |||
21 | unsigned long arg) | 20 | unsigned long arg) |
22 | { | 21 | { |
23 | int error = -ENOTTY; | 22 | int error = -ENOTTY; |
24 | void *f; | ||
25 | 23 | ||
26 | if (!filp->f_op) | 24 | if (!filp->f_op) |
27 | goto out; | 25 | goto out; |
@@ -31,16 +29,10 @@ static long do_ioctl(struct file *filp, unsigned int cmd, | |||
31 | if (error == -ENOIOCTLCMD) | 29 | if (error == -ENOIOCTLCMD) |
32 | error = -EINVAL; | 30 | error = -EINVAL; |
33 | goto out; | 31 | goto out; |
34 | } else if ((f = filp->f_op->ioctl)) { | 32 | } else if (filp->f_op->ioctl) { |
35 | lock_kernel(); | 33 | lock_kernel(); |
36 | if (!filp->f_op->ioctl) { | 34 | error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, |
37 | printk("%s: ioctl %p disappeared\n", __FUNCTION__, f); | 35 | filp, cmd, arg); |
38 | print_symbol("symbol: %s\n", (unsigned long)f); | ||
39 | dump_stack(); | ||
40 | } else { | ||
41 | error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, | ||
42 | filp, cmd, arg); | ||
43 | } | ||
44 | unlock_kernel(); | 36 | unlock_kernel(); |
45 | } | 37 | } |
46 | 38 | ||
@@ -182,11 +174,3 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) | |||
182 | out: | 174 | out: |
183 | return error; | 175 | return error; |
184 | } | 176 | } |
185 | |||
186 | /* | ||
187 | * Platforms implementing 32 bit compatibility ioctl handlers in | ||
188 | * modules need this exported | ||
189 | */ | ||
190 | #ifdef CONFIG_COMPAT | ||
191 | EXPORT_SYMBOL(sys_ioctl); | ||
192 | #endif | ||
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 0e94c31cad9b..1ba407c64df1 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
@@ -7,34 +7,18 @@ | |||
7 | * | 7 | * |
8 | * Steve Beynon : Missing last directory entries fixed | 8 | * Steve Beynon : Missing last directory entries fixed |
9 | * (stephen@askone.demon.co.uk) : 21st June 1996 | 9 | * (stephen@askone.demon.co.uk) : 21st June 1996 |
10 | * | 10 | * |
11 | * isofs directory handling functions | 11 | * isofs directory handling functions |
12 | */ | 12 | */ |
13 | #include <linux/smp_lock.h> | 13 | #include <linux/smp_lock.h> |
14 | #include "isofs.h" | 14 | #include "isofs.h" |
15 | 15 | ||
16 | static int isofs_readdir(struct file *, void *, filldir_t); | ||
17 | |||
18 | const struct file_operations isofs_dir_operations = | ||
19 | { | ||
20 | .read = generic_read_dir, | ||
21 | .readdir = isofs_readdir, | ||
22 | }; | ||
23 | |||
24 | /* | ||
25 | * directories can handle most operations... | ||
26 | */ | ||
27 | const struct inode_operations isofs_dir_inode_operations = | ||
28 | { | ||
29 | .lookup = isofs_lookup, | ||
30 | }; | ||
31 | |||
32 | int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) | 16 | int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) |
33 | { | 17 | { |
34 | char * old = de->name; | 18 | char * old = de->name; |
35 | int len = de->name_len[0]; | 19 | int len = de->name_len[0]; |
36 | int i; | 20 | int i; |
37 | 21 | ||
38 | for (i = 0; i < len; i++) { | 22 | for (i = 0; i < len; i++) { |
39 | unsigned char c = old[i]; | 23 | unsigned char c = old[i]; |
40 | if (!c) | 24 | if (!c) |
@@ -62,22 +46,27 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod | |||
62 | } | 46 | } |
63 | 47 | ||
64 | /* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */ | 48 | /* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */ |
65 | int get_acorn_filename(struct iso_directory_record * de, | 49 | int get_acorn_filename(struct iso_directory_record *de, |
66 | char * retname, struct inode * inode) | 50 | char *retname, struct inode *inode) |
67 | { | 51 | { |
68 | int std; | 52 | int std; |
69 | unsigned char * chr; | 53 | unsigned char *chr; |
70 | int retnamlen = isofs_name_translate(de, retname, inode); | 54 | int retnamlen = isofs_name_translate(de, retname, inode); |
71 | if (retnamlen == 0) return 0; | 55 | |
56 | if (retnamlen == 0) | ||
57 | return 0; | ||
72 | std = sizeof(struct iso_directory_record) + de->name_len[0]; | 58 | std = sizeof(struct iso_directory_record) + de->name_len[0]; |
73 | if (std & 1) std++; | 59 | if (std & 1) |
74 | if ((*((unsigned char *) de) - std) != 32) return retnamlen; | 60 | std++; |
61 | if ((*((unsigned char *) de) - std) != 32) | ||
62 | return retnamlen; | ||
75 | chr = ((unsigned char *) de) + std; | 63 | chr = ((unsigned char *) de) + std; |
76 | if (strncmp(chr, "ARCHIMEDES", 10)) return retnamlen; | 64 | if (strncmp(chr, "ARCHIMEDES", 10)) |
77 | if ((*retname == '_') && ((chr[19] & 1) == 1)) *retname = '!'; | 65 | return retnamlen; |
66 | if ((*retname == '_') && ((chr[19] & 1) == 1)) | ||
67 | *retname = '!'; | ||
78 | if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) | 68 | if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) |
79 | && ((chr[12] & 0xf0) == 0xf0)) | 69 | && ((chr[12] & 0xf0) == 0xf0)) { |
80 | { | ||
81 | retname[retnamlen] = ','; | 70 | retname[retnamlen] = ','; |
82 | sprintf(retname+retnamlen+1, "%3.3x", | 71 | sprintf(retname+retnamlen+1, "%3.3x", |
83 | ((chr[12] & 0xf) << 8) | chr[11]); | 72 | ((chr[12] & 0xf) << 8) | chr[11]); |
@@ -91,7 +80,7 @@ int get_acorn_filename(struct iso_directory_record * de, | |||
91 | */ | 80 | */ |
92 | static int do_isofs_readdir(struct inode *inode, struct file *filp, | 81 | static int do_isofs_readdir(struct inode *inode, struct file *filp, |
93 | void *dirent, filldir_t filldir, | 82 | void *dirent, filldir_t filldir, |
94 | char * tmpname, struct iso_directory_record * tmpde) | 83 | char *tmpname, struct iso_directory_record *tmpde) |
95 | { | 84 | { |
96 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
97 | unsigned char bufbits = ISOFS_BUFFER_BITS(inode); | 86 | unsigned char bufbits = ISOFS_BUFFER_BITS(inode); |
@@ -121,9 +110,11 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
121 | 110 | ||
122 | de_len = *(unsigned char *) de; | 111 | de_len = *(unsigned char *) de; |
123 | 112 | ||
124 | /* If the length byte is zero, we should move on to the next | 113 | /* |
125 | CDROM sector. If we are at the end of the directory, we | 114 | * If the length byte is zero, we should move on to the next |
126 | kick out of the while loop. */ | 115 | * CDROM sector. If we are at the end of the directory, we |
116 | * kick out of the while loop. | ||
117 | */ | ||
127 | 118 | ||
128 | if (de_len == 0) { | 119 | if (de_len == 0) { |
129 | brelse(bh); | 120 | brelse(bh); |
@@ -157,11 +148,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
157 | 148 | ||
158 | if (first_de) { | 149 | if (first_de) { |
159 | isofs_normalize_block_and_offset(de, | 150 | isofs_normalize_block_and_offset(de, |
160 | &block_saved, | 151 | &block_saved, |
161 | &offset_saved); | 152 | &offset_saved); |
162 | inode_number = isofs_get_ino(block_saved, | 153 | inode_number = isofs_get_ino(block_saved, |
163 | offset_saved, | 154 | offset_saved, bufbits); |
164 | bufbits); | ||
165 | } | 155 | } |
166 | 156 | ||
167 | if (de->flags[-sbi->s_high_sierra] & 0x80) { | 157 | if (de->flags[-sbi->s_high_sierra] & 0x80) { |
@@ -199,7 +189,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
199 | */ | 189 | */ |
200 | if ((sbi->s_hide == 'y' && | 190 | if ((sbi->s_hide == 'y' && |
201 | (de->flags[-sbi->s_high_sierra] & 1)) || | 191 | (de->flags[-sbi->s_high_sierra] & 1)) || |
202 | (sbi->s_showassoc =='n' && | 192 | (sbi->s_showassoc =='n' && |
203 | (de->flags[-sbi->s_high_sierra] & 4))) { | 193 | (de->flags[-sbi->s_high_sierra] & 4))) { |
204 | filp->f_pos += de_len; | 194 | filp->f_pos += de_len; |
205 | continue; | 195 | continue; |
@@ -240,7 +230,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
240 | 230 | ||
241 | continue; | 231 | continue; |
242 | } | 232 | } |
243 | if (bh) brelse(bh); | 233 | if (bh) |
234 | brelse(bh); | ||
244 | return 0; | 235 | return 0; |
245 | } | 236 | } |
246 | 237 | ||
@@ -253,8 +244,8 @@ static int isofs_readdir(struct file *filp, | |||
253 | void *dirent, filldir_t filldir) | 244 | void *dirent, filldir_t filldir) |
254 | { | 245 | { |
255 | int result; | 246 | int result; |
256 | char * tmpname; | 247 | char *tmpname; |
257 | struct iso_directory_record * tmpde; | 248 | struct iso_directory_record *tmpde; |
258 | struct inode *inode = filp->f_path.dentry->d_inode; | 249 | struct inode *inode = filp->f_path.dentry->d_inode; |
259 | 250 | ||
260 | tmpname = (char *)__get_free_page(GFP_KERNEL); | 251 | tmpname = (char *)__get_free_page(GFP_KERNEL); |
@@ -270,3 +261,19 @@ static int isofs_readdir(struct file *filp, | |||
270 | unlock_kernel(); | 261 | unlock_kernel(); |
271 | return result; | 262 | return result; |
272 | } | 263 | } |
264 | |||
265 | const struct file_operations isofs_dir_operations = | ||
266 | { | ||
267 | .read = generic_read_dir, | ||
268 | .readdir = isofs_readdir, | ||
269 | }; | ||
270 | |||
271 | /* | ||
272 | * directories can handle most operations... | ||
273 | */ | ||
274 | const struct inode_operations isofs_dir_inode_operations = | ||
275 | { | ||
276 | .lookup = isofs_lookup, | ||
277 | }; | ||
278 | |||
279 | |||
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 5c3eecf7542e..4f5418be0590 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -73,20 +73,20 @@ static void isofs_destroy_inode(struct inode *inode) | |||
73 | kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); | 73 | kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); |
74 | } | 74 | } |
75 | 75 | ||
76 | static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) | 76 | static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) |
77 | { | 77 | { |
78 | struct iso_inode_info *ei = foo; | 78 | struct iso_inode_info *ei = foo; |
79 | 79 | ||
80 | inode_init_once(&ei->vfs_inode); | 80 | inode_init_once(&ei->vfs_inode); |
81 | } | 81 | } |
82 | 82 | ||
83 | static int init_inodecache(void) | 83 | static int init_inodecache(void) |
84 | { | 84 | { |
85 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", | 85 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", |
86 | sizeof(struct iso_inode_info), | 86 | sizeof(struct iso_inode_info), |
87 | 0, (SLAB_RECLAIM_ACCOUNT| | 87 | 0, (SLAB_RECLAIM_ACCOUNT| |
88 | SLAB_MEM_SPREAD), | 88 | SLAB_MEM_SPREAD), |
89 | init_once, NULL); | 89 | init_once, NULL); |
90 | if (isofs_inode_cachep == NULL) | 90 | if (isofs_inode_cachep == NULL) |
91 | return -ENOMEM; | 91 | return -ENOMEM; |
92 | return 0; | 92 | return 0; |
@@ -150,9 +150,9 @@ struct iso9660_options{ | |||
150 | uid_t uid; | 150 | uid_t uid; |
151 | char *iocharset; | 151 | char *iocharset; |
152 | unsigned char utf8; | 152 | unsigned char utf8; |
153 | /* LVE */ | 153 | /* LVE */ |
154 | s32 session; | 154 | s32 session; |
155 | s32 sbsector; | 155 | s32 sbsector; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | /* | 158 | /* |
@@ -197,7 +197,7 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) | |||
197 | hash = init_name_hash(); | 197 | hash = init_name_hash(); |
198 | while (len--) { | 198 | while (len--) { |
199 | c = tolower(*name++); | 199 | c = tolower(*name++); |
200 | hash = partial_name_hash(tolower(c), hash); | 200 | hash = partial_name_hash(c, hash); |
201 | } | 201 | } |
202 | qstr->hash = end_name_hash(hash); | 202 | qstr->hash = end_name_hash(hash); |
203 | 203 | ||
@@ -360,10 +360,12 @@ static int parse_options(char *options, struct iso9660_options *popt) | |||
360 | popt->check = 'u'; /* unset */ | 360 | popt->check = 'u'; /* unset */ |
361 | popt->nocompress = 0; | 361 | popt->nocompress = 0; |
362 | popt->blocksize = 1024; | 362 | popt->blocksize = 1024; |
363 | popt->mode = S_IRUGO | S_IXUGO; /* r-x for all. The disc could | 363 | popt->mode = S_IRUGO | S_IXUGO; /* |
364 | be shared with DOS machines so | 364 | * r-x for all. The disc could |
365 | virtually anything could be | 365 | * be shared with DOS machines so |
366 | a valid executable. */ | 366 | * virtually anything could be |
367 | * a valid executable. | ||
368 | */ | ||
367 | popt->gid = 0; | 369 | popt->gid = 0; |
368 | popt->uid = 0; | 370 | popt->uid = 0; |
369 | popt->iocharset = NULL; | 371 | popt->iocharset = NULL; |
@@ -503,30 +505,30 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) | |||
503 | Te.cdte_format=CDROM_LBA; | 505 | Te.cdte_format=CDROM_LBA; |
504 | i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); | 506 | i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); |
505 | if (!i) { | 507 | if (!i) { |
506 | printk(KERN_DEBUG "Session %d start %d type %d\n", | 508 | printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n", |
507 | session, Te.cdte_addr.lba, | 509 | session, Te.cdte_addr.lba, |
508 | Te.cdte_ctrl&CDROM_DATA_TRACK); | 510 | Te.cdte_ctrl&CDROM_DATA_TRACK); |
509 | if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) | 511 | if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) |
510 | return Te.cdte_addr.lba; | 512 | return Te.cdte_addr.lba; |
511 | } | 513 | } |
512 | 514 | ||
513 | printk(KERN_ERR "Invalid session number or type of track\n"); | 515 | printk(KERN_ERR "ISOFS: Invalid session number or type of track\n"); |
514 | } | 516 | } |
515 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); | 517 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); |
516 | if (session > 0) | 518 | if (session > 0) |
517 | printk(KERN_ERR "Invalid session number\n"); | 519 | printk(KERN_ERR "ISOFS: Invalid session number\n"); |
518 | #if 0 | 520 | #if 0 |
519 | printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); | 521 | printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i); |
520 | if (i==0) { | 522 | if (i==0) { |
521 | printk("isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); | 523 | printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); |
522 | printk("isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); | 524 | printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); |
523 | } | 525 | } |
524 | #endif | 526 | #endif |
525 | if (i==0) | 527 | if (i==0) |
526 | #if WE_OBEY_THE_WRITTEN_STANDARDS | 528 | #if WE_OBEY_THE_WRITTEN_STANDARDS |
527 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ | 529 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ |
528 | #endif | 530 | #endif |
529 | vol_desc_start=ms_info.addr.lba; | 531 | vol_desc_start=ms_info.addr.lba; |
530 | return vol_desc_start; | 532 | return vol_desc_start; |
531 | } | 533 | } |
532 | 534 | ||
@@ -538,20 +540,20 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) | |||
538 | */ | 540 | */ |
539 | static int isofs_fill_super(struct super_block *s, void *data, int silent) | 541 | static int isofs_fill_super(struct super_block *s, void *data, int silent) |
540 | { | 542 | { |
541 | struct buffer_head * bh = NULL, *pri_bh = NULL; | 543 | struct buffer_head *bh = NULL, *pri_bh = NULL; |
542 | struct hs_primary_descriptor * h_pri = NULL; | 544 | struct hs_primary_descriptor *h_pri = NULL; |
543 | struct iso_primary_descriptor * pri = NULL; | 545 | struct iso_primary_descriptor *pri = NULL; |
544 | struct iso_supplementary_descriptor *sec = NULL; | 546 | struct iso_supplementary_descriptor *sec = NULL; |
545 | struct iso_directory_record * rootp; | 547 | struct iso_directory_record *rootp; |
546 | int joliet_level = 0; | 548 | struct inode *inode; |
547 | int iso_blknum, block; | 549 | struct iso9660_options opt; |
548 | int orig_zonesize; | 550 | struct isofs_sb_info *sbi; |
549 | int table; | 551 | unsigned long first_data_zone; |
550 | unsigned int vol_desc_start; | 552 | int joliet_level = 0; |
551 | unsigned long first_data_zone; | 553 | int iso_blknum, block; |
552 | struct inode * inode; | 554 | int orig_zonesize; |
553 | struct iso9660_options opt; | 555 | int table; |
554 | struct isofs_sb_info * sbi; | 556 | unsigned int vol_desc_start; |
555 | 557 | ||
556 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 558 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
557 | if (!sbi) | 559 | if (!sbi) |
@@ -577,72 +579,73 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) | |||
577 | vol_desc_start = (opt.sbsector != -1) ? | 579 | vol_desc_start = (opt.sbsector != -1) ? |
578 | opt.sbsector : isofs_get_last_session(s,opt.session); | 580 | opt.sbsector : isofs_get_last_session(s,opt.session); |
579 | 581 | ||
580 | for (iso_blknum = vol_desc_start+16; | 582 | for (iso_blknum = vol_desc_start+16; |
581 | iso_blknum < vol_desc_start+100; iso_blknum++) | 583 | iso_blknum < vol_desc_start+100; iso_blknum++) { |
582 | { | 584 | struct hs_volume_descriptor *hdp; |
583 | struct hs_volume_descriptor * hdp; | 585 | struct iso_volume_descriptor *vdp; |
584 | struct iso_volume_descriptor * vdp; | 586 | |
585 | 587 | block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); | |
586 | block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); | 588 | if (!(bh = sb_bread(s, block))) |
587 | if (!(bh = sb_bread(s, block))) | 589 | goto out_no_read; |
588 | goto out_no_read; | 590 | |
589 | 591 | vdp = (struct iso_volume_descriptor *)bh->b_data; | |
590 | vdp = (struct iso_volume_descriptor *)bh->b_data; | 592 | hdp = (struct hs_volume_descriptor *)bh->b_data; |
591 | hdp = (struct hs_volume_descriptor *)bh->b_data; | 593 | |
592 | 594 | /* | |
593 | /* Due to the overlapping physical location of the descriptors, | 595 | * Due to the overlapping physical location of the descriptors, |
594 | * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure | 596 | * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure |
595 | * proper identification in this case, we first check for ISO. | 597 | * proper identification in this case, we first check for ISO. |
596 | */ | 598 | */ |
597 | if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { | 599 | if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { |
598 | if (isonum_711 (vdp->type) == ISO_VD_END) | 600 | if (isonum_711(vdp->type) == ISO_VD_END) |
599 | break; | 601 | break; |
600 | if (isonum_711 (vdp->type) == ISO_VD_PRIMARY) { | 602 | if (isonum_711(vdp->type) == ISO_VD_PRIMARY) { |
601 | if (pri == NULL) { | 603 | if (pri == NULL) { |
602 | pri = (struct iso_primary_descriptor *)vdp; | 604 | pri = (struct iso_primary_descriptor *)vdp; |
603 | /* Save the buffer in case we need it ... */ | 605 | /* Save the buffer in case we need it ... */ |
604 | pri_bh = bh; | 606 | pri_bh = bh; |
605 | bh = NULL; | 607 | bh = NULL; |
606 | } | 608 | } |
607 | } | 609 | } |
608 | #ifdef CONFIG_JOLIET | 610 | #ifdef CONFIG_JOLIET |
609 | else if (isonum_711 (vdp->type) == ISO_VD_SUPPLEMENTARY) { | 611 | else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { |
610 | sec = (struct iso_supplementary_descriptor *)vdp; | 612 | sec = (struct iso_supplementary_descriptor *)vdp; |
611 | if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { | 613 | if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { |
612 | if (opt.joliet == 'y') { | 614 | if (opt.joliet == 'y') { |
613 | if (sec->escape[2] == 0x40) { | 615 | if (sec->escape[2] == 0x40) |
614 | joliet_level = 1; | 616 | joliet_level = 1; |
615 | } else if (sec->escape[2] == 0x43) { | 617 | else if (sec->escape[2] == 0x43) |
616 | joliet_level = 2; | 618 | joliet_level = 2; |
617 | } else if (sec->escape[2] == 0x45) { | 619 | else if (sec->escape[2] == 0x45) |
618 | joliet_level = 3; | 620 | joliet_level = 3; |
619 | } | 621 | |
620 | printk(KERN_DEBUG"ISO 9660 Extensions: Microsoft Joliet Level %d\n", | 622 | printk(KERN_DEBUG "ISO 9660 Extensions: " |
621 | joliet_level); | 623 | "Microsoft Joliet Level %d\n", |
624 | joliet_level); | ||
625 | } | ||
626 | goto root_found; | ||
627 | } else { | ||
628 | /* Unknown supplementary volume descriptor */ | ||
629 | sec = NULL; | ||
630 | } | ||
622 | } | 631 | } |
623 | goto root_found; | ||
624 | } else { | ||
625 | /* Unknown supplementary volume descriptor */ | ||
626 | sec = NULL; | ||
627 | } | ||
628 | } | ||
629 | #endif | 632 | #endif |
630 | } else { | 633 | } else { |
631 | if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { | 634 | if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { |
632 | if (isonum_711 (hdp->type) != ISO_VD_PRIMARY) | 635 | if (isonum_711(hdp->type) != ISO_VD_PRIMARY) |
633 | goto out_freebh; | 636 | goto out_freebh; |
634 | 637 | ||
635 | sbi->s_high_sierra = 1; | 638 | sbi->s_high_sierra = 1; |
636 | opt.rock = 'n'; | 639 | opt.rock = 'n'; |
637 | h_pri = (struct hs_primary_descriptor *)vdp; | 640 | h_pri = (struct hs_primary_descriptor *)vdp; |
638 | goto root_found; | 641 | goto root_found; |
642 | } | ||
639 | } | 643 | } |
640 | } | ||
641 | 644 | ||
642 | /* Just skip any volume descriptors we don't recognize */ | 645 | /* Just skip any volume descriptors we don't recognize */ |
643 | 646 | ||
644 | brelse(bh); | 647 | brelse(bh); |
645 | bh = NULL; | 648 | bh = NULL; |
646 | } | 649 | } |
647 | /* | 650 | /* |
648 | * If we fall through, either no volume descriptor was found, | 651 | * If we fall through, either no volume descriptor was found, |
@@ -657,24 +660,24 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) | |||
657 | root_found: | 660 | root_found: |
658 | 661 | ||
659 | if (joliet_level && (pri == NULL || opt.rock == 'n')) { | 662 | if (joliet_level && (pri == NULL || opt.rock == 'n')) { |
660 | /* This is the case of Joliet with the norock mount flag. | 663 | /* This is the case of Joliet with the norock mount flag. |
661 | * A disc with both Joliet and Rock Ridge is handled later | 664 | * A disc with both Joliet and Rock Ridge is handled later |
662 | */ | 665 | */ |
663 | pri = (struct iso_primary_descriptor *) sec; | 666 | pri = (struct iso_primary_descriptor *) sec; |
664 | } | 667 | } |
665 | 668 | ||
666 | if(sbi->s_high_sierra){ | 669 | if(sbi->s_high_sierra){ |
667 | rootp = (struct iso_directory_record *) h_pri->root_directory_record; | 670 | rootp = (struct iso_directory_record *) h_pri->root_directory_record; |
668 | sbi->s_nzones = isonum_733 (h_pri->volume_space_size); | 671 | sbi->s_nzones = isonum_733(h_pri->volume_space_size); |
669 | sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size); | 672 | sbi->s_log_zone_size = isonum_723(h_pri->logical_block_size); |
670 | sbi->s_max_size = isonum_733(h_pri->volume_space_size); | 673 | sbi->s_max_size = isonum_733(h_pri->volume_space_size); |
671 | } else { | 674 | } else { |
672 | if (!pri) | 675 | if (!pri) |
673 | goto out_freebh; | 676 | goto out_freebh; |
674 | rootp = (struct iso_directory_record *) pri->root_directory_record; | 677 | rootp = (struct iso_directory_record *) pri->root_directory_record; |
675 | sbi->s_nzones = isonum_733 (pri->volume_space_size); | 678 | sbi->s_nzones = isonum_733(pri->volume_space_size); |
676 | sbi->s_log_zone_size = isonum_723 (pri->logical_block_size); | 679 | sbi->s_log_zone_size = isonum_723(pri->logical_block_size); |
677 | sbi->s_max_size = isonum_733(pri->volume_space_size); | 680 | sbi->s_max_size = isonum_733(pri->volume_space_size); |
678 | } | 681 | } |
679 | 682 | ||
680 | sbi->s_ninodes = 0; /* No way to figure this out easily */ | 683 | sbi->s_ninodes = 0; /* No way to figure this out easily */ |
@@ -687,42 +690,43 @@ root_found: | |||
687 | * blocks that were 512 bytes (which should only very rarely | 690 | * blocks that were 512 bytes (which should only very rarely |
688 | * happen.) | 691 | * happen.) |
689 | */ | 692 | */ |
690 | if(orig_zonesize < opt.blocksize) | 693 | if (orig_zonesize < opt.blocksize) |
691 | goto out_bad_size; | 694 | goto out_bad_size; |
692 | 695 | ||
693 | /* RDE: convert log zone size to bit shift */ | 696 | /* RDE: convert log zone size to bit shift */ |
694 | switch (sbi->s_log_zone_size) | 697 | switch (sbi->s_log_zone_size) { |
695 | { case 512: sbi->s_log_zone_size = 9; break; | 698 | case 512: sbi->s_log_zone_size = 9; break; |
696 | case 1024: sbi->s_log_zone_size = 10; break; | 699 | case 1024: sbi->s_log_zone_size = 10; break; |
697 | case 2048: sbi->s_log_zone_size = 11; break; | 700 | case 2048: sbi->s_log_zone_size = 11; break; |
698 | 701 | ||
699 | default: | 702 | default: |
700 | goto out_bad_zone_size; | 703 | goto out_bad_zone_size; |
701 | } | 704 | } |
702 | 705 | ||
703 | s->s_magic = ISOFS_SUPER_MAGIC; | 706 | s->s_magic = ISOFS_SUPER_MAGIC; |
704 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ | 707 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ |
705 | 708 | ||
706 | /* The CDROM is read-only, has no nodes (devices) on it, and since | 709 | /* |
707 | all of the files appear to be owned by root, we really do not want | 710 | * The CDROM is read-only, has no nodes (devices) on it, and since |
708 | to allow suid. (suid or devices will not show up unless we have | 711 | * all of the files appear to be owned by root, we really do not want |
709 | Rock Ridge extensions) */ | 712 | * to allow suid. (suid or devices will not show up unless we have |
713 | * Rock Ridge extensions) | ||
714 | */ | ||
710 | 715 | ||
711 | s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; | 716 | s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; |
712 | 717 | ||
713 | /* Set this for reference. Its not currently used except on write | 718 | /* Set this for reference. Its not currently used except on write |
714 | which we don't have .. */ | 719 | which we don't have .. */ |
715 | 720 | ||
716 | first_data_zone = isonum_733 (rootp->extent) + | 721 | first_data_zone = isonum_733(rootp->extent) + |
717 | isonum_711 (rootp->ext_attr_length); | 722 | isonum_711(rootp->ext_attr_length); |
718 | sbi->s_firstdatazone = first_data_zone; | 723 | sbi->s_firstdatazone = first_data_zone; |
719 | #ifndef BEQUIET | 724 | #ifndef BEQUIET |
720 | printk(KERN_DEBUG "Max size:%ld Log zone size:%ld\n", | 725 | printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n", |
721 | sbi->s_max_size, | 726 | sbi->s_max_size, 1UL << sbi->s_log_zone_size); |
722 | 1UL << sbi->s_log_zone_size); | 727 | printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone); |
723 | printk(KERN_DEBUG "First datazone:%ld\n", sbi->s_firstdatazone); | ||
724 | if(sbi->s_high_sierra) | 728 | if(sbi->s_high_sierra) |
725 | printk(KERN_DEBUG "Disc in High Sierra format.\n"); | 729 | printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n"); |
726 | #endif | 730 | #endif |
727 | 731 | ||
728 | /* | 732 | /* |
@@ -737,8 +741,8 @@ root_found: | |||
737 | pri = (struct iso_primary_descriptor *) sec; | 741 | pri = (struct iso_primary_descriptor *) sec; |
738 | rootp = (struct iso_directory_record *) | 742 | rootp = (struct iso_directory_record *) |
739 | pri->root_directory_record; | 743 | pri->root_directory_record; |
740 | first_data_zone = isonum_733 (rootp->extent) + | 744 | first_data_zone = isonum_733(rootp->extent) + |
741 | isonum_711 (rootp->ext_attr_length); | 745 | isonum_711(rootp->ext_attr_length); |
742 | } | 746 | } |
743 | 747 | ||
744 | /* | 748 | /* |
@@ -771,7 +775,7 @@ root_found: | |||
771 | 775 | ||
772 | #ifdef CONFIG_JOLIET | 776 | #ifdef CONFIG_JOLIET |
773 | if (joliet_level && opt.utf8 == 0) { | 777 | if (joliet_level && opt.utf8 == 0) { |
774 | char * p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; | 778 | char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; |
775 | sbi->s_nls_iocharset = load_nls(p); | 779 | sbi->s_nls_iocharset = load_nls(p); |
776 | if (! sbi->s_nls_iocharset) { | 780 | if (! sbi->s_nls_iocharset) { |
777 | /* Fail only if explicit charset specified */ | 781 | /* Fail only if explicit charset specified */ |
@@ -821,7 +825,7 @@ root_found: | |||
821 | sbi->s_rock = 0; | 825 | sbi->s_rock = 0; |
822 | if (sbi->s_firstdatazone != first_data_zone) { | 826 | if (sbi->s_firstdatazone != first_data_zone) { |
823 | sbi->s_firstdatazone = first_data_zone; | 827 | sbi->s_firstdatazone = first_data_zone; |
824 | printk(KERN_DEBUG | 828 | printk(KERN_DEBUG |
825 | "ISOFS: changing to secondary root\n"); | 829 | "ISOFS: changing to secondary root\n"); |
826 | iput(inode); | 830 | iput(inode); |
827 | inode = isofs_iget(s, sbi->s_firstdatazone, 0); | 831 | inode = isofs_iget(s, sbi->s_firstdatazone, 0); |
@@ -830,8 +834,10 @@ root_found: | |||
830 | 834 | ||
831 | if (opt.check == 'u') { | 835 | if (opt.check == 'u') { |
832 | /* Only Joliet is case insensitive by default */ | 836 | /* Only Joliet is case insensitive by default */ |
833 | if (joliet_level) opt.check = 'r'; | 837 | if (joliet_level) |
834 | else opt.check = 's'; | 838 | opt.check = 'r'; |
839 | else | ||
840 | opt.check = 's'; | ||
835 | } | 841 | } |
836 | sbi->s_joliet_level = joliet_level; | 842 | sbi->s_joliet_level = joliet_level; |
837 | 843 | ||
@@ -846,8 +852,10 @@ root_found: | |||
846 | goto out_no_root; | 852 | goto out_no_root; |
847 | 853 | ||
848 | table = 0; | 854 | table = 0; |
849 | if (joliet_level) table += 2; | 855 | if (joliet_level) |
850 | if (opt.check == 'r') table++; | 856 | table += 2; |
857 | if (opt.check == 'r') | ||
858 | table++; | ||
851 | s->s_root->d_op = &isofs_dentry_ops[table]; | 859 | s->s_root->d_op = &isofs_dentry_ops[table]; |
852 | 860 | ||
853 | kfree(opt.iocharset); | 861 | kfree(opt.iocharset); |
@@ -858,10 +866,10 @@ root_found: | |||
858 | * Display error messages and free resources. | 866 | * Display error messages and free resources. |
859 | */ | 867 | */ |
860 | out_bad_root: | 868 | out_bad_root: |
861 | printk(KERN_WARNING "isofs_fill_super: root inode not initialized\n"); | 869 | printk(KERN_WARNING "%s: root inode not initialized\n", __func__); |
862 | goto out_iput; | 870 | goto out_iput; |
863 | out_no_root: | 871 | out_no_root: |
864 | printk(KERN_WARNING "isofs_fill_super: get root inode failed\n"); | 872 | printk(KERN_WARNING "%s: get root inode failed\n", __func__); |
865 | out_iput: | 873 | out_iput: |
866 | iput(inode); | 874 | iput(inode); |
867 | #ifdef CONFIG_JOLIET | 875 | #ifdef CONFIG_JOLIET |
@@ -870,21 +878,20 @@ out_iput: | |||
870 | #endif | 878 | #endif |
871 | goto out_freesbi; | 879 | goto out_freesbi; |
872 | out_no_read: | 880 | out_no_read: |
873 | printk(KERN_WARNING "isofs_fill_super: " | 881 | printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", |
874 | "bread failed, dev=%s, iso_blknum=%d, block=%d\n", | 882 | __func__, s->s_id, iso_blknum, block); |
875 | s->s_id, iso_blknum, block); | ||
876 | goto out_freesbi; | 883 | goto out_freesbi; |
877 | out_bad_zone_size: | 884 | out_bad_zone_size: |
878 | printk(KERN_WARNING "Bad logical zone size %ld\n", | 885 | printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", |
879 | sbi->s_log_zone_size); | 886 | sbi->s_log_zone_size); |
880 | goto out_freebh; | 887 | goto out_freebh; |
881 | out_bad_size: | 888 | out_bad_size: |
882 | printk(KERN_WARNING "Logical zone size(%d) < hardware blocksize(%u)\n", | 889 | printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n", |
883 | orig_zonesize, opt.blocksize); | 890 | orig_zonesize, opt.blocksize); |
884 | goto out_freebh; | 891 | goto out_freebh; |
885 | out_unknown_format: | 892 | out_unknown_format: |
886 | if (!silent) | 893 | if (!silent) |
887 | printk(KERN_WARNING "Unable to identify CD-ROM format.\n"); | 894 | printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n"); |
888 | 895 | ||
889 | out_freebh: | 896 | out_freebh: |
890 | brelse(bh); | 897 | brelse(bh); |
@@ -902,7 +909,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) | |||
902 | buf->f_type = ISOFS_SUPER_MAGIC; | 909 | buf->f_type = ISOFS_SUPER_MAGIC; |
903 | buf->f_bsize = sb->s_blocksize; | 910 | buf->f_bsize = sb->s_blocksize; |
904 | buf->f_blocks = (ISOFS_SB(sb)->s_nzones | 911 | buf->f_blocks = (ISOFS_SB(sb)->s_nzones |
905 | << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); | 912 | << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); |
906 | buf->f_bfree = 0; | 913 | buf->f_bfree = 0; |
907 | buf->f_bavail = 0; | 914 | buf->f_bavail = 0; |
908 | buf->f_files = ISOFS_SB(sb)->s_ninodes; | 915 | buf->f_files = ISOFS_SB(sb)->s_ninodes; |
@@ -931,20 +938,20 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
931 | 938 | ||
932 | rv = 0; | 939 | rv = 0; |
933 | if (iblock < 0 || iblock != iblock_s) { | 940 | if (iblock < 0 || iblock != iblock_s) { |
934 | printk("isofs_get_blocks: block number too large\n"); | 941 | printk(KERN_DEBUG "%s: block number too large\n", __func__); |
935 | goto abort; | 942 | goto abort; |
936 | } | 943 | } |
937 | 944 | ||
938 | b_off = iblock; | 945 | b_off = iblock; |
939 | 946 | ||
940 | offset = 0; | 947 | offset = 0; |
941 | firstext = ei->i_first_extent; | 948 | firstext = ei->i_first_extent; |
942 | sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); | 949 | sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); |
943 | nextblk = ei->i_next_section_block; | 950 | nextblk = ei->i_next_section_block; |
944 | nextoff = ei->i_next_section_offset; | 951 | nextoff = ei->i_next_section_offset; |
945 | section = 0; | 952 | section = 0; |
946 | 953 | ||
947 | while ( nblocks ) { | 954 | while (nblocks) { |
948 | /* If we are *way* beyond the end of the file, print a message. | 955 | /* If we are *way* beyond the end of the file, print a message. |
949 | * Access beyond the end of the file up to the next page boundary | 956 | * Access beyond the end of the file up to the next page boundary |
950 | * is normal, however because of the way the page cache works. | 957 | * is normal, however because of the way the page cache works. |
@@ -953,11 +960,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
953 | * I/O errors. | 960 | * I/O errors. |
954 | */ | 961 | */ |
955 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { | 962 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { |
956 | printk("isofs_get_blocks: block >= EOF (%ld, %ld)\n", | 963 | printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n", |
957 | iblock, (unsigned long) inode->i_size); | 964 | __func__, iblock, (unsigned long) inode->i_size); |
958 | goto abort; | 965 | goto abort; |
959 | } | 966 | } |
960 | 967 | ||
961 | /* On the last section, nextblk == 0, section size is likely to | 968 | /* On the last section, nextblk == 0, section size is likely to |
962 | * exceed sect_size by a partial block, and access beyond the | 969 | * exceed sect_size by a partial block, and access beyond the |
963 | * end of the file will reach beyond the section size, too. | 970 | * end of the file will reach beyond the section size, too. |
@@ -976,20 +983,21 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
976 | iput(ninode); | 983 | iput(ninode); |
977 | 984 | ||
978 | if (++section > 100) { | 985 | if (++section > 100) { |
979 | printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); | 986 | printk(KERN_DEBUG "%s: More than 100 file sections ?!?" |
980 | printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " | 987 | " aborting...\n", __func__); |
981 | "nextblk=%lu nextoff=%lu\n", | 988 | printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u " |
982 | iblock, firstext, (unsigned) sect_size, | 989 | "nextblk=%lu nextoff=%lu\n", __func__, |
983 | nextblk, nextoff); | 990 | iblock, firstext, (unsigned) sect_size, |
991 | nextblk, nextoff); | ||
984 | goto abort; | 992 | goto abort; |
985 | } | 993 | } |
986 | } | 994 | } |
987 | 995 | ||
988 | if ( *bh ) { | 996 | if (*bh) { |
989 | map_bh(*bh, inode->i_sb, firstext + b_off - offset); | 997 | map_bh(*bh, inode->i_sb, firstext + b_off - offset); |
990 | } else { | 998 | } else { |
991 | *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); | 999 | *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); |
992 | if ( !*bh ) | 1000 | if (!*bh) |
993 | goto abort; | 1001 | goto abort; |
994 | } | 1002 | } |
995 | bh++; /* Next buffer head */ | 1003 | bh++; /* Next buffer head */ |
@@ -1010,7 +1018,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock, | |||
1010 | struct buffer_head *bh_result, int create) | 1018 | struct buffer_head *bh_result, int create) |
1011 | { | 1019 | { |
1012 | if (create) { | 1020 | if (create) { |
1013 | printk("isofs_get_block: Kernel tries to allocate a block\n"); | 1021 | printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__); |
1014 | return -EROFS; | 1022 | return -EROFS; |
1015 | } | 1023 | } |
1016 | 1024 | ||
@@ -1070,11 +1078,11 @@ static int isofs_read_level3_size(struct inode *inode) | |||
1070 | { | 1078 | { |
1071 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 1079 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
1072 | int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; | 1080 | int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; |
1073 | struct buffer_head * bh = NULL; | 1081 | struct buffer_head *bh = NULL; |
1074 | unsigned long block, offset, block_saved, offset_saved; | 1082 | unsigned long block, offset, block_saved, offset_saved; |
1075 | int i = 0; | 1083 | int i = 0; |
1076 | int more_entries = 0; | 1084 | int more_entries = 0; |
1077 | struct iso_directory_record * tmpde = NULL; | 1085 | struct iso_directory_record *tmpde = NULL; |
1078 | struct iso_inode_info *ei = ISOFS_I(inode); | 1086 | struct iso_inode_info *ei = ISOFS_I(inode); |
1079 | 1087 | ||
1080 | inode->i_size = 0; | 1088 | inode->i_size = 0; |
@@ -1089,7 +1097,7 @@ static int isofs_read_level3_size(struct inode *inode) | |||
1089 | offset = ei->i_iget5_offset; | 1097 | offset = ei->i_iget5_offset; |
1090 | 1098 | ||
1091 | do { | 1099 | do { |
1092 | struct iso_directory_record * de; | 1100 | struct iso_directory_record *de; |
1093 | unsigned int de_len; | 1101 | unsigned int de_len; |
1094 | 1102 | ||
1095 | if (!bh) { | 1103 | if (!bh) { |
@@ -1163,10 +1171,9 @@ out_noread: | |||
1163 | return -EIO; | 1171 | return -EIO; |
1164 | 1172 | ||
1165 | out_toomany: | 1173 | out_toomany: |
1166 | printk(KERN_INFO "isofs_read_level3_size: " | 1174 | printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n" |
1167 | "More than 100 file sections ?!?, aborting...\n" | 1175 | "isofs_read_level3_size: inode=%lu\n", |
1168 | "isofs_read_level3_size: inode=%lu\n", | 1176 | __func__, inode->i_ino); |
1169 | inode->i_ino); | ||
1170 | goto out; | 1177 | goto out; |
1171 | } | 1178 | } |
1172 | 1179 | ||
@@ -1177,9 +1184,9 @@ static void isofs_read_inode(struct inode *inode) | |||
1177 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 1184 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
1178 | unsigned long block; | 1185 | unsigned long block; |
1179 | int high_sierra = sbi->s_high_sierra; | 1186 | int high_sierra = sbi->s_high_sierra; |
1180 | struct buffer_head * bh = NULL; | 1187 | struct buffer_head *bh = NULL; |
1181 | struct iso_directory_record * de; | 1188 | struct iso_directory_record *de; |
1182 | struct iso_directory_record * tmpde = NULL; | 1189 | struct iso_directory_record *tmpde = NULL; |
1183 | unsigned int de_len; | 1190 | unsigned int de_len; |
1184 | unsigned long offset; | 1191 | unsigned long offset; |
1185 | struct iso_inode_info *ei = ISOFS_I(inode); | 1192 | struct iso_inode_info *ei = ISOFS_I(inode); |
@@ -1199,7 +1206,7 @@ static void isofs_read_inode(struct inode *inode) | |||
1199 | 1206 | ||
1200 | tmpde = kmalloc(de_len, GFP_KERNEL); | 1207 | tmpde = kmalloc(de_len, GFP_KERNEL); |
1201 | if (tmpde == NULL) { | 1208 | if (tmpde == NULL) { |
1202 | printk(KERN_INFO "isofs_read_inode: out of memory\n"); | 1209 | printk(KERN_INFO "%s: out of memory\n", __func__); |
1203 | goto fail; | 1210 | goto fail; |
1204 | } | 1211 | } |
1205 | memcpy(tmpde, bh->b_data + offset, frag1); | 1212 | memcpy(tmpde, bh->b_data + offset, frag1); |
@@ -1212,24 +1219,26 @@ static void isofs_read_inode(struct inode *inode) | |||
1212 | } | 1219 | } |
1213 | 1220 | ||
1214 | inode->i_ino = isofs_get_ino(ei->i_iget5_block, | 1221 | inode->i_ino = isofs_get_ino(ei->i_iget5_block, |
1215 | ei->i_iget5_offset, | 1222 | ei->i_iget5_offset, |
1216 | ISOFS_BUFFER_BITS(inode)); | 1223 | ISOFS_BUFFER_BITS(inode)); |
1217 | 1224 | ||
1218 | /* Assume it is a normal-format file unless told otherwise */ | 1225 | /* Assume it is a normal-format file unless told otherwise */ |
1219 | ei->i_file_format = isofs_file_normal; | 1226 | ei->i_file_format = isofs_file_normal; |
1220 | 1227 | ||
1221 | if (de->flags[-high_sierra] & 2) { | 1228 | if (de->flags[-high_sierra] & 2) { |
1222 | inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; | 1229 | inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; |
1223 | inode->i_nlink = 1; /* Set to 1. We know there are 2, but | 1230 | inode->i_nlink = 1; /* |
1224 | the find utility tries to optimize | 1231 | * Set to 1. We know there are 2, but |
1225 | if it is 2, and it screws up. It is | 1232 | * the find utility tries to optimize |
1226 | easier to give 1 which tells find to | 1233 | * if it is 2, and it screws up. It is |
1227 | do it the hard way. */ | 1234 | * easier to give 1 which tells find to |
1235 | * do it the hard way. | ||
1236 | */ | ||
1228 | } else { | 1237 | } else { |
1229 | /* Everybody gets to read the file. */ | 1238 | /* Everybody gets to read the file. */ |
1230 | inode->i_mode = sbi->s_mode; | 1239 | inode->i_mode = sbi->s_mode; |
1231 | inode->i_nlink = 1; | 1240 | inode->i_nlink = 1; |
1232 | inode->i_mode |= S_IFREG; | 1241 | inode->i_mode |= S_IFREG; |
1233 | } | 1242 | } |
1234 | inode->i_uid = sbi->s_uid; | 1243 | inode->i_uid = sbi->s_uid; |
1235 | inode->i_gid = sbi->s_gid; | 1244 | inode->i_gid = sbi->s_gid; |
@@ -1239,13 +1248,14 @@ static void isofs_read_inode(struct inode *inode) | |||
1239 | ei->i_format_parm[1] = 0; | 1248 | ei->i_format_parm[1] = 0; |
1240 | ei->i_format_parm[2] = 0; | 1249 | ei->i_format_parm[2] = 0; |
1241 | 1250 | ||
1242 | ei->i_section_size = isonum_733 (de->size); | 1251 | ei->i_section_size = isonum_733(de->size); |
1243 | if (de->flags[-high_sierra] & 0x80) { | 1252 | if (de->flags[-high_sierra] & 0x80) { |
1244 | if(isofs_read_level3_size(inode)) goto fail; | 1253 | if(isofs_read_level3_size(inode)) |
1254 | goto fail; | ||
1245 | } else { | 1255 | } else { |
1246 | ei->i_next_section_block = 0; | 1256 | ei->i_next_section_block = 0; |
1247 | ei->i_next_section_offset = 0; | 1257 | ei->i_next_section_offset = 0; |
1248 | inode->i_size = isonum_733 (de->size); | 1258 | inode->i_size = isonum_733(de->size); |
1249 | } | 1259 | } |
1250 | 1260 | ||
1251 | /* | 1261 | /* |
@@ -1258,23 +1268,24 @@ static void isofs_read_inode(struct inode *inode) | |||
1258 | inode->i_size &= 0x00ffffff; | 1268 | inode->i_size &= 0x00ffffff; |
1259 | 1269 | ||
1260 | if (de->interleave[0]) { | 1270 | if (de->interleave[0]) { |
1261 | printk("Interleaved files not (yet) supported.\n"); | 1271 | printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n"); |
1262 | inode->i_size = 0; | 1272 | inode->i_size = 0; |
1263 | } | 1273 | } |
1264 | 1274 | ||
1265 | /* I have no idea what file_unit_size is used for, so | 1275 | /* I have no idea what file_unit_size is used for, so |
1266 | we will flag it for now */ | 1276 | we will flag it for now */ |
1267 | if (de->file_unit_size[0] != 0) { | 1277 | if (de->file_unit_size[0] != 0) { |
1268 | printk("File unit size != 0 for ISO file (%ld).\n", | 1278 | printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n", |
1269 | inode->i_ino); | 1279 | inode->i_ino); |
1270 | } | 1280 | } |
1271 | 1281 | ||
1272 | /* I have no idea what other flag bits are used for, so | 1282 | /* I have no idea what other flag bits are used for, so |
1273 | we will flag it for now */ | 1283 | we will flag it for now */ |
1274 | #ifdef DEBUG | 1284 | #ifdef DEBUG |
1275 | if((de->flags[-high_sierra] & ~2)!= 0){ | 1285 | if((de->flags[-high_sierra] & ~2)!= 0){ |
1276 | printk("Unusual flag settings for ISO file (%ld %x).\n", | 1286 | printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file " |
1277 | inode->i_ino, de->flags[-high_sierra]); | 1287 | "(%ld %x).\n", |
1288 | inode->i_ino, de->flags[-high_sierra]); | ||
1278 | } | 1289 | } |
1279 | #endif | 1290 | #endif |
1280 | 1291 | ||
@@ -1285,11 +1296,11 @@ static void isofs_read_inode(struct inode *inode) | |||
1285 | inode->i_atime.tv_nsec = | 1296 | inode->i_atime.tv_nsec = |
1286 | inode->i_ctime.tv_nsec = 0; | 1297 | inode->i_ctime.tv_nsec = 0; |
1287 | 1298 | ||
1288 | ei->i_first_extent = (isonum_733 (de->extent) + | 1299 | ei->i_first_extent = (isonum_733(de->extent) + |
1289 | isonum_711 (de->ext_attr_length)); | 1300 | isonum_711(de->ext_attr_length)); |
1290 | 1301 | ||
1291 | /* Set the number of blocks for stat() - should be done before RR */ | 1302 | /* Set the number of blocks for stat() - should be done before RR */ |
1292 | inode->i_blocks = (inode->i_size + 511) >> 9; | 1303 | inode->i_blocks = (inode->i_size + 511) >> 9; |
1293 | 1304 | ||
1294 | /* | 1305 | /* |
1295 | * Now test for possible Rock Ridge extensions which will override | 1306 | * Now test for possible Rock Ridge extensions which will override |
@@ -1306,7 +1317,7 @@ static void isofs_read_inode(struct inode *inode) | |||
1306 | /* Install the inode operations vector */ | 1317 | /* Install the inode operations vector */ |
1307 | if (S_ISREG(inode->i_mode)) { | 1318 | if (S_ISREG(inode->i_mode)) { |
1308 | inode->i_fop = &generic_ro_fops; | 1319 | inode->i_fop = &generic_ro_fops; |
1309 | switch ( ei->i_file_format ) { | 1320 | switch (ei->i_file_format) { |
1310 | #ifdef CONFIG_ZISOFS | 1321 | #ifdef CONFIG_ZISOFS |
1311 | case isofs_file_compressed: | 1322 | case isofs_file_compressed: |
1312 | inode->i_data.a_ops = &zisofs_aops; | 1323 | inode->i_data.a_ops = &zisofs_aops; |
@@ -1350,7 +1361,7 @@ static int isofs_iget5_test(struct inode *ino, void *data) | |||
1350 | struct isofs_iget5_callback_data *d = | 1361 | struct isofs_iget5_callback_data *d = |
1351 | (struct isofs_iget5_callback_data*)data; | 1362 | (struct isofs_iget5_callback_data*)data; |
1352 | return (i->i_iget5_block == d->block) | 1363 | return (i->i_iget5_block == d->block) |
1353 | && (i->i_iget5_offset == d->offset); | 1364 | && (i->i_iget5_offset == d->offset); |
1354 | } | 1365 | } |
1355 | 1366 | ||
1356 | static int isofs_iget5_set(struct inode *ino, void *data) | 1367 | static int isofs_iget5_set(struct inode *ino, void *data) |
@@ -1384,7 +1395,7 @@ struct inode *isofs_iget(struct super_block *sb, | |||
1384 | hashval = (block << sb->s_blocksize_bits) | offset; | 1395 | hashval = (block << sb->s_blocksize_bits) | offset; |
1385 | 1396 | ||
1386 | inode = iget5_locked(sb, hashval, &isofs_iget5_test, | 1397 | inode = iget5_locked(sb, hashval, &isofs_iget5_test, |
1387 | &isofs_iget5_set, &data); | 1398 | &isofs_iget5_set, &data); |
1388 | 1399 | ||
1389 | if (inode && (inode->i_state & I_NEW)) { | 1400 | if (inode && (inode->i_state & I_NEW)) { |
1390 | sb->s_op->read_inode(inode); | 1401 | sb->s_op->read_inode(inode); |
@@ -1398,7 +1409,7 @@ static int isofs_get_sb(struct file_system_type *fs_type, | |||
1398 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 1409 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
1399 | { | 1410 | { |
1400 | return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, | 1411 | return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, |
1401 | mnt); | 1412 | mnt); |
1402 | } | 1413 | } |
1403 | 1414 | ||
1404 | static struct file_system_type iso9660_fs_type = { | 1415 | static struct file_system_type iso9660_fs_type = { |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index efe2872cd4e3..a07e67b1ea7f 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/fs.h> | 1 | #include <linux/fs.h> |
2 | #include <linux/buffer_head.h> | 2 | #include <linux/buffer_head.h> |
3 | #include <linux/exportfs.h> | ||
3 | #include <linux/iso_fs.h> | 4 | #include <linux/iso_fs.h> |
4 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
5 | 6 | ||
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index fb8fe7a9ddc6..92c14b850e9c 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c | |||
@@ -80,22 +80,20 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st | |||
80 | 80 | ||
81 | if (utf8) { | 81 | if (utf8) { |
82 | len = wcsntombs_be(outname, de->name, | 82 | len = wcsntombs_be(outname, de->name, |
83 | de->name_len[0] >> 1, PAGE_SIZE); | 83 | de->name_len[0] >> 1, PAGE_SIZE); |
84 | } else { | 84 | } else { |
85 | len = uni16_to_x8(outname, (__be16 *) de->name, | 85 | len = uni16_to_x8(outname, (__be16 *) de->name, |
86 | de->name_len[0] >> 1, nls); | 86 | de->name_len[0] >> 1, nls); |
87 | } | 87 | } |
88 | if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) { | 88 | if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) |
89 | len -= 2; | 89 | len -= 2; |
90 | } | ||
91 | 90 | ||
92 | /* | 91 | /* |
93 | * Windows doesn't like periods at the end of a name, | 92 | * Windows doesn't like periods at the end of a name, |
94 | * so neither do we | 93 | * so neither do we |
95 | */ | 94 | */ |
96 | while (len >= 2 && (outname[len-1] == '.')) { | 95 | while (len >= 2 && (outname[len-1] == '.')) |
97 | len--; | 96 | len--; |
98 | } | ||
99 | 97 | ||
100 | return len; | 98 | return len; |
101 | } | 99 | } |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index c04b3a14a3e9..c8c7e5138a01 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
@@ -15,7 +15,7 @@ | |||
15 | * some sanity tests. | 15 | * some sanity tests. |
16 | */ | 16 | */ |
17 | static int | 17 | static int |
18 | isofs_cmp(struct dentry * dentry, const char * compare, int dlen) | 18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) |
19 | { | 19 | { |
20 | struct qstr qstr; | 20 | struct qstr qstr; |
21 | 21 | ||
@@ -48,24 +48,24 @@ isofs_cmp(struct dentry * dentry, const char * compare, int dlen) | |||
48 | */ | 48 | */ |
49 | static unsigned long | 49 | static unsigned long |
50 | isofs_find_entry(struct inode *dir, struct dentry *dentry, | 50 | isofs_find_entry(struct inode *dir, struct dentry *dentry, |
51 | unsigned long *block_rv, unsigned long* offset_rv, | 51 | unsigned long *block_rv, unsigned long *offset_rv, |
52 | char * tmpname, struct iso_directory_record * tmpde) | 52 | char *tmpname, struct iso_directory_record *tmpde) |
53 | { | 53 | { |
54 | unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); | 54 | unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); |
55 | unsigned char bufbits = ISOFS_BUFFER_BITS(dir); | 55 | unsigned char bufbits = ISOFS_BUFFER_BITS(dir); |
56 | unsigned long block, f_pos, offset, block_saved, offset_saved; | 56 | unsigned long block, f_pos, offset, block_saved, offset_saved; |
57 | struct buffer_head * bh = NULL; | 57 | struct buffer_head *bh = NULL; |
58 | struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); | 58 | struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); |
59 | 59 | ||
60 | if (!ISOFS_I(dir)->i_first_extent) | 60 | if (!ISOFS_I(dir)->i_first_extent) |
61 | return 0; | 61 | return 0; |
62 | 62 | ||
63 | f_pos = 0; | 63 | f_pos = 0; |
64 | offset = 0; | 64 | offset = 0; |
65 | block = 0; | 65 | block = 0; |
66 | 66 | ||
67 | while (f_pos < dir->i_size) { | 67 | while (f_pos < dir->i_size) { |
68 | struct iso_directory_record * de; | 68 | struct iso_directory_record *de; |
69 | int de_len, match, i, dlen; | 69 | int de_len, match, i, dlen; |
70 | char *dpnt; | 70 | char *dpnt; |
71 | 71 | ||
@@ -114,7 +114,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
114 | 114 | ||
115 | if (sbi->s_rock && | 115 | if (sbi->s_rock && |
116 | ((i = get_rock_ridge_filename(de, tmpname, dir)))) { | 116 | ((i = get_rock_ridge_filename(de, tmpname, dir)))) { |
117 | dlen = i; /* possibly -1 */ | 117 | dlen = i; /* possibly -1 */ |
118 | dpnt = tmpname; | 118 | dpnt = tmpname; |
119 | #ifdef CONFIG_JOLIET | 119 | #ifdef CONFIG_JOLIET |
120 | } else if (sbi->s_joliet_level) { | 120 | } else if (sbi->s_joliet_level) { |
@@ -145,8 +145,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
145 | isofs_normalize_block_and_offset(de, | 145 | isofs_normalize_block_and_offset(de, |
146 | &block_saved, | 146 | &block_saved, |
147 | &offset_saved); | 147 | &offset_saved); |
148 | *block_rv = block_saved; | 148 | *block_rv = block_saved; |
149 | *offset_rv = offset_saved; | 149 | *offset_rv = offset_saved; |
150 | brelse(bh); | 150 | brelse(bh); |
151 | return 1; | 151 | return 1; |
152 | } | 152 | } |
@@ -155,7 +155,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
155 | return 0; | 155 | return 0; |
156 | } | 156 | } |
157 | 157 | ||
158 | struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 158 | struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
159 | { | 159 | { |
160 | int found; | 160 | int found; |
161 | unsigned long block, offset; | 161 | unsigned long block, offset; |
@@ -170,9 +170,9 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n | |||
170 | 170 | ||
171 | lock_kernel(); | 171 | lock_kernel(); |
172 | found = isofs_find_entry(dir, dentry, | 172 | found = isofs_find_entry(dir, dentry, |
173 | &block, &offset, | 173 | &block, &offset, |
174 | page_address(page), | 174 | page_address(page), |
175 | 1024 + page_address(page)); | 175 | 1024 + page_address(page)); |
176 | __free_page(page); | 176 | __free_page(page); |
177 | 177 | ||
178 | inode = NULL; | 178 | inode = NULL; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 1facfaff97cb..a003d50edcdb 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -887,7 +887,8 @@ restart_loop: | |||
887 | journal->j_committing_transaction = NULL; | 887 | journal->j_committing_transaction = NULL; |
888 | spin_unlock(&journal->j_state_lock); | 888 | spin_unlock(&journal->j_state_lock); |
889 | 889 | ||
890 | if (commit_transaction->t_checkpoint_list == NULL) { | 890 | if (commit_transaction->t_checkpoint_list == NULL && |
891 | commit_transaction->t_checkpoint_io_list == NULL) { | ||
891 | __journal_drop_transaction(journal, commit_transaction); | 892 | __journal_drop_transaction(journal, commit_transaction); |
892 | } else { | 893 | } else { |
893 | if (journal->j_checkpoint_transactions == NULL) { | 894 | if (journal->j_checkpoint_transactions == NULL) { |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 824e3b7d4ec1..8db2fa25170b 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -68,6 +68,7 @@ | |||
68 | #include <linux/list.h> | 68 | #include <linux/list.h> |
69 | #include <linux/init.h> | 69 | #include <linux/init.h> |
70 | #endif | 70 | #endif |
71 | #include <linux/log2.h> | ||
71 | 72 | ||
72 | static struct kmem_cache *revoke_record_cache; | 73 | static struct kmem_cache *revoke_record_cache; |
73 | static struct kmem_cache *revoke_table_cache; | 74 | static struct kmem_cache *revoke_table_cache; |
@@ -211,7 +212,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) | |||
211 | journal->j_revoke = journal->j_revoke_table[0]; | 212 | journal->j_revoke = journal->j_revoke_table[0]; |
212 | 213 | ||
213 | /* Check that the hash_size is a power of two */ | 214 | /* Check that the hash_size is a power of two */ |
214 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 215 | J_ASSERT(is_power_of_2(hash_size)); |
215 | 216 | ||
216 | journal->j_revoke->hash_size = hash_size; | 217 | journal->j_revoke->hash_size = hash_size; |
217 | 218 | ||
@@ -238,7 +239,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) | |||
238 | journal->j_revoke = journal->j_revoke_table[1]; | 239 | journal->j_revoke = journal->j_revoke_table[1]; |
239 | 240 | ||
240 | /* Check that the hash_size is a power of two */ | 241 | /* Check that the hash_size is a power of two */ |
241 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 242 | J_ASSERT(is_power_of_2(hash_size)); |
242 | 243 | ||
243 | journal->j_revoke->hash_size = hash_size; | 244 | journal->j_revoke->hash_size = hash_size; |
244 | 245 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2856e1100a5f..c0f59d1b13dc 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -896,7 +896,8 @@ restart_loop: | |||
896 | journal->j_committing_transaction = NULL; | 896 | journal->j_committing_transaction = NULL; |
897 | spin_unlock(&journal->j_state_lock); | 897 | spin_unlock(&journal->j_state_lock); |
898 | 898 | ||
899 | if (commit_transaction->t_checkpoint_list == NULL) { | 899 | if (commit_transaction->t_checkpoint_list == NULL && |
900 | commit_transaction->t_checkpoint_io_list == NULL) { | ||
900 | __jbd2_journal_drop_transaction(journal, commit_transaction); | 901 | __jbd2_journal_drop_transaction(journal, commit_transaction); |
901 | } else { | 902 | } else { |
902 | if (journal->j_checkpoint_transactions == NULL) { | 903 | if (journal->j_checkpoint_transactions == NULL) { |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 9246e763da78..28cac049a56b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -68,6 +68,7 @@ | |||
68 | #include <linux/list.h> | 68 | #include <linux/list.h> |
69 | #include <linux/init.h> | 69 | #include <linux/init.h> |
70 | #endif | 70 | #endif |
71 | #include <linux/log2.h> | ||
71 | 72 | ||
72 | static struct kmem_cache *jbd2_revoke_record_cache; | 73 | static struct kmem_cache *jbd2_revoke_record_cache; |
73 | static struct kmem_cache *jbd2_revoke_table_cache; | 74 | static struct kmem_cache *jbd2_revoke_table_cache; |
@@ -212,7 +213,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) | |||
212 | journal->j_revoke = journal->j_revoke_table[0]; | 213 | journal->j_revoke = journal->j_revoke_table[0]; |
213 | 214 | ||
214 | /* Check that the hash_size is a power of two */ | 215 | /* Check that the hash_size is a power of two */ |
215 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 216 | J_ASSERT(is_power_of_2(hash_size)); |
216 | 217 | ||
217 | journal->j_revoke->hash_size = hash_size; | 218 | journal->j_revoke->hash_size = hash_size; |
218 | 219 | ||
@@ -239,7 +240,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) | |||
239 | journal->j_revoke = journal->j_revoke_table[1]; | 240 | journal->j_revoke = journal->j_revoke_table[1]; |
240 | 241 | ||
241 | /* Check that the hash_size is a power of two */ | 242 | /* Check that the hash_size is a power of two */ |
242 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 243 | J_ASSERT(is_power_of_2(hash_size)); |
243 | 244 | ||
244 | journal->j_revoke->hash_size = hash_size; | 245 | journal->j_revoke->hash_size = hash_size; |
245 | 246 | ||
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 0c82dfcfd246..143c5530caf3 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c | |||
@@ -81,6 +81,7 @@ static int jffs2_garbage_collect_thread(void *_c) | |||
81 | 81 | ||
82 | set_user_nice(current, 10); | 82 | set_user_nice(current, 10); |
83 | 83 | ||
84 | set_freezable(); | ||
84 | for (;;) { | 85 | for (;;) { |
85 | allow_signal(SIGHUP); | 86 | allow_signal(SIGHUP); |
86 | 87 | ||
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 2374b595f2e1..f0ec72b263f1 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); | |||
32 | extern void jfs_free_zero_link(struct inode *); | 32 | extern void jfs_free_zero_link(struct inode *); |
33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); | 33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); |
34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); | 34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); |
35 | extern struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp); | ||
35 | extern void jfs_set_inode_flags(struct inode *); | 36 | extern void jfs_set_inode_flags(struct inode *); |
36 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 37 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
37 | 38 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 25161c4121e4..932797ba433b 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1477,6 +1477,38 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
1477 | return dentry; | 1477 | return dentry; |
1478 | } | 1478 | } |
1479 | 1479 | ||
1480 | struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | ||
1481 | { | ||
1482 | __u32 *objp = vobjp; | ||
1483 | unsigned long ino = objp[0]; | ||
1484 | __u32 generation = objp[1]; | ||
1485 | struct inode *inode; | ||
1486 | struct dentry *result; | ||
1487 | |||
1488 | if (ino == 0) | ||
1489 | return ERR_PTR(-ESTALE); | ||
1490 | inode = iget(sb, ino); | ||
1491 | if (inode == NULL) | ||
1492 | return ERR_PTR(-ENOMEM); | ||
1493 | |||
1494 | if (is_bad_inode(inode) || | ||
1495 | (generation && inode->i_generation != generation)) { | ||
1496 | result = ERR_PTR(-ESTALE); | ||
1497 | goto out_iput; | ||
1498 | } | ||
1499 | |||
1500 | result = d_alloc_anon(inode); | ||
1501 | if (!result) { | ||
1502 | result = ERR_PTR(-ENOMEM); | ||
1503 | goto out_iput; | ||
1504 | } | ||
1505 | return result; | ||
1506 | |||
1507 | out_iput: | ||
1508 | iput(inode); | ||
1509 | return result; | ||
1510 | } | ||
1511 | |||
1480 | struct dentry *jfs_get_parent(struct dentry *dentry) | 1512 | struct dentry *jfs_get_parent(struct dentry *dentry) |
1481 | { | 1513 | { |
1482 | struct super_block *sb = dentry->d_inode->i_sb; | 1514 | struct super_block *sb = dentry->d_inode->i_sb; |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 20e4ac1c79a3..929fceca7999 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
28 | #include <linux/posix_acl.h> | 28 | #include <linux/posix_acl.h> |
29 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
30 | #include <linux/exportfs.h> | ||
30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
31 | #include <linux/seq_file.h> | 32 | #include <linux/seq_file.h> |
32 | 33 | ||
@@ -737,6 +738,7 @@ static const struct super_operations jfs_super_operations = { | |||
737 | }; | 738 | }; |
738 | 739 | ||
739 | static struct export_operations jfs_export_operations = { | 740 | static struct export_operations jfs_export_operations = { |
741 | .get_dentry = jfs_get_dentry, | ||
740 | .get_parent = jfs_get_parent, | 742 | .get_parent = jfs_get_parent, |
741 | }; | 743 | }; |
742 | 744 | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 26809325469c..82e2192a0d5c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/smp.h> | 25 | #include <linux/smp.h> |
26 | #include <linux/smp_lock.h> | 26 | #include <linux/smp_lock.h> |
27 | #include <linux/mutex.h> | 27 | #include <linux/mutex.h> |
28 | #include <linux/freezer.h> | ||
28 | 29 | ||
29 | #include <linux/sunrpc/types.h> | 30 | #include <linux/sunrpc/types.h> |
30 | #include <linux/sunrpc/stats.h> | 31 | #include <linux/sunrpc/stats.h> |
@@ -75,18 +76,31 @@ static const int nlm_port_min = 0, nlm_port_max = 65535; | |||
75 | 76 | ||
76 | static struct ctl_table_header * nlm_sysctl_table; | 77 | static struct ctl_table_header * nlm_sysctl_table; |
77 | 78 | ||
78 | static unsigned long set_grace_period(void) | 79 | static unsigned long get_lockd_grace_period(void) |
79 | { | 80 | { |
80 | unsigned long grace_period; | ||
81 | |||
82 | /* Note: nlm_timeout should always be nonzero */ | 81 | /* Note: nlm_timeout should always be nonzero */ |
83 | if (nlm_grace_period) | 82 | if (nlm_grace_period) |
84 | grace_period = ((nlm_grace_period + nlm_timeout - 1) | 83 | return roundup(nlm_grace_period, nlm_timeout) * HZ; |
85 | / nlm_timeout) * nlm_timeout * HZ; | ||
86 | else | 84 | else |
87 | grace_period = nlm_timeout * 5 * HZ; | 85 | return nlm_timeout * 5 * HZ; |
86 | } | ||
87 | |||
88 | unsigned long get_nfs_grace_period(void) | ||
89 | { | ||
90 | unsigned long lockdgrace = get_lockd_grace_period(); | ||
91 | unsigned long nfsdgrace = 0; | ||
92 | |||
93 | if (nlmsvc_ops) | ||
94 | nfsdgrace = nlmsvc_ops->get_grace_period(); | ||
95 | |||
96 | return max(lockdgrace, nfsdgrace); | ||
97 | } | ||
98 | EXPORT_SYMBOL(get_nfs_grace_period); | ||
99 | |||
100 | static unsigned long set_grace_period(void) | ||
101 | { | ||
88 | nlmsvc_grace_period = 1; | 102 | nlmsvc_grace_period = 1; |
89 | return grace_period + jiffies; | 103 | return get_nfs_grace_period() + jiffies; |
90 | } | 104 | } |
91 | 105 | ||
92 | static inline void clear_grace_period(void) | 106 | static inline void clear_grace_period(void) |
@@ -119,6 +133,7 @@ lockd(struct svc_rqst *rqstp) | |||
119 | complete(&lockd_start_done); | 133 | complete(&lockd_start_done); |
120 | 134 | ||
121 | daemonize("lockd"); | 135 | daemonize("lockd"); |
136 | set_freezable(); | ||
122 | 137 | ||
123 | /* Process request with signals blocked, but allow SIGKILL. */ | 138 | /* Process request with signals blocked, but allow SIGKILL. */ |
124 | allow_signal(SIGKILL); | 139 | allow_signal(SIGKILL); |
diff --git a/fs/mbcache.c b/fs/mbcache.c index deeb9dc062d9..fbb1d02f8791 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -100,7 +100,6 @@ struct mb_cache { | |||
100 | static LIST_HEAD(mb_cache_list); | 100 | static LIST_HEAD(mb_cache_list); |
101 | static LIST_HEAD(mb_cache_lru_list); | 101 | static LIST_HEAD(mb_cache_lru_list); |
102 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 102 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
103 | static struct shrinker *mb_shrinker; | ||
104 | 103 | ||
105 | static inline int | 104 | static inline int |
106 | mb_cache_indexes(struct mb_cache *cache) | 105 | mb_cache_indexes(struct mb_cache *cache) |
@@ -118,6 +117,10 @@ mb_cache_indexes(struct mb_cache *cache) | |||
118 | 117 | ||
119 | static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); | 118 | static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); |
120 | 119 | ||
120 | static struct shrinker mb_cache_shrinker = { | ||
121 | .shrink = mb_cache_shrink_fn, | ||
122 | .seeks = DEFAULT_SEEKS, | ||
123 | }; | ||
121 | 124 | ||
122 | static inline int | 125 | static inline int |
123 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 126 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) |
@@ -662,13 +665,13 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, | |||
662 | 665 | ||
663 | static int __init init_mbcache(void) | 666 | static int __init init_mbcache(void) |
664 | { | 667 | { |
665 | mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn); | 668 | register_shrinker(&mb_cache_shrinker); |
666 | return 0; | 669 | return 0; |
667 | } | 670 | } |
668 | 671 | ||
669 | static void __exit exit_mbcache(void) | 672 | static void __exit exit_mbcache(void) |
670 | { | 673 | { |
671 | remove_shrinker(mb_shrinker); | 674 | unregister_shrinker(&mb_cache_shrinker); |
672 | } | 675 | } |
673 | 676 | ||
674 | module_init(init_mbcache) | 677 | module_init(init_mbcache) |
diff --git a/fs/namespace.c b/fs/namespace.c index b696e3a0d18f..4198003d7e18 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
30 | #include "pnode.h" | 30 | #include "pnode.h" |
31 | #include "internal.h" | ||
31 | 32 | ||
32 | /* spinlock for vfsmount related operations, inplace of dcache_lock */ | 33 | /* spinlock for vfsmount related operations, inplace of dcache_lock */ |
33 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | 34 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); |
@@ -320,22 +321,16 @@ EXPORT_SYMBOL(mnt_unpin); | |||
320 | static void *m_start(struct seq_file *m, loff_t *pos) | 321 | static void *m_start(struct seq_file *m, loff_t *pos) |
321 | { | 322 | { |
322 | struct mnt_namespace *n = m->private; | 323 | struct mnt_namespace *n = m->private; |
323 | struct list_head *p; | ||
324 | loff_t l = *pos; | ||
325 | 324 | ||
326 | down_read(&namespace_sem); | 325 | down_read(&namespace_sem); |
327 | list_for_each(p, &n->list) | 326 | return seq_list_start(&n->list, *pos); |
328 | if (!l--) | ||
329 | return list_entry(p, struct vfsmount, mnt_list); | ||
330 | return NULL; | ||
331 | } | 327 | } |
332 | 328 | ||
333 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 329 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
334 | { | 330 | { |
335 | struct mnt_namespace *n = m->private; | 331 | struct mnt_namespace *n = m->private; |
336 | struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; | 332 | |
337 | (*pos)++; | 333 | return seq_list_next(v, &n->list, pos); |
338 | return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); | ||
339 | } | 334 | } |
340 | 335 | ||
341 | static void m_stop(struct seq_file *m, void *v) | 336 | static void m_stop(struct seq_file *m, void *v) |
@@ -350,7 +345,7 @@ static inline void mangle(struct seq_file *m, const char *s) | |||
350 | 345 | ||
351 | static int show_vfsmnt(struct seq_file *m, void *v) | 346 | static int show_vfsmnt(struct seq_file *m, void *v) |
352 | { | 347 | { |
353 | struct vfsmount *mnt = v; | 348 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); |
354 | int err = 0; | 349 | int err = 0; |
355 | static struct proc_fs_info { | 350 | static struct proc_fs_info { |
356 | int flag; | 351 | int flag; |
@@ -405,7 +400,7 @@ struct seq_operations mounts_op = { | |||
405 | 400 | ||
406 | static int show_vfsstat(struct seq_file *m, void *v) | 401 | static int show_vfsstat(struct seq_file *m, void *v) |
407 | { | 402 | { |
408 | struct vfsmount *mnt = v; | 403 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); |
409 | int err = 0; | 404 | int err = 0; |
410 | 405 | ||
411 | /* device */ | 406 | /* device */ |
@@ -1457,7 +1452,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
1457 | 1452 | ||
1458 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 1453 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
1459 | if (!new_ns) | 1454 | if (!new_ns) |
1460 | return NULL; | 1455 | return ERR_PTR(-ENOMEM); |
1461 | 1456 | ||
1462 | atomic_set(&new_ns->count, 1); | 1457 | atomic_set(&new_ns->count, 1); |
1463 | INIT_LIST_HEAD(&new_ns->list); | 1458 | INIT_LIST_HEAD(&new_ns->list); |
@@ -1471,7 +1466,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
1471 | if (!new_ns->root) { | 1466 | if (!new_ns->root) { |
1472 | up_write(&namespace_sem); | 1467 | up_write(&namespace_sem); |
1473 | kfree(new_ns); | 1468 | kfree(new_ns); |
1474 | return NULL; | 1469 | return ERR_PTR(-ENOMEM);; |
1475 | } | 1470 | } |
1476 | spin_lock(&vfsmount_lock); | 1471 | spin_lock(&vfsmount_lock); |
1477 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); | 1472 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); |
@@ -1515,7 +1510,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
1515 | return new_ns; | 1510 | return new_ns; |
1516 | } | 1511 | } |
1517 | 1512 | ||
1518 | struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns, | 1513 | struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, |
1519 | struct fs_struct *new_fs) | 1514 | struct fs_struct *new_fs) |
1520 | { | 1515 | { |
1521 | struct mnt_namespace *new_ns; | 1516 | struct mnt_namespace *new_ns; |
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index d3152f8d95c6..2b145de45b39 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -203,7 +203,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
203 | 203 | ||
204 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { | 204 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { |
205 | if (pos >= MAX_NON_LFS) { | 205 | if (pos >= MAX_NON_LFS) { |
206 | send_sig(SIGXFSZ, current, 0); | ||
207 | return -EFBIG; | 206 | return -EFBIG; |
208 | } | 207 | } |
209 | if (count > MAX_NON_LFS - (u32)pos) { | 208 | if (count > MAX_NON_LFS - (u32)pos) { |
@@ -212,7 +211,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
212 | } | 211 | } |
213 | if (pos >= inode->i_sb->s_maxbytes) { | 212 | if (pos >= inode->i_sb->s_maxbytes) { |
214 | if (count || pos > inode->i_sb->s_maxbytes) { | 213 | if (count || pos > inode->i_sb->s_maxbytes) { |
215 | send_sig(SIGXFSZ, current, 0); | ||
216 | return -EFBIG; | 214 | return -EFBIG; |
217 | } | 215 | } |
218 | } | 216 | } |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f309c8741a..a796be5051bf 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sunrpc/svcsock.h> | 14 | #include <linux/sunrpc/svcsock.h> |
15 | #include <linux/nfs_fs.h> | 15 | #include <linux/nfs_fs.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/freezer.h> | ||
17 | 18 | ||
18 | #include <net/inet_sock.h> | 19 | #include <net/inet_sock.h> |
19 | 20 | ||
@@ -67,6 +68,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
67 | daemonize("nfsv4-svc"); | 68 | daemonize("nfsv4-svc"); |
68 | /* Process request with signals blocked, but allow SIGKILL. */ | 69 | /* Process request with signals blocked, but allow SIGKILL. */ |
69 | allow_signal(SIGKILL); | 70 | allow_signal(SIGKILL); |
71 | set_freezable(); | ||
70 | 72 | ||
71 | complete(&nfs_callback_info.started); | 73 | complete(&nfs_callback_info.started); |
72 | 74 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ccb455053ee4..a49f9feff776 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -1206,23 +1206,9 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1206 | */ | 1206 | */ |
1207 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1207 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
1208 | { | 1208 | { |
1209 | struct list_head *_p; | ||
1210 | loff_t pos = *_pos; | ||
1211 | |||
1212 | /* lock the list against modification */ | 1209 | /* lock the list against modification */ |
1213 | spin_lock(&nfs_client_lock); | 1210 | spin_lock(&nfs_client_lock); |
1214 | 1211 | return seq_list_start_head(&nfs_client_list, *_pos); | |
1215 | /* allow for the header line */ | ||
1216 | if (!pos) | ||
1217 | return SEQ_START_TOKEN; | ||
1218 | pos--; | ||
1219 | |||
1220 | /* find the n'th element in the list */ | ||
1221 | list_for_each(_p, &nfs_client_list) | ||
1222 | if (!pos--) | ||
1223 | break; | ||
1224 | |||
1225 | return _p != &nfs_client_list ? _p : NULL; | ||
1226 | } | 1212 | } |
1227 | 1213 | ||
1228 | /* | 1214 | /* |
@@ -1230,14 +1216,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | |||
1230 | */ | 1216 | */ |
1231 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | 1217 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) |
1232 | { | 1218 | { |
1233 | struct list_head *_p; | 1219 | return seq_list_next(v, &nfs_client_list, pos); |
1234 | |||
1235 | (*pos)++; | ||
1236 | |||
1237 | _p = v; | ||
1238 | _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next; | ||
1239 | |||
1240 | return _p != &nfs_client_list ? _p : NULL; | ||
1241 | } | 1220 | } |
1242 | 1221 | ||
1243 | /* | 1222 | /* |
@@ -1256,7 +1235,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1256 | struct nfs_client *clp; | 1235 | struct nfs_client *clp; |
1257 | 1236 | ||
1258 | /* display header on line 1 */ | 1237 | /* display header on line 1 */ |
1259 | if (v == SEQ_START_TOKEN) { | 1238 | if (v == &nfs_client_list) { |
1260 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); | 1239 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); |
1261 | return 0; | 1240 | return 0; |
1262 | } | 1241 | } |
@@ -1297,23 +1276,9 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1297 | */ | 1276 | */ |
1298 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1277 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
1299 | { | 1278 | { |
1300 | struct list_head *_p; | ||
1301 | loff_t pos = *_pos; | ||
1302 | |||
1303 | /* lock the list against modification */ | 1279 | /* lock the list against modification */ |
1304 | spin_lock(&nfs_client_lock); | 1280 | spin_lock(&nfs_client_lock); |
1305 | 1281 | return seq_list_start_head(&nfs_volume_list, *_pos); | |
1306 | /* allow for the header line */ | ||
1307 | if (!pos) | ||
1308 | return SEQ_START_TOKEN; | ||
1309 | pos--; | ||
1310 | |||
1311 | /* find the n'th element in the list */ | ||
1312 | list_for_each(_p, &nfs_volume_list) | ||
1313 | if (!pos--) | ||
1314 | break; | ||
1315 | |||
1316 | return _p != &nfs_volume_list ? _p : NULL; | ||
1317 | } | 1282 | } |
1318 | 1283 | ||
1319 | /* | 1284 | /* |
@@ -1321,14 +1286,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | |||
1321 | */ | 1286 | */ |
1322 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | 1287 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) |
1323 | { | 1288 | { |
1324 | struct list_head *_p; | 1289 | return seq_list_next(v, &nfs_volume_list, pos); |
1325 | |||
1326 | (*pos)++; | ||
1327 | |||
1328 | _p = v; | ||
1329 | _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next; | ||
1330 | |||
1331 | return _p != &nfs_volume_list ? _p : NULL; | ||
1332 | } | 1290 | } |
1333 | 1291 | ||
1334 | /* | 1292 | /* |
@@ -1349,7 +1307,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1349 | char dev[8], fsid[17]; | 1307 | char dev[8], fsid[17]; |
1350 | 1308 | ||
1351 | /* display header on line 1 */ | 1309 | /* display header on line 1 */ |
1352 | if (v == SEQ_START_TOKEN) { | 1310 | if (v == &nfs_volume_list) { |
1353 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); | 1311 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); |
1354 | return 0; | 1312 | return 0; |
1355 | } | 1313 | } |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a2b1af89ca1a..adffe1615c51 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -300,7 +300,10 @@ static const struct super_operations nfs4_sops = { | |||
300 | }; | 300 | }; |
301 | #endif | 301 | #endif |
302 | 302 | ||
303 | static struct shrinker *acl_shrinker; | 303 | static struct shrinker acl_shrinker = { |
304 | .shrink = nfs_access_cache_shrinker, | ||
305 | .seeks = DEFAULT_SEEKS, | ||
306 | }; | ||
304 | 307 | ||
305 | /* | 308 | /* |
306 | * Register the NFS filesystems | 309 | * Register the NFS filesystems |
@@ -321,7 +324,7 @@ int __init register_nfs_fs(void) | |||
321 | if (ret < 0) | 324 | if (ret < 0) |
322 | goto error_2; | 325 | goto error_2; |
323 | #endif | 326 | #endif |
324 | acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker); | 327 | register_shrinker(&acl_shrinker); |
325 | return 0; | 328 | return 0; |
326 | 329 | ||
327 | #ifdef CONFIG_NFS_V4 | 330 | #ifdef CONFIG_NFS_V4 |
@@ -339,8 +342,7 @@ error_0: | |||
339 | */ | 342 | */ |
340 | void __exit unregister_nfs_fs(void) | 343 | void __exit unregister_nfs_fs(void) |
341 | { | 344 | { |
342 | if (acl_shrinker != NULL) | 345 | unregister_shrinker(&acl_shrinker); |
343 | remove_shrinker(acl_shrinker); | ||
344 | #ifdef CONFIG_NFS_V4 | 346 | #ifdef CONFIG_NFS_V4 |
345 | unregister_filesystem(&nfs4_fs_type); | 347 | unregister_filesystem(&nfs4_fs_type); |
346 | nfs_unregister_sysctl(); | 348 | nfs_unregister_sysctl(); |
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 6e92b0fe5323..cf61dc8ae942 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -12,17 +12,31 @@ | |||
12 | 12 | ||
13 | #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) | 13 | #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) |
14 | 14 | ||
15 | static int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) | ||
16 | { | ||
17 | struct exp_flavor_info *f; | ||
18 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
19 | |||
20 | for (f = exp->ex_flavors; f < end; f++) { | ||
21 | if (f->pseudoflavor == rqstp->rq_flavor) | ||
22 | return f->flags; | ||
23 | } | ||
24 | return exp->ex_flags; | ||
25 | |||
26 | } | ||
27 | |||
15 | int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | 28 | int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) |
16 | { | 29 | { |
17 | struct svc_cred cred = rqstp->rq_cred; | 30 | struct svc_cred cred = rqstp->rq_cred; |
18 | int i; | 31 | int i; |
32 | int flags = nfsexp_flags(rqstp, exp); | ||
19 | int ret; | 33 | int ret; |
20 | 34 | ||
21 | if (exp->ex_flags & NFSEXP_ALLSQUASH) { | 35 | if (flags & NFSEXP_ALLSQUASH) { |
22 | cred.cr_uid = exp->ex_anon_uid; | 36 | cred.cr_uid = exp->ex_anon_uid; |
23 | cred.cr_gid = exp->ex_anon_gid; | 37 | cred.cr_gid = exp->ex_anon_gid; |
24 | cred.cr_group_info = groups_alloc(0); | 38 | cred.cr_group_info = groups_alloc(0); |
25 | } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { | 39 | } else if (flags & NFSEXP_ROOTSQUASH) { |
26 | struct group_info *gi; | 40 | struct group_info *gi; |
27 | if (!cred.cr_uid) | 41 | if (!cred.cr_uid) |
28 | cred.cr_uid = exp->ex_anon_uid; | 42 | cred.cr_uid = exp->ex_anon_uid; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 79bd03b8bbf8..c7bbf460b009 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -26,12 +26,15 @@ | |||
26 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
27 | #include <linux/hash.h> | 27 | #include <linux/hash.h> |
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/exportfs.h> | ||
29 | 30 | ||
30 | #include <linux/sunrpc/svc.h> | 31 | #include <linux/sunrpc/svc.h> |
31 | #include <linux/nfsd/nfsd.h> | 32 | #include <linux/nfsd/nfsd.h> |
32 | #include <linux/nfsd/nfsfh.h> | 33 | #include <linux/nfsd/nfsfh.h> |
33 | #include <linux/nfsd/syscall.h> | 34 | #include <linux/nfsd/syscall.h> |
34 | #include <linux/lockd/bind.h> | 35 | #include <linux/lockd/bind.h> |
36 | #include <linux/sunrpc/msg_prot.h> | ||
37 | #include <linux/sunrpc/gss_api.h> | ||
35 | 38 | ||
36 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT | 39 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT |
37 | 40 | ||
@@ -451,8 +454,48 @@ out_free_all: | |||
451 | return err; | 454 | return err; |
452 | } | 455 | } |
453 | 456 | ||
457 | static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) | ||
458 | { | ||
459 | int listsize, err; | ||
460 | struct exp_flavor_info *f; | ||
461 | |||
462 | err = get_int(mesg, &listsize); | ||
463 | if (err) | ||
464 | return err; | ||
465 | if (listsize < 0 || listsize > MAX_SECINFO_LIST) | ||
466 | return -EINVAL; | ||
467 | |||
468 | for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { | ||
469 | err = get_int(mesg, &f->pseudoflavor); | ||
470 | if (err) | ||
471 | return err; | ||
472 | /* | ||
473 | * Just a quick sanity check; we could also try to check | ||
474 | * whether this pseudoflavor is supported, but at worst | ||
475 | * an unsupported pseudoflavor on the export would just | ||
476 | * be a pseudoflavor that won't match the flavor of any | ||
477 | * authenticated request. The administrator will | ||
478 | * probably discover the problem when someone fails to | ||
479 | * authenticate. | ||
480 | */ | ||
481 | if (f->pseudoflavor < 0) | ||
482 | return -EINVAL; | ||
483 | err = get_int(mesg, &f->flags); | ||
484 | if (err) | ||
485 | return err; | ||
486 | /* Only some flags are allowed to differ between flavors: */ | ||
487 | if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp->ex_flags)) | ||
488 | return -EINVAL; | ||
489 | } | ||
490 | exp->ex_nflavors = listsize; | ||
491 | return 0; | ||
492 | } | ||
493 | |||
454 | #else /* CONFIG_NFSD_V4 */ | 494 | #else /* CONFIG_NFSD_V4 */ |
455 | static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; } | 495 | static inline int |
496 | fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc){return 0;} | ||
497 | static inline int | ||
498 | secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } | ||
456 | #endif | 499 | #endif |
457 | 500 | ||
458 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | 501 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) |
@@ -476,6 +519,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
476 | 519 | ||
477 | exp.ex_uuid = NULL; | 520 | exp.ex_uuid = NULL; |
478 | 521 | ||
522 | /* secinfo */ | ||
523 | exp.ex_nflavors = 0; | ||
524 | |||
479 | if (mesg[mlen-1] != '\n') | 525 | if (mesg[mlen-1] != '\n') |
480 | return -EINVAL; | 526 | return -EINVAL; |
481 | mesg[mlen-1] = 0; | 527 | mesg[mlen-1] = 0; |
@@ -553,7 +599,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
553 | if (exp.ex_uuid == NULL) | 599 | if (exp.ex_uuid == NULL) |
554 | err = -ENOMEM; | 600 | err = -ENOMEM; |
555 | } | 601 | } |
556 | } else | 602 | } else if (strcmp(buf, "secinfo") == 0) |
603 | err = secinfo_parse(&mesg, buf, &exp); | ||
604 | else | ||
557 | /* quietly ignore unknown words and anything | 605 | /* quietly ignore unknown words and anything |
558 | * following. Newer user-space can try to set | 606 | * following. Newer user-space can try to set |
559 | * new values, then see what the result was. | 607 | * new values, then see what the result was. |
@@ -593,6 +641,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
593 | 641 | ||
594 | static void exp_flags(struct seq_file *m, int flag, int fsid, | 642 | static void exp_flags(struct seq_file *m, int flag, int fsid, |
595 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); | 643 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); |
644 | static void show_secinfo(struct seq_file *m, struct svc_export *exp); | ||
596 | 645 | ||
597 | static int svc_export_show(struct seq_file *m, | 646 | static int svc_export_show(struct seq_file *m, |
598 | struct cache_detail *cd, | 647 | struct cache_detail *cd, |
@@ -622,6 +671,7 @@ static int svc_export_show(struct seq_file *m, | |||
622 | seq_printf(m, "%02x", exp->ex_uuid[i]); | 671 | seq_printf(m, "%02x", exp->ex_uuid[i]); |
623 | } | 672 | } |
624 | } | 673 | } |
674 | show_secinfo(m, exp); | ||
625 | } | 675 | } |
626 | seq_puts(m, ")\n"); | 676 | seq_puts(m, ")\n"); |
627 | return 0; | 677 | return 0; |
@@ -654,6 +704,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
654 | { | 704 | { |
655 | struct svc_export *new = container_of(cnew, struct svc_export, h); | 705 | struct svc_export *new = container_of(cnew, struct svc_export, h); |
656 | struct svc_export *item = container_of(citem, struct svc_export, h); | 706 | struct svc_export *item = container_of(citem, struct svc_export, h); |
707 | int i; | ||
657 | 708 | ||
658 | new->ex_flags = item->ex_flags; | 709 | new->ex_flags = item->ex_flags; |
659 | new->ex_anon_uid = item->ex_anon_uid; | 710 | new->ex_anon_uid = item->ex_anon_uid; |
@@ -669,6 +720,10 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
669 | item->ex_fslocs.locations_count = 0; | 720 | item->ex_fslocs.locations_count = 0; |
670 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; | 721 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; |
671 | item->ex_fslocs.migrated = 0; | 722 | item->ex_fslocs.migrated = 0; |
723 | new->ex_nflavors = item->ex_nflavors; | ||
724 | for (i = 0; i < MAX_SECINFO_LIST; i++) { | ||
725 | new->ex_flavors[i] = item->ex_flavors[i]; | ||
726 | } | ||
672 | } | 727 | } |
673 | 728 | ||
674 | static struct cache_head *svc_export_alloc(void) | 729 | static struct cache_head *svc_export_alloc(void) |
@@ -738,16 +793,18 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) | |||
738 | int err; | 793 | int err; |
739 | 794 | ||
740 | if (!clp) | 795 | if (!clp) |
741 | return NULL; | 796 | return ERR_PTR(-ENOENT); |
742 | 797 | ||
743 | key.ek_client = clp; | 798 | key.ek_client = clp; |
744 | key.ek_fsidtype = fsid_type; | 799 | key.ek_fsidtype = fsid_type; |
745 | memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); | 800 | memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); |
746 | 801 | ||
747 | ek = svc_expkey_lookup(&key); | 802 | ek = svc_expkey_lookup(&key); |
748 | if (ek != NULL) | 803 | if (ek == NULL) |
749 | if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp))) | 804 | return ERR_PTR(-ENOMEM); |
750 | ek = ERR_PTR(err); | 805 | err = cache_check(&svc_expkey_cache, &ek->h, reqp); |
806 | if (err) | ||
807 | return ERR_PTR(err); | ||
751 | return ek; | 808 | return ek; |
752 | } | 809 | } |
753 | 810 | ||
@@ -808,30 +865,21 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, | |||
808 | struct cache_req *reqp) | 865 | struct cache_req *reqp) |
809 | { | 866 | { |
810 | struct svc_export *exp, key; | 867 | struct svc_export *exp, key; |
868 | int err; | ||
811 | 869 | ||
812 | if (!clp) | 870 | if (!clp) |
813 | return NULL; | 871 | return ERR_PTR(-ENOENT); |
814 | 872 | ||
815 | key.ex_client = clp; | 873 | key.ex_client = clp; |
816 | key.ex_mnt = mnt; | 874 | key.ex_mnt = mnt; |
817 | key.ex_dentry = dentry; | 875 | key.ex_dentry = dentry; |
818 | 876 | ||
819 | exp = svc_export_lookup(&key); | 877 | exp = svc_export_lookup(&key); |
820 | if (exp != NULL) { | 878 | if (exp == NULL) |
821 | int err; | 879 | return ERR_PTR(-ENOMEM); |
822 | 880 | err = cache_check(&svc_export_cache, &exp->h, reqp); | |
823 | err = cache_check(&svc_export_cache, &exp->h, reqp); | 881 | if (err) |
824 | switch (err) { | 882 | return ERR_PTR(err); |
825 | case 0: break; | ||
826 | case -EAGAIN: | ||
827 | case -ETIMEDOUT: | ||
828 | exp = ERR_PTR(err); | ||
829 | break; | ||
830 | default: | ||
831 | exp = NULL; | ||
832 | } | ||
833 | } | ||
834 | |||
835 | return exp; | 883 | return exp; |
836 | } | 884 | } |
837 | 885 | ||
@@ -847,7 +895,7 @@ exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, | |||
847 | dget(dentry); | 895 | dget(dentry); |
848 | exp = exp_get_by_name(clp, mnt, dentry, reqp); | 896 | exp = exp_get_by_name(clp, mnt, dentry, reqp); |
849 | 897 | ||
850 | while (exp == NULL && !IS_ROOT(dentry)) { | 898 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { |
851 | struct dentry *parent; | 899 | struct dentry *parent; |
852 | 900 | ||
853 | parent = dget_parent(dentry); | 901 | parent = dget_parent(dentry); |
@@ -900,7 +948,7 @@ static void exp_fsid_unhash(struct svc_export *exp) | |||
900 | return; | 948 | return; |
901 | 949 | ||
902 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); | 950 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); |
903 | if (ek && !IS_ERR(ek)) { | 951 | if (!IS_ERR(ek)) { |
904 | ek->h.expiry_time = get_seconds()-1; | 952 | ek->h.expiry_time = get_seconds()-1; |
905 | cache_put(&ek->h, &svc_expkey_cache); | 953 | cache_put(&ek->h, &svc_expkey_cache); |
906 | } | 954 | } |
@@ -938,7 +986,7 @@ static void exp_unhash(struct svc_export *exp) | |||
938 | struct inode *inode = exp->ex_dentry->d_inode; | 986 | struct inode *inode = exp->ex_dentry->d_inode; |
939 | 987 | ||
940 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); | 988 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); |
941 | if (ek && !IS_ERR(ek)) { | 989 | if (!IS_ERR(ek)) { |
942 | ek->h.expiry_time = get_seconds()-1; | 990 | ek->h.expiry_time = get_seconds()-1; |
943 | cache_put(&ek->h, &svc_expkey_cache); | 991 | cache_put(&ek->h, &svc_expkey_cache); |
944 | } | 992 | } |
@@ -989,13 +1037,12 @@ exp_export(struct nfsctl_export *nxp) | |||
989 | 1037 | ||
990 | /* must make sure there won't be an ex_fsid clash */ | 1038 | /* must make sure there won't be an ex_fsid clash */ |
991 | if ((nxp->ex_flags & NFSEXP_FSID) && | 1039 | if ((nxp->ex_flags & NFSEXP_FSID) && |
992 | (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && | 1040 | (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && |
993 | !IS_ERR(fsid_key) && | ||
994 | fsid_key->ek_mnt && | 1041 | fsid_key->ek_mnt && |
995 | (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) | 1042 | (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) |
996 | goto finish; | 1043 | goto finish; |
997 | 1044 | ||
998 | if (exp) { | 1045 | if (!IS_ERR(exp)) { |
999 | /* just a flags/id/fsid update */ | 1046 | /* just a flags/id/fsid update */ |
1000 | 1047 | ||
1001 | exp_fsid_unhash(exp); | 1048 | exp_fsid_unhash(exp); |
@@ -1104,7 +1151,7 @@ exp_unexport(struct nfsctl_export *nxp) | |||
1104 | err = -EINVAL; | 1151 | err = -EINVAL; |
1105 | exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); | 1152 | exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); |
1106 | path_release(&nd); | 1153 | path_release(&nd); |
1107 | if (!exp) | 1154 | if (IS_ERR(exp)) |
1108 | goto out_domain; | 1155 | goto out_domain; |
1109 | 1156 | ||
1110 | exp_do_unexport(exp); | 1157 | exp_do_unexport(exp); |
@@ -1149,10 +1196,6 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) | |||
1149 | err = PTR_ERR(exp); | 1196 | err = PTR_ERR(exp); |
1150 | goto out; | 1197 | goto out; |
1151 | } | 1198 | } |
1152 | if (!exp) { | ||
1153 | dprintk("nfsd: exp_rootfh export not found.\n"); | ||
1154 | goto out; | ||
1155 | } | ||
1156 | 1199 | ||
1157 | /* | 1200 | /* |
1158 | * fh must be initialized before calling fh_compose | 1201 | * fh must be initialized before calling fh_compose |
@@ -1176,17 +1219,130 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, | |||
1176 | { | 1219 | { |
1177 | struct svc_export *exp; | 1220 | struct svc_export *exp; |
1178 | struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); | 1221 | struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); |
1179 | if (!ek || IS_ERR(ek)) | 1222 | if (IS_ERR(ek)) |
1180 | return ERR_PTR(PTR_ERR(ek)); | 1223 | return ERR_PTR(PTR_ERR(ek)); |
1181 | 1224 | ||
1182 | exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); | 1225 | exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); |
1183 | cache_put(&ek->h, &svc_expkey_cache); | 1226 | cache_put(&ek->h, &svc_expkey_cache); |
1184 | 1227 | ||
1185 | if (!exp || IS_ERR(exp)) | 1228 | if (IS_ERR(exp)) |
1186 | return ERR_PTR(PTR_ERR(exp)); | 1229 | return ERR_PTR(PTR_ERR(exp)); |
1187 | return exp; | 1230 | return exp; |
1188 | } | 1231 | } |
1189 | 1232 | ||
1233 | __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) | ||
1234 | { | ||
1235 | struct exp_flavor_info *f; | ||
1236 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
1237 | |||
1238 | /* legacy gss-only clients are always OK: */ | ||
1239 | if (exp->ex_client == rqstp->rq_gssclient) | ||
1240 | return 0; | ||
1241 | /* ip-address based client; check sec= export option: */ | ||
1242 | for (f = exp->ex_flavors; f < end; f++) { | ||
1243 | if (f->pseudoflavor == rqstp->rq_flavor) | ||
1244 | return 0; | ||
1245 | } | ||
1246 | /* defaults in absence of sec= options: */ | ||
1247 | if (exp->ex_nflavors == 0) { | ||
1248 | if (rqstp->rq_flavor == RPC_AUTH_NULL || | ||
1249 | rqstp->rq_flavor == RPC_AUTH_UNIX) | ||
1250 | return 0; | ||
1251 | } | ||
1252 | return nfserr_wrongsec; | ||
1253 | } | ||
1254 | |||
1255 | /* | ||
1256 | * Uses rq_client and rq_gssclient to find an export; uses rq_client (an | ||
1257 | * auth_unix client) if it's available and has secinfo information; | ||
1258 | * otherwise, will try to use rq_gssclient. | ||
1259 | * | ||
1260 | * Called from functions that handle requests; functions that do work on | ||
1261 | * behalf of mountd are passed a single client name to use, and should | ||
1262 | * use exp_get_by_name() or exp_find(). | ||
1263 | */ | ||
1264 | struct svc_export * | ||
1265 | rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, | ||
1266 | struct dentry *dentry) | ||
1267 | { | ||
1268 | struct svc_export *gssexp, *exp = NULL; | ||
1269 | |||
1270 | if (rqstp->rq_client == NULL) | ||
1271 | goto gss; | ||
1272 | |||
1273 | /* First try the auth_unix client: */ | ||
1274 | exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, | ||
1275 | &rqstp->rq_chandle); | ||
1276 | if (PTR_ERR(exp) == -ENOENT) | ||
1277 | goto gss; | ||
1278 | if (IS_ERR(exp)) | ||
1279 | return exp; | ||
1280 | /* If it has secinfo, assume there are no gss/... clients */ | ||
1281 | if (exp->ex_nflavors > 0) | ||
1282 | return exp; | ||
1283 | gss: | ||
1284 | /* Otherwise, try falling back on gss client */ | ||
1285 | if (rqstp->rq_gssclient == NULL) | ||
1286 | return exp; | ||
1287 | gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, | ||
1288 | &rqstp->rq_chandle); | ||
1289 | if (PTR_ERR(gssexp) == -ENOENT) | ||
1290 | return exp; | ||
1291 | if (exp && !IS_ERR(exp)) | ||
1292 | exp_put(exp); | ||
1293 | return gssexp; | ||
1294 | } | ||
1295 | |||
1296 | struct svc_export * | ||
1297 | rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) | ||
1298 | { | ||
1299 | struct svc_export *gssexp, *exp = NULL; | ||
1300 | |||
1301 | if (rqstp->rq_client == NULL) | ||
1302 | goto gss; | ||
1303 | |||
1304 | /* First try the auth_unix client: */ | ||
1305 | exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle); | ||
1306 | if (PTR_ERR(exp) == -ENOENT) | ||
1307 | goto gss; | ||
1308 | if (IS_ERR(exp)) | ||
1309 | return exp; | ||
1310 | /* If it has secinfo, assume there are no gss/... clients */ | ||
1311 | if (exp->ex_nflavors > 0) | ||
1312 | return exp; | ||
1313 | gss: | ||
1314 | /* Otherwise, try falling back on gss client */ | ||
1315 | if (rqstp->rq_gssclient == NULL) | ||
1316 | return exp; | ||
1317 | gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv, | ||
1318 | &rqstp->rq_chandle); | ||
1319 | if (PTR_ERR(gssexp) == -ENOENT) | ||
1320 | return exp; | ||
1321 | if (exp && !IS_ERR(exp)) | ||
1322 | exp_put(exp); | ||
1323 | return gssexp; | ||
1324 | } | ||
1325 | |||
1326 | struct svc_export * | ||
1327 | rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, | ||
1328 | struct dentry *dentry) | ||
1329 | { | ||
1330 | struct svc_export *exp; | ||
1331 | |||
1332 | dget(dentry); | ||
1333 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | ||
1334 | |||
1335 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { | ||
1336 | struct dentry *parent; | ||
1337 | |||
1338 | parent = dget_parent(dentry); | ||
1339 | dput(dentry); | ||
1340 | dentry = parent; | ||
1341 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | ||
1342 | } | ||
1343 | dput(dentry); | ||
1344 | return exp; | ||
1345 | } | ||
1190 | 1346 | ||
1191 | /* | 1347 | /* |
1192 | * Called when we need the filehandle for the root of the pseudofs, | 1348 | * Called when we need the filehandle for the root of the pseudofs, |
@@ -1194,8 +1350,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, | |||
1194 | * export point with fsid==0 | 1350 | * export point with fsid==0 |
1195 | */ | 1351 | */ |
1196 | __be32 | 1352 | __be32 |
1197 | exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, | 1353 | exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) |
1198 | struct cache_req *creq) | ||
1199 | { | 1354 | { |
1200 | struct svc_export *exp; | 1355 | struct svc_export *exp; |
1201 | __be32 rv; | 1356 | __be32 rv; |
@@ -1203,12 +1358,16 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, | |||
1203 | 1358 | ||
1204 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); | 1359 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); |
1205 | 1360 | ||
1206 | exp = exp_find(clp, FSID_NUM, fsidv, creq); | 1361 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); |
1362 | if (PTR_ERR(exp) == -ENOENT) | ||
1363 | return nfserr_perm; | ||
1207 | if (IS_ERR(exp)) | 1364 | if (IS_ERR(exp)) |
1208 | return nfserrno(PTR_ERR(exp)); | 1365 | return nfserrno(PTR_ERR(exp)); |
1209 | if (exp == NULL) | ||
1210 | return nfserr_perm; | ||
1211 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); | 1366 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); |
1367 | if (rv) | ||
1368 | goto out; | ||
1369 | rv = check_nfsd_access(exp, rqstp); | ||
1370 | out: | ||
1212 | exp_put(exp); | 1371 | exp_put(exp); |
1213 | return rv; | 1372 | return rv; |
1214 | } | 1373 | } |
@@ -1296,28 +1455,62 @@ static struct flags { | |||
1296 | { 0, {"", ""}} | 1455 | { 0, {"", ""}} |
1297 | }; | 1456 | }; |
1298 | 1457 | ||
1299 | static void exp_flags(struct seq_file *m, int flag, int fsid, | 1458 | static void show_expflags(struct seq_file *m, int flags, int mask) |
1300 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) | ||
1301 | { | 1459 | { |
1302 | int first = 0; | ||
1303 | struct flags *flg; | 1460 | struct flags *flg; |
1461 | int state, first = 0; | ||
1304 | 1462 | ||
1305 | for (flg = expflags; flg->flag; flg++) { | 1463 | for (flg = expflags; flg->flag; flg++) { |
1306 | int state = (flg->flag & flag)?0:1; | 1464 | if (flg->flag & ~mask) |
1465 | continue; | ||
1466 | state = (flg->flag & flags) ? 0 : 1; | ||
1307 | if (*flg->name[state]) | 1467 | if (*flg->name[state]) |
1308 | seq_printf(m, "%s%s", first++?",":"", flg->name[state]); | 1468 | seq_printf(m, "%s%s", first++?",":"", flg->name[state]); |
1309 | } | 1469 | } |
1470 | } | ||
1471 | |||
1472 | static void show_secinfo_flags(struct seq_file *m, int flags) | ||
1473 | { | ||
1474 | seq_printf(m, ","); | ||
1475 | show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); | ||
1476 | } | ||
1477 | |||
1478 | static void show_secinfo(struct seq_file *m, struct svc_export *exp) | ||
1479 | { | ||
1480 | struct exp_flavor_info *f; | ||
1481 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
1482 | int lastflags = 0, first = 0; | ||
1483 | |||
1484 | if (exp->ex_nflavors == 0) | ||
1485 | return; | ||
1486 | for (f = exp->ex_flavors; f < end; f++) { | ||
1487 | if (first || f->flags != lastflags) { | ||
1488 | if (!first) | ||
1489 | show_secinfo_flags(m, lastflags); | ||
1490 | seq_printf(m, ",sec=%d", f->pseudoflavor); | ||
1491 | lastflags = f->flags; | ||
1492 | } else { | ||
1493 | seq_printf(m, ":%d", f->pseudoflavor); | ||
1494 | } | ||
1495 | } | ||
1496 | show_secinfo_flags(m, lastflags); | ||
1497 | } | ||
1498 | |||
1499 | static void exp_flags(struct seq_file *m, int flag, int fsid, | ||
1500 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) | ||
1501 | { | ||
1502 | show_expflags(m, flag, NFSEXP_ALLFLAGS); | ||
1310 | if (flag & NFSEXP_FSID) | 1503 | if (flag & NFSEXP_FSID) |
1311 | seq_printf(m, "%sfsid=%d", first++?",":"", fsid); | 1504 | seq_printf(m, ",fsid=%d", fsid); |
1312 | if (anonu != (uid_t)-2 && anonu != (0x10000-2)) | 1505 | if (anonu != (uid_t)-2 && anonu != (0x10000-2)) |
1313 | seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); | 1506 | seq_printf(m, ",sanonuid=%d", anonu); |
1314 | if (anong != (gid_t)-2 && anong != (0x10000-2)) | 1507 | if (anong != (gid_t)-2 && anong != (0x10000-2)) |
1315 | seq_printf(m, "%sanongid=%d", first++?",":"", anong); | 1508 | seq_printf(m, ",sanongid=%d", anong); |
1316 | if (fsloc && fsloc->locations_count > 0) { | 1509 | if (fsloc && fsloc->locations_count > 0) { |
1317 | char *loctype = (fsloc->migrated) ? "refer" : "replicas"; | 1510 | char *loctype = (fsloc->migrated) ? "refer" : "replicas"; |
1318 | int i; | 1511 | int i; |
1319 | 1512 | ||
1320 | seq_printf(m, "%s%s=", first++?",":"", loctype); | 1513 | seq_printf(m, ",%s=", loctype); |
1321 | seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); | 1514 | seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); |
1322 | seq_putc(m, '@'); | 1515 | seq_putc(m, '@'); |
1323 | seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); | 1516 | seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); |
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 221acd1f11f6..9e4a568a5013 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c | |||
@@ -65,6 +65,7 @@ nlm_fclose(struct file *filp) | |||
65 | static struct nlmsvc_binding nfsd_nlm_ops = { | 65 | static struct nlmsvc_binding nfsd_nlm_ops = { |
66 | .fopen = nlm_fopen, /* open file for locking */ | 66 | .fopen = nlm_fopen, /* open file for locking */ |
67 | .fclose = nlm_fclose, /* close file */ | 67 | .fclose = nlm_fclose, /* close file */ |
68 | .get_grace_period = get_nfs4_grace_period, | ||
68 | }; | 69 | }; |
69 | 70 | ||
70 | void | 71 | void |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index cc3b7badd486..b6ed38380ab8 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -183,8 +183,13 @@ static void | |||
183 | summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) | 183 | summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) |
184 | { | 184 | { |
185 | struct posix_acl_entry *pa, *pe; | 185 | struct posix_acl_entry *pa, *pe; |
186 | pas->users = 0; | 186 | |
187 | pas->groups = 0; | 187 | /* |
188 | * Only pas.users and pas.groups need initialization; previous | ||
189 | * posix_acl_valid() calls ensure that the other fields will be | ||
190 | * initialized in the following loop. But, just to placate gcc: | ||
191 | */ | ||
192 | memset(pas, 0, sizeof(*pas)); | ||
188 | pas->mask = 07; | 193 | pas->mask = 07; |
189 | 194 | ||
190 | pe = acl->a_entries + acl->a_count; | 195 | pe = acl->a_entries + acl->a_count; |
@@ -732,13 +737,16 @@ int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, | |||
732 | *pacl = posix_state_to_acl(&effective_acl_state, flags); | 737 | *pacl = posix_state_to_acl(&effective_acl_state, flags); |
733 | if (IS_ERR(*pacl)) { | 738 | if (IS_ERR(*pacl)) { |
734 | ret = PTR_ERR(*pacl); | 739 | ret = PTR_ERR(*pacl); |
740 | *pacl = NULL; | ||
735 | goto out_dstate; | 741 | goto out_dstate; |
736 | } | 742 | } |
737 | *dpacl = posix_state_to_acl(&default_acl_state, | 743 | *dpacl = posix_state_to_acl(&default_acl_state, |
738 | flags | NFS4_ACL_TYPE_DEFAULT); | 744 | flags | NFS4_ACL_TYPE_DEFAULT); |
739 | if (IS_ERR(*dpacl)) { | 745 | if (IS_ERR(*dpacl)) { |
740 | ret = PTR_ERR(*dpacl); | 746 | ret = PTR_ERR(*dpacl); |
747 | *dpacl = NULL; | ||
741 | posix_acl_release(*pacl); | 748 | posix_acl_release(*pacl); |
749 | *pacl = NULL; | ||
742 | goto out_dstate; | 750 | goto out_dstate; |
743 | } | 751 | } |
744 | sort_pacl(*pacl); | 752 | sort_pacl(*pacl); |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5443c52b57aa..31d6633c7fe4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -75,7 +75,7 @@ enum nfs_cb_opnum4 { | |||
75 | #define op_enc_sz 1 | 75 | #define op_enc_sz 1 |
76 | #define op_dec_sz 2 | 76 | #define op_dec_sz 2 |
77 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) | 77 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) |
78 | #define enc_stateid_sz 16 | 78 | #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) |
79 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ | 79 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ |
80 | 1 + enc_stateid_sz + \ | 80 | 1 + enc_stateid_sz + \ |
81 | enc_nfs4_fh_sz) | 81 | enc_nfs4_fh_sz) |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 45aa21ce6784..2cf9a9a2d89c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -587,6 +587,15 @@ idmap_lookup(struct svc_rqst *rqstp, | |||
587 | return ret; | 587 | return ret; |
588 | } | 588 | } |
589 | 589 | ||
590 | static char * | ||
591 | rqst_authname(struct svc_rqst *rqstp) | ||
592 | { | ||
593 | struct auth_domain *clp; | ||
594 | |||
595 | clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; | ||
596 | return clp->name; | ||
597 | } | ||
598 | |||
590 | static int | 599 | static int |
591 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, | 600 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, |
592 | uid_t *id) | 601 | uid_t *id) |
@@ -600,7 +609,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen | |||
600 | return -EINVAL; | 609 | return -EINVAL; |
601 | memcpy(key.name, name, namelen); | 610 | memcpy(key.name, name, namelen); |
602 | key.name[namelen] = '\0'; | 611 | key.name[namelen] = '\0'; |
603 | strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); | 612 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); |
604 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); | 613 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); |
605 | if (ret == -ENOENT) | 614 | if (ret == -ENOENT) |
606 | ret = -ESRCH; /* nfserr_badname */ | 615 | ret = -ESRCH; /* nfserr_badname */ |
@@ -620,7 +629,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) | |||
620 | }; | 629 | }; |
621 | int ret; | 630 | int ret; |
622 | 631 | ||
623 | strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); | 632 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); |
624 | ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); | 633 | ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); |
625 | if (ret == -ENOENT) | 634 | if (ret == -ENOENT) |
626 | return sprintf(name, "%u", id); | 635 | return sprintf(name, "%u", id); |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8522729830db..3c627128e205 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/nfsd/state.h> | 47 | #include <linux/nfsd/state.h> |
48 | #include <linux/nfsd/xdr4.h> | 48 | #include <linux/nfsd/xdr4.h> |
49 | #include <linux/nfs4_acl.h> | 49 | #include <linux/nfs4_acl.h> |
50 | #include <linux/sunrpc/gss_api.h> | ||
50 | 51 | ||
51 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 52 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
52 | 53 | ||
@@ -286,8 +287,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
286 | __be32 status; | 287 | __be32 status; |
287 | 288 | ||
288 | fh_put(&cstate->current_fh); | 289 | fh_put(&cstate->current_fh); |
289 | status = exp_pseudoroot(rqstp->rq_client, &cstate->current_fh, | 290 | status = exp_pseudoroot(rqstp, &cstate->current_fh); |
290 | &rqstp->rq_chandle); | ||
291 | return status; | 291 | return status; |
292 | } | 292 | } |
293 | 293 | ||
@@ -474,8 +474,8 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
474 | __be32 ret; | 474 | __be32 ret; |
475 | 475 | ||
476 | fh_init(&tmp_fh, NFS4_FHSIZE); | 476 | fh_init(&tmp_fh, NFS4_FHSIZE); |
477 | if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, | 477 | ret = exp_pseudoroot(rqstp, &tmp_fh); |
478 | &rqstp->rq_chandle)) != 0) | 478 | if (ret) |
479 | return ret; | 479 | return ret; |
480 | if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { | 480 | if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { |
481 | fh_put(&tmp_fh); | 481 | fh_put(&tmp_fh); |
@@ -611,6 +611,30 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
611 | } | 611 | } |
612 | 612 | ||
613 | static __be32 | 613 | static __be32 |
614 | nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
615 | struct nfsd4_secinfo *secinfo) | ||
616 | { | ||
617 | struct svc_fh resfh; | ||
618 | struct svc_export *exp; | ||
619 | struct dentry *dentry; | ||
620 | __be32 err; | ||
621 | |||
622 | fh_init(&resfh, NFS4_FHSIZE); | ||
623 | err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, | ||
624 | secinfo->si_name, secinfo->si_namelen, | ||
625 | &exp, &dentry); | ||
626 | if (err) | ||
627 | return err; | ||
628 | if (dentry->d_inode == NULL) { | ||
629 | exp_put(exp); | ||
630 | err = nfserr_noent; | ||
631 | } else | ||
632 | secinfo->si_exp = exp; | ||
633 | dput(dentry); | ||
634 | return err; | ||
635 | } | ||
636 | |||
637 | static __be32 | ||
614 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 638 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
615 | struct nfsd4_setattr *setattr) | 639 | struct nfsd4_setattr *setattr) |
616 | { | 640 | { |
@@ -1009,6 +1033,9 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
1009 | [OP_SAVEFH] = { | 1033 | [OP_SAVEFH] = { |
1010 | .op_func = (nfsd4op_func)nfsd4_savefh, | 1034 | .op_func = (nfsd4op_func)nfsd4_savefh, |
1011 | }, | 1035 | }, |
1036 | [OP_SECINFO] = { | ||
1037 | .op_func = (nfsd4op_func)nfsd4_secinfo, | ||
1038 | }, | ||
1012 | [OP_SETATTR] = { | 1039 | [OP_SETATTR] = { |
1013 | .op_func = (nfsd4op_func)nfsd4_setattr, | 1040 | .op_func = (nfsd4op_func)nfsd4_setattr, |
1014 | }, | 1041 | }, |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8c52913d7cb6..e4a4c87ec8c6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -49,8 +49,10 @@ | |||
49 | #include <linux/nfsd/state.h> | 49 | #include <linux/nfsd/state.h> |
50 | #include <linux/nfsd/xdr4.h> | 50 | #include <linux/nfsd/xdr4.h> |
51 | #include <linux/namei.h> | 51 | #include <linux/namei.h> |
52 | #include <linux/swap.h> | ||
52 | #include <linux/mutex.h> | 53 | #include <linux/mutex.h> |
53 | #include <linux/lockd/bind.h> | 54 | #include <linux/lockd/bind.h> |
55 | #include <linux/module.h> | ||
54 | 56 | ||
55 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 57 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
56 | 58 | ||
@@ -149,6 +151,7 @@ get_nfs4_file(struct nfs4_file *fi) | |||
149 | } | 151 | } |
150 | 152 | ||
151 | static int num_delegations; | 153 | static int num_delegations; |
154 | unsigned int max_delegations; | ||
152 | 155 | ||
153 | /* | 156 | /* |
154 | * Open owner state (share locks) | 157 | * Open owner state (share locks) |
@@ -192,7 +195,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
192 | struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; | 195 | struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; |
193 | 196 | ||
194 | dprintk("NFSD alloc_init_deleg\n"); | 197 | dprintk("NFSD alloc_init_deleg\n"); |
195 | if (num_delegations > STATEID_HASH_SIZE * 4) | 198 | if (fp->fi_had_conflict) |
199 | return NULL; | ||
200 | if (num_delegations > max_delegations) | ||
196 | return NULL; | 201 | return NULL; |
197 | dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); | 202 | dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); |
198 | if (dp == NULL) | 203 | if (dp == NULL) |
@@ -999,6 +1004,7 @@ alloc_init_file(struct inode *ino) | |||
999 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | 1004 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
1000 | fp->fi_inode = igrab(ino); | 1005 | fp->fi_inode = igrab(ino); |
1001 | fp->fi_id = current_fileid++; | 1006 | fp->fi_id = current_fileid++; |
1007 | fp->fi_had_conflict = false; | ||
1002 | return fp; | 1008 | return fp; |
1003 | } | 1009 | } |
1004 | return NULL; | 1010 | return NULL; |
@@ -1325,6 +1331,7 @@ do_recall(void *__dp) | |||
1325 | { | 1331 | { |
1326 | struct nfs4_delegation *dp = __dp; | 1332 | struct nfs4_delegation *dp = __dp; |
1327 | 1333 | ||
1334 | dp->dl_file->fi_had_conflict = true; | ||
1328 | nfsd4_cb_recall(dp); | 1335 | nfsd4_cb_recall(dp); |
1329 | return 0; | 1336 | return 0; |
1330 | } | 1337 | } |
@@ -3190,20 +3197,49 @@ nfsd4_load_reboot_recovery_data(void) | |||
3190 | printk("NFSD: Failure reading reboot recovery data\n"); | 3197 | printk("NFSD: Failure reading reboot recovery data\n"); |
3191 | } | 3198 | } |
3192 | 3199 | ||
3200 | unsigned long | ||
3201 | get_nfs4_grace_period(void) | ||
3202 | { | ||
3203 | return max(user_lease_time, lease_time) * HZ; | ||
3204 | } | ||
3205 | |||
3206 | /* | ||
3207 | * Since the lifetime of a delegation isn't limited to that of an open, a | ||
3208 | * client may quite reasonably hang on to a delegation as long as it has | ||
3209 | * the inode cached. This becomes an obvious problem the first time a | ||
3210 | * client's inode cache approaches the size of the server's total memory. | ||
3211 | * | ||
3212 | * For now we avoid this problem by imposing a hard limit on the number | ||
3213 | * of delegations, which varies according to the server's memory size. | ||
3214 | */ | ||
3215 | static void | ||
3216 | set_max_delegations(void) | ||
3217 | { | ||
3218 | /* | ||
3219 | * Allow at most 4 delegations per megabyte of RAM. Quick | ||
3220 | * estimates suggest that in the worst case (where every delegation | ||
3221 | * is for a different inode), a delegation could take about 1.5K, | ||
3222 | * giving a worst case usage of about 6% of memory. | ||
3223 | */ | ||
3224 | max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); | ||
3225 | } | ||
3226 | |||
3193 | /* initialization to perform when the nfsd service is started: */ | 3227 | /* initialization to perform when the nfsd service is started: */ |
3194 | 3228 | ||
3195 | static void | 3229 | static void |
3196 | __nfs4_state_start(void) | 3230 | __nfs4_state_start(void) |
3197 | { | 3231 | { |
3198 | time_t grace_time; | 3232 | unsigned long grace_time; |
3199 | 3233 | ||
3200 | boot_time = get_seconds(); | 3234 | boot_time = get_seconds(); |
3201 | grace_time = max(user_lease_time, lease_time); | 3235 | grace_time = get_nfs_grace_period(); |
3202 | lease_time = user_lease_time; | 3236 | lease_time = user_lease_time; |
3203 | in_grace = 1; | 3237 | in_grace = 1; |
3204 | printk("NFSD: starting %ld-second grace period\n", grace_time); | 3238 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
3239 | grace_time/HZ); | ||
3205 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 3240 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
3206 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ); | 3241 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time); |
3242 | set_max_delegations(); | ||
3207 | } | 3243 | } |
3208 | 3244 | ||
3209 | int | 3245 | int |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15809dfd88a5..b3d55c6747fd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -56,6 +56,8 @@ | |||
56 | #include <linux/nfsd_idmap.h> | 56 | #include <linux/nfsd_idmap.h> |
57 | #include <linux/nfs4.h> | 57 | #include <linux/nfs4.h> |
58 | #include <linux/nfs4_acl.h> | 58 | #include <linux/nfs4_acl.h> |
59 | #include <linux/sunrpc/gss_api.h> | ||
60 | #include <linux/sunrpc/svcauth_gss.h> | ||
59 | 61 | ||
60 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 62 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
61 | 63 | ||
@@ -819,6 +821,23 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) | |||
819 | } | 821 | } |
820 | 822 | ||
821 | static __be32 | 823 | static __be32 |
824 | nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | ||
825 | struct nfsd4_secinfo *secinfo) | ||
826 | { | ||
827 | DECODE_HEAD; | ||
828 | |||
829 | READ_BUF(4); | ||
830 | READ32(secinfo->si_namelen); | ||
831 | READ_BUF(secinfo->si_namelen); | ||
832 | SAVEMEM(secinfo->si_name, secinfo->si_namelen); | ||
833 | status = check_filename(secinfo->si_name, secinfo->si_namelen, | ||
834 | nfserr_noent); | ||
835 | if (status) | ||
836 | return status; | ||
837 | DECODE_TAIL; | ||
838 | } | ||
839 | |||
840 | static __be32 | ||
822 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 841 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
823 | { | 842 | { |
824 | DECODE_HEAD; | 843 | DECODE_HEAD; |
@@ -1131,6 +1150,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1131 | case OP_SAVEFH: | 1150 | case OP_SAVEFH: |
1132 | op->status = nfs_ok; | 1151 | op->status = nfs_ok; |
1133 | break; | 1152 | break; |
1153 | case OP_SECINFO: | ||
1154 | op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo); | ||
1155 | break; | ||
1134 | case OP_SETATTR: | 1156 | case OP_SETATTR: |
1135 | op->status = nfsd4_decode_setattr(argp, &op->u.setattr); | 1157 | op->status = nfsd4_decode_setattr(argp, &op->u.setattr); |
1136 | break; | 1158 | break; |
@@ -1296,7 +1318,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * | |||
1296 | char *path, *rootpath; | 1318 | char *path, *rootpath; |
1297 | 1319 | ||
1298 | fh_init(&tmp_fh, NFS4_FHSIZE); | 1320 | fh_init(&tmp_fh, NFS4_FHSIZE); |
1299 | *stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle); | 1321 | *stat = exp_pseudoroot(rqstp, &tmp_fh); |
1300 | if (*stat) | 1322 | if (*stat) |
1301 | return NULL; | 1323 | return NULL; |
1302 | rootpath = tmp_fh.fh_export->ex_path; | 1324 | rootpath = tmp_fh.fh_export->ex_path; |
@@ -1847,11 +1869,19 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
1847 | if (d_mountpoint(dentry)) { | 1869 | if (d_mountpoint(dentry)) { |
1848 | int err; | 1870 | int err; |
1849 | 1871 | ||
1872 | /* | ||
1873 | * Why the heck aren't we just using nfsd_lookup?? | ||
1874 | * Different "."/".." handling? Something else? | ||
1875 | * At least, add a comment here to explain.... | ||
1876 | */ | ||
1850 | err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp); | 1877 | err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp); |
1851 | if (err) { | 1878 | if (err) { |
1852 | nfserr = nfserrno(err); | 1879 | nfserr = nfserrno(err); |
1853 | goto out_put; | 1880 | goto out_put; |
1854 | } | 1881 | } |
1882 | nfserr = check_nfsd_access(exp, cd->rd_rqstp); | ||
1883 | if (nfserr) | ||
1884 | goto out_put; | ||
1855 | 1885 | ||
1856 | } | 1886 | } |
1857 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, | 1887 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, |
@@ -2419,6 +2449,72 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ | |||
2419 | } | 2449 | } |
2420 | } | 2450 | } |
2421 | 2451 | ||
2452 | static void | ||
2453 | nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, int nfserr, | ||
2454 | struct nfsd4_secinfo *secinfo) | ||
2455 | { | ||
2456 | int i = 0; | ||
2457 | struct svc_export *exp = secinfo->si_exp; | ||
2458 | u32 nflavs; | ||
2459 | struct exp_flavor_info *flavs; | ||
2460 | struct exp_flavor_info def_flavs[2]; | ||
2461 | ENCODE_HEAD; | ||
2462 | |||
2463 | if (nfserr) | ||
2464 | goto out; | ||
2465 | if (exp->ex_nflavors) { | ||
2466 | flavs = exp->ex_flavors; | ||
2467 | nflavs = exp->ex_nflavors; | ||
2468 | } else { /* Handling of some defaults in absence of real secinfo: */ | ||
2469 | flavs = def_flavs; | ||
2470 | if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) { | ||
2471 | nflavs = 2; | ||
2472 | flavs[0].pseudoflavor = RPC_AUTH_UNIX; | ||
2473 | flavs[1].pseudoflavor = RPC_AUTH_NULL; | ||
2474 | } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) { | ||
2475 | nflavs = 1; | ||
2476 | flavs[0].pseudoflavor | ||
2477 | = svcauth_gss_flavor(exp->ex_client); | ||
2478 | } else { | ||
2479 | nflavs = 1; | ||
2480 | flavs[0].pseudoflavor | ||
2481 | = exp->ex_client->flavour->flavour; | ||
2482 | } | ||
2483 | } | ||
2484 | |||
2485 | RESERVE_SPACE(4); | ||
2486 | WRITE32(nflavs); | ||
2487 | ADJUST_ARGS(); | ||
2488 | for (i = 0; i < nflavs; i++) { | ||
2489 | u32 flav = flavs[i].pseudoflavor; | ||
2490 | struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); | ||
2491 | |||
2492 | if (gm) { | ||
2493 | RESERVE_SPACE(4); | ||
2494 | WRITE32(RPC_AUTH_GSS); | ||
2495 | ADJUST_ARGS(); | ||
2496 | RESERVE_SPACE(4 + gm->gm_oid.len); | ||
2497 | WRITE32(gm->gm_oid.len); | ||
2498 | WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); | ||
2499 | ADJUST_ARGS(); | ||
2500 | RESERVE_SPACE(4); | ||
2501 | WRITE32(0); /* qop */ | ||
2502 | ADJUST_ARGS(); | ||
2503 | RESERVE_SPACE(4); | ||
2504 | WRITE32(gss_pseudoflavor_to_service(gm, flav)); | ||
2505 | ADJUST_ARGS(); | ||
2506 | gss_mech_put(gm); | ||
2507 | } else { | ||
2508 | RESERVE_SPACE(4); | ||
2509 | WRITE32(flav); | ||
2510 | ADJUST_ARGS(); | ||
2511 | } | ||
2512 | } | ||
2513 | out: | ||
2514 | if (exp) | ||
2515 | exp_put(exp); | ||
2516 | } | ||
2517 | |||
2422 | /* | 2518 | /* |
2423 | * The SETATTR encode routine is special -- it always encodes a bitmap, | 2519 | * The SETATTR encode routine is special -- it always encodes a bitmap, |
2424 | * regardless of the error status. | 2520 | * regardless of the error status. |
@@ -2559,6 +2655,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | |||
2559 | break; | 2655 | break; |
2560 | case OP_SAVEFH: | 2656 | case OP_SAVEFH: |
2561 | break; | 2657 | break; |
2658 | case OP_SECINFO: | ||
2659 | nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo); | ||
2660 | break; | ||
2562 | case OP_SETATTR: | 2661 | case OP_SETATTR: |
2563 | nfsd4_encode_setattr(resp, op->status, &op->u.setattr); | 2662 | nfsd4_encode_setattr(resp, op->status, &op->u.setattr); |
2564 | break; | 2663 | break; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 71c686dc7257..baac89d917ca 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <linux/nfsd/cache.h> | 35 | #include <linux/nfsd/cache.h> |
36 | #include <linux/nfsd/xdr.h> | 36 | #include <linux/nfsd/xdr.h> |
37 | #include <linux/nfsd/syscall.h> | 37 | #include <linux/nfsd/syscall.h> |
38 | #include <linux/nfsd/interface.h> | ||
39 | 38 | ||
40 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
41 | 40 | ||
@@ -245,7 +244,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) | |||
245 | } | 244 | } |
246 | exp_readunlock(); | 245 | exp_readunlock(); |
247 | if (err == 0) | 246 | if (err == 0) |
248 | err = res->fh_size + (int)&((struct knfsd_fh*)0)->fh_base; | 247 | err = res->fh_size + offsetof(struct knfsd_fh, fh_base); |
249 | out: | 248 | out: |
250 | return err; | 249 | return err; |
251 | } | 250 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6ca2d24fc216..0eb464a39aae 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -15,10 +15,12 @@ | |||
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/stat.h> | 16 | #include <linux/stat.h> |
17 | #include <linux/dcache.h> | 17 | #include <linux/dcache.h> |
18 | #include <linux/exportfs.h> | ||
18 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
19 | 20 | ||
20 | #include <linux/sunrpc/clnt.h> | 21 | #include <linux/sunrpc/clnt.h> |
21 | #include <linux/sunrpc/svc.h> | 22 | #include <linux/sunrpc/svc.h> |
23 | #include <linux/sunrpc/svcauth_gss.h> | ||
22 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
23 | 25 | ||
24 | #define NFSDDBG_FACILITY NFSDDBG_FH | 26 | #define NFSDDBG_FACILITY NFSDDBG_FH |
@@ -27,10 +29,6 @@ | |||
27 | static int nfsd_nr_verified; | 29 | static int nfsd_nr_verified; |
28 | static int nfsd_nr_put; | 30 | static int nfsd_nr_put; |
29 | 31 | ||
30 | extern struct export_operations export_op_default; | ||
31 | |||
32 | #define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun) | ||
33 | |||
34 | /* | 32 | /* |
35 | * our acceptability function. | 33 | * our acceptability function. |
36 | * if NOSUBTREECHECK, accept anything | 34 | * if NOSUBTREECHECK, accept anything |
@@ -123,8 +121,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
123 | int data_left = fh->fh_size/4; | 121 | int data_left = fh->fh_size/4; |
124 | 122 | ||
125 | error = nfserr_stale; | 123 | error = nfserr_stale; |
126 | if (rqstp->rq_client == NULL) | ||
127 | goto out; | ||
128 | if (rqstp->rq_vers > 2) | 124 | if (rqstp->rq_vers > 2) |
129 | error = nfserr_badhandle; | 125 | error = nfserr_badhandle; |
130 | if (rqstp->rq_vers == 4 && fh->fh_size == 0) | 126 | if (rqstp->rq_vers == 4 && fh->fh_size == 0) |
@@ -148,7 +144,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
148 | fh->fh_fsid[1] = fh->fh_fsid[2]; | 144 | fh->fh_fsid[1] = fh->fh_fsid[2]; |
149 | } | 145 | } |
150 | if ((data_left -= len)<0) goto out; | 146 | if ((data_left -= len)<0) goto out; |
151 | exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle); | 147 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap); |
152 | datap += len; | 148 | datap += len; |
153 | } else { | 149 | } else { |
154 | dev_t xdev; | 150 | dev_t xdev; |
@@ -159,19 +155,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
159 | xdev = old_decode_dev(fh->ofh_xdev); | 155 | xdev = old_decode_dev(fh->ofh_xdev); |
160 | xino = u32_to_ino_t(fh->ofh_xino); | 156 | xino = u32_to_ino_t(fh->ofh_xino); |
161 | mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); | 157 | mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); |
162 | exp = exp_find(rqstp->rq_client, FSID_DEV, tfh, | 158 | exp = rqst_exp_find(rqstp, FSID_DEV, tfh); |
163 | &rqstp->rq_chandle); | ||
164 | } | 159 | } |
165 | 160 | ||
166 | if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN | 161 | error = nfserr_stale; |
167 | || PTR_ERR(exp) == -ETIMEDOUT)) { | 162 | if (PTR_ERR(exp) == -ENOENT) |
168 | error = nfserrno(PTR_ERR(exp)); | ||
169 | goto out; | 163 | goto out; |
170 | } | ||
171 | 164 | ||
172 | error = nfserr_stale; | 165 | if (IS_ERR(exp)) { |
173 | if (!exp || IS_ERR(exp)) | 166 | error = nfserrno(PTR_ERR(exp)); |
174 | goto out; | 167 | goto out; |
168 | } | ||
175 | 169 | ||
176 | /* Check if the request originated from a secure port. */ | 170 | /* Check if the request originated from a secure port. */ |
177 | error = nfserr_perm; | 171 | error = nfserr_perm; |
@@ -211,11 +205,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
211 | if (fileid_type == 0) | 205 | if (fileid_type == 0) |
212 | dentry = dget(exp->ex_dentry); | 206 | dentry = dget(exp->ex_dentry); |
213 | else { | 207 | else { |
214 | struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; | 208 | dentry = exportfs_decode_fh(exp->ex_mnt, datap, |
215 | dentry = CALL(nop,decode_fh)(exp->ex_mnt->mnt_sb, | 209 | data_left, fileid_type, |
216 | datap, data_left, | 210 | nfsd_acceptable, exp); |
217 | fileid_type, | ||
218 | nfsd_acceptable, exp); | ||
219 | } | 211 | } |
220 | if (dentry == NULL) | 212 | if (dentry == NULL) |
221 | goto out; | 213 | goto out; |
@@ -257,8 +249,19 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
257 | if (error) | 249 | if (error) |
258 | goto out; | 250 | goto out; |
259 | 251 | ||
252 | if (!(access & MAY_LOCK)) { | ||
253 | /* | ||
254 | * pseudoflavor restrictions are not enforced on NLM, | ||
255 | * which clients virtually always use auth_sys for, | ||
256 | * even while using RPCSEC_GSS for NFS. | ||
257 | */ | ||
258 | error = check_nfsd_access(exp, rqstp); | ||
259 | if (error) | ||
260 | goto out; | ||
261 | } | ||
262 | |||
260 | /* Finally, check access permissions. */ | 263 | /* Finally, check access permissions. */ |
261 | error = nfsd_permission(exp, dentry, access); | 264 | error = nfsd_permission(rqstp, exp, dentry, access); |
262 | 265 | ||
263 | if (error) { | 266 | if (error) { |
264 | dprintk("fh_verify: %s/%s permission failure, " | 267 | dprintk("fh_verify: %s/%s permission failure, " |
@@ -286,15 +289,13 @@ out: | |||
286 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, | 289 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, |
287 | __u32 *datap, int *maxsize) | 290 | __u32 *datap, int *maxsize) |
288 | { | 291 | { |
289 | struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; | ||
290 | |||
291 | if (dentry == exp->ex_dentry) { | 292 | if (dentry == exp->ex_dentry) { |
292 | *maxsize = 0; | 293 | *maxsize = 0; |
293 | return 0; | 294 | return 0; |
294 | } | 295 | } |
295 | 296 | ||
296 | return CALL(nop,encode_fh)(dentry, datap, maxsize, | 297 | return exportfs_encode_fh(dentry, datap, maxsize, |
297 | !(exp->ex_flags&NFSEXP_NOSUBTREECHECK)); | 298 | !(exp->ex_flags & NFSEXP_NOSUBTREECHECK)); |
298 | } | 299 | } |
299 | 300 | ||
300 | /* | 301 | /* |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b2c7147aa921..977a71f64e19 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -278,7 +278,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
278 | * echo thing > device-special-file-or-pipe | 278 | * echo thing > device-special-file-or-pipe |
279 | * by doing a CREATE with type==0 | 279 | * by doing a CREATE with type==0 |
280 | */ | 280 | */ |
281 | nfserr = nfsd_permission(newfhp->fh_export, | 281 | nfserr = nfsd_permission(rqstp, |
282 | newfhp->fh_export, | ||
282 | newfhp->fh_dentry, | 283 | newfhp->fh_dentry, |
283 | MAY_WRITE|MAY_LOCAL_ACCESS); | 284 | MAY_WRITE|MAY_LOCAL_ACCESS); |
284 | if (nfserr && nfserr != nfserr_rofs) | 285 | if (nfserr && nfserr != nfserr_rofs) |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ff55950efb43..a8c89ae4c743 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/smp_lock.h> | 21 | #include <linux/smp_lock.h> |
22 | #include <linux/freezer.h> | ||
22 | #include <linux/fs_struct.h> | 23 | #include <linux/fs_struct.h> |
23 | 24 | ||
24 | #include <linux/sunrpc/types.h> | 25 | #include <linux/sunrpc/types.h> |
@@ -432,6 +433,7 @@ nfsd(struct svc_rqst *rqstp) | |||
432 | * dirty pages. | 433 | * dirty pages. |
433 | */ | 434 | */ |
434 | current->flags |= PF_LESS_THROTTLE; | 435 | current->flags |= PF_LESS_THROTTLE; |
436 | set_freezable(); | ||
435 | 437 | ||
436 | /* | 438 | /* |
437 | * The main request loop | 439 | * The main request loop |
@@ -492,6 +494,15 @@ out: | |||
492 | module_put_and_exit(0); | 494 | module_put_and_exit(0); |
493 | } | 495 | } |
494 | 496 | ||
497 | static __be32 map_new_errors(u32 vers, __be32 nfserr) | ||
498 | { | ||
499 | if (nfserr == nfserr_jukebox && vers == 2) | ||
500 | return nfserr_dropit; | ||
501 | if (nfserr == nfserr_wrongsec && vers < 4) | ||
502 | return nfserr_acces; | ||
503 | return nfserr; | ||
504 | } | ||
505 | |||
495 | int | 506 | int |
496 | nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | 507 | nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) |
497 | { | 508 | { |
@@ -534,6 +545,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
534 | 545 | ||
535 | /* Now call the procedure handler, and encode NFS status. */ | 546 | /* Now call the procedure handler, and encode NFS status. */ |
536 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 547 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
548 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | ||
537 | if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2) | 549 | if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2) |
538 | nfserr = nfserr_dropit; | 550 | nfserr = nfserr_dropit; |
539 | if (nfserr == nfserr_dropit) { | 551 | if (nfserr == nfserr_dropit) { |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 945b1cedde2b..e90f4a8a1d01 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -113,7 +113,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
113 | 113 | ||
114 | while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); | 114 | while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); |
115 | 115 | ||
116 | exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle); | 116 | exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); |
117 | if (IS_ERR(exp2)) { | 117 | if (IS_ERR(exp2)) { |
118 | err = PTR_ERR(exp2); | 118 | err = PTR_ERR(exp2); |
119 | dput(mounts); | 119 | dput(mounts); |
@@ -135,21 +135,10 @@ out: | |||
135 | return err; | 135 | return err; |
136 | } | 136 | } |
137 | 137 | ||
138 | /* | ||
139 | * Look up one component of a pathname. | ||
140 | * N.B. After this call _both_ fhp and resfh need an fh_put | ||
141 | * | ||
142 | * If the lookup would cross a mountpoint, and the mounted filesystem | ||
143 | * is exported to the client with NFSEXP_NOHIDE, then the lookup is | ||
144 | * accepted as it stands and the mounted directory is | ||
145 | * returned. Otherwise the covered directory is returned. | ||
146 | * NOTE: this mountpoint crossing is not supported properly by all | ||
147 | * clients and is explicitly disallowed for NFSv3 | ||
148 | * NeilBrown <neilb@cse.unsw.edu.au> | ||
149 | */ | ||
150 | __be32 | 138 | __be32 |
151 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | 139 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, |
152 | int len, struct svc_fh *resfh) | 140 | const char *name, int len, |
141 | struct svc_export **exp_ret, struct dentry **dentry_ret) | ||
153 | { | 142 | { |
154 | struct svc_export *exp; | 143 | struct svc_export *exp; |
155 | struct dentry *dparent; | 144 | struct dentry *dparent; |
@@ -168,8 +157,6 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
168 | exp = fhp->fh_export; | 157 | exp = fhp->fh_export; |
169 | exp_get(exp); | 158 | exp_get(exp); |
170 | 159 | ||
171 | err = nfserr_acces; | ||
172 | |||
173 | /* Lookup the name, but don't follow links */ | 160 | /* Lookup the name, but don't follow links */ |
174 | if (isdotent(name, len)) { | 161 | if (isdotent(name, len)) { |
175 | if (len==1) | 162 | if (len==1) |
@@ -190,17 +177,15 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
190 | dput(dentry); | 177 | dput(dentry); |
191 | dentry = dp; | 178 | dentry = dp; |
192 | 179 | ||
193 | exp2 = exp_parent(exp->ex_client, mnt, dentry, | 180 | exp2 = rqst_exp_parent(rqstp, mnt, dentry); |
194 | &rqstp->rq_chandle); | 181 | if (PTR_ERR(exp2) == -ENOENT) { |
195 | if (IS_ERR(exp2)) { | 182 | dput(dentry); |
183 | dentry = dget(dparent); | ||
184 | } else if (IS_ERR(exp2)) { | ||
196 | host_err = PTR_ERR(exp2); | 185 | host_err = PTR_ERR(exp2); |
197 | dput(dentry); | 186 | dput(dentry); |
198 | mntput(mnt); | 187 | mntput(mnt); |
199 | goto out_nfserr; | 188 | goto out_nfserr; |
200 | } | ||
201 | if (!exp2) { | ||
202 | dput(dentry); | ||
203 | dentry = dget(dparent); | ||
204 | } else { | 189 | } else { |
205 | exp_put(exp); | 190 | exp_put(exp); |
206 | exp = exp2; | 191 | exp = exp2; |
@@ -223,6 +208,41 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
223 | } | 208 | } |
224 | } | 209 | } |
225 | } | 210 | } |
211 | *dentry_ret = dentry; | ||
212 | *exp_ret = exp; | ||
213 | return 0; | ||
214 | |||
215 | out_nfserr: | ||
216 | exp_put(exp); | ||
217 | return nfserrno(host_err); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * Look up one component of a pathname. | ||
222 | * N.B. After this call _both_ fhp and resfh need an fh_put | ||
223 | * | ||
224 | * If the lookup would cross a mountpoint, and the mounted filesystem | ||
225 | * is exported to the client with NFSEXP_NOHIDE, then the lookup is | ||
226 | * accepted as it stands and the mounted directory is | ||
227 | * returned. Otherwise the covered directory is returned. | ||
228 | * NOTE: this mountpoint crossing is not supported properly by all | ||
229 | * clients and is explicitly disallowed for NFSv3 | ||
230 | * NeilBrown <neilb@cse.unsw.edu.au> | ||
231 | */ | ||
232 | __be32 | ||
233 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | ||
234 | int len, struct svc_fh *resfh) | ||
235 | { | ||
236 | struct svc_export *exp; | ||
237 | struct dentry *dentry; | ||
238 | __be32 err; | ||
239 | |||
240 | err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); | ||
241 | if (err) | ||
242 | return err; | ||
243 | err = check_nfsd_access(exp, rqstp); | ||
244 | if (err) | ||
245 | goto out; | ||
226 | /* | 246 | /* |
227 | * Note: we compose the file handle now, but as the | 247 | * Note: we compose the file handle now, but as the |
228 | * dentry may be negative, it may need to be updated. | 248 | * dentry may be negative, it may need to be updated. |
@@ -230,16 +250,13 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
230 | err = fh_compose(resfh, exp, dentry, fhp); | 250 | err = fh_compose(resfh, exp, dentry, fhp); |
231 | if (!err && !dentry->d_inode) | 251 | if (!err && !dentry->d_inode) |
232 | err = nfserr_noent; | 252 | err = nfserr_noent; |
233 | dput(dentry); | ||
234 | out: | 253 | out: |
254 | dput(dentry); | ||
235 | exp_put(exp); | 255 | exp_put(exp); |
236 | return err; | 256 | return err; |
237 | |||
238 | out_nfserr: | ||
239 | err = nfserrno(host_err); | ||
240 | goto out; | ||
241 | } | 257 | } |
242 | 258 | ||
259 | |||
243 | /* | 260 | /* |
244 | * Set various file attributes. | 261 | * Set various file attributes. |
245 | * N.B. After this call fhp needs an fh_put | 262 | * N.B. After this call fhp needs an fh_put |
@@ -311,7 +328,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
311 | /* The size case is special. It changes the file as well as the attributes. */ | 328 | /* The size case is special. It changes the file as well as the attributes. */ |
312 | if (iap->ia_valid & ATTR_SIZE) { | 329 | if (iap->ia_valid & ATTR_SIZE) { |
313 | if (iap->ia_size < inode->i_size) { | 330 | if (iap->ia_size < inode->i_size) { |
314 | err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); | 331 | err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); |
315 | if (err) | 332 | if (err) |
316 | goto out; | 333 | goto out; |
317 | } | 334 | } |
@@ -435,7 +452,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
435 | /* Get inode */ | 452 | /* Get inode */ |
436 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); | 453 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); |
437 | if (error) | 454 | if (error) |
438 | goto out; | 455 | return error; |
439 | 456 | ||
440 | dentry = fhp->fh_dentry; | 457 | dentry = fhp->fh_dentry; |
441 | inode = dentry->d_inode; | 458 | inode = dentry->d_inode; |
@@ -444,33 +461,25 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
444 | 461 | ||
445 | host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); | 462 | host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); |
446 | if (host_error == -EINVAL) { | 463 | if (host_error == -EINVAL) { |
447 | error = nfserr_attrnotsupp; | 464 | return nfserr_attrnotsupp; |
448 | goto out; | ||
449 | } else if (host_error < 0) | 465 | } else if (host_error < 0) |
450 | goto out_nfserr; | 466 | goto out_nfserr; |
451 | 467 | ||
452 | host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); | 468 | host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); |
453 | if (host_error < 0) | 469 | if (host_error < 0) |
454 | goto out_nfserr; | 470 | goto out_release; |
455 | 471 | ||
456 | if (S_ISDIR(inode->i_mode)) { | 472 | if (S_ISDIR(inode->i_mode)) |
457 | host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); | 473 | host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); |
458 | if (host_error < 0) | ||
459 | goto out_nfserr; | ||
460 | } | ||
461 | |||
462 | error = nfs_ok; | ||
463 | 474 | ||
464 | out: | 475 | out_release: |
465 | posix_acl_release(pacl); | 476 | posix_acl_release(pacl); |
466 | posix_acl_release(dpacl); | 477 | posix_acl_release(dpacl); |
467 | return (error); | ||
468 | out_nfserr: | 478 | out_nfserr: |
469 | if (host_error == -EOPNOTSUPP) | 479 | if (host_error == -EOPNOTSUPP) |
470 | error = nfserr_attrnotsupp; | 480 | return nfserr_attrnotsupp; |
471 | else | 481 | else |
472 | error = nfserrno(host_error); | 482 | return nfserrno(host_error); |
473 | goto out; | ||
474 | } | 483 | } |
475 | 484 | ||
476 | static struct posix_acl * | 485 | static struct posix_acl * |
@@ -607,7 +616,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor | |||
607 | 616 | ||
608 | sresult |= map->access; | 617 | sresult |= map->access; |
609 | 618 | ||
610 | err2 = nfsd_permission(export, dentry, map->how); | 619 | err2 = nfsd_permission(rqstp, export, dentry, map->how); |
611 | switch (err2) { | 620 | switch (err2) { |
612 | case nfs_ok: | 621 | case nfs_ok: |
613 | result |= map->access; | 622 | result |= map->access; |
@@ -1034,7 +1043,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1034 | __be32 err; | 1043 | __be32 err; |
1035 | 1044 | ||
1036 | if (file) { | 1045 | if (file) { |
1037 | err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, | 1046 | err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, |
1038 | MAY_READ|MAY_OWNER_OVERRIDE); | 1047 | MAY_READ|MAY_OWNER_OVERRIDE); |
1039 | if (err) | 1048 | if (err) |
1040 | goto out; | 1049 | goto out; |
@@ -1063,7 +1072,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1063 | __be32 err = 0; | 1072 | __be32 err = 0; |
1064 | 1073 | ||
1065 | if (file) { | 1074 | if (file) { |
1066 | err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, | 1075 | err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, |
1067 | MAY_WRITE|MAY_OWNER_OVERRIDE); | 1076 | MAY_WRITE|MAY_OWNER_OVERRIDE); |
1068 | if (err) | 1077 | if (err) |
1069 | goto out; | 1078 | goto out; |
@@ -1792,7 +1801,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) | |||
1792 | * Check for a user's access permissions to this inode. | 1801 | * Check for a user's access permissions to this inode. |
1793 | */ | 1802 | */ |
1794 | __be32 | 1803 | __be32 |
1795 | nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | 1804 | nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, |
1805 | struct dentry *dentry, int acc) | ||
1796 | { | 1806 | { |
1797 | struct inode *inode = dentry->d_inode; | 1807 | struct inode *inode = dentry->d_inode; |
1798 | int err; | 1808 | int err; |
@@ -1823,7 +1833,7 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | |||
1823 | */ | 1833 | */ |
1824 | if (!(acc & MAY_LOCAL_ACCESS)) | 1834 | if (!(acc & MAY_LOCAL_ACCESS)) |
1825 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { | 1835 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { |
1826 | if (EX_RDONLY(exp) || IS_RDONLY(inode)) | 1836 | if (EX_RDONLY(exp, rqstp) || IS_RDONLY(inode)) |
1827 | return nfserr_rofs; | 1837 | return nfserr_rofs; |
1828 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) | 1838 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) |
1829 | return nfserr_perm; | 1839 | return nfserr_perm; |
diff --git a/fs/nls/Makefile b/fs/nls/Makefile index a7ade138d684..f499dd7c3905 100644 --- a/fs/nls/Makefile +++ b/fs/nls/Makefile | |||
@@ -36,11 +36,9 @@ obj-$(CONFIG_NLS_ISO8859_6) += nls_iso8859-6.o | |||
36 | obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o | 36 | obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o |
37 | obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o | 37 | obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o |
38 | obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o | 38 | obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o |
39 | obj-$(CONFIG_NLS_ISO8859_10) += nls_iso8859-10.o | ||
40 | obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o | 39 | obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o |
41 | obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o | 40 | obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o |
42 | obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o | 41 | obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o |
43 | obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o | 42 | obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o |
44 | obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o | 43 | obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o |
45 | obj-$(CONFIG_NLS_ABC) += nls_abc.o | ||
46 | obj-$(CONFIG_NLS_UTF8) += nls_utf8.o | 44 | obj-$(CONFIG_NLS_UTF8) += nls_utf8.o |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index bff01a54675a..e93c6142b23c 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/dcache.h> | 23 | #include <linux/dcache.h> |
24 | #include <linux/exportfs.h> | ||
24 | #include <linux/security.h> | 25 | #include <linux/security.h> |
25 | 26 | ||
26 | #include "attrib.h" | 27 | #include "attrib.h" |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 19712a7d145f..f5e11f4fa952 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -50,6 +50,8 @@ | |||
50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
51 | 51 | ||
52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
53 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
54 | struct ocfs2_extent_block *eb); | ||
53 | 55 | ||
54 | /* | 56 | /* |
55 | * Structures which describe a path through a btree, and functions to | 57 | * Structures which describe a path through a btree, and functions to |
@@ -117,6 +119,31 @@ static void ocfs2_free_path(struct ocfs2_path *path) | |||
117 | } | 119 | } |
118 | 120 | ||
119 | /* | 121 | /* |
 122 | * Copy all the elements of src into dest. After this call, src could be freed | ||
123 | * without affecting dest. | ||
124 | * | ||
125 | * Both paths should have the same root. Any non-root elements of dest | ||
126 | * will be freed. | ||
127 | */ | ||
128 | static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) | ||
129 | { | ||
130 | int i; | ||
131 | |||
132 | BUG_ON(path_root_bh(dest) != path_root_bh(src)); | ||
133 | BUG_ON(path_root_el(dest) != path_root_el(src)); | ||
134 | |||
135 | ocfs2_reinit_path(dest, 1); | ||
136 | |||
137 | for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { | ||
138 | dest->p_node[i].bh = src->p_node[i].bh; | ||
139 | dest->p_node[i].el = src->p_node[i].el; | ||
140 | |||
141 | if (dest->p_node[i].bh) | ||
142 | get_bh(dest->p_node[i].bh); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
120 | * Make the *dest path the same as src and re-initialize src path to | 147 | * Make the *dest path the same as src and re-initialize src path to |
121 | * have a root only. | 148 | * have a root only. |
122 | */ | 149 | */ |
@@ -212,10 +239,41 @@ out: | |||
212 | return ret; | 239 | return ret; |
213 | } | 240 | } |
214 | 241 | ||
242 | /* | ||
243 | * Return the index of the extent record which contains cluster #v_cluster. | ||
244 | * -1 is returned if it was not found. | ||
245 | * | ||
246 | * Should work fine on interior and exterior nodes. | ||
247 | */ | ||
248 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) | ||
249 | { | ||
250 | int ret = -1; | ||
251 | int i; | ||
252 | struct ocfs2_extent_rec *rec; | ||
253 | u32 rec_end, rec_start, clusters; | ||
254 | |||
255 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
256 | rec = &el->l_recs[i]; | ||
257 | |||
258 | rec_start = le32_to_cpu(rec->e_cpos); | ||
259 | clusters = ocfs2_rec_clusters(el, rec); | ||
260 | |||
261 | rec_end = rec_start + clusters; | ||
262 | |||
263 | if (v_cluster >= rec_start && v_cluster < rec_end) { | ||
264 | ret = i; | ||
265 | break; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | return ret; | ||
270 | } | ||
271 | |||
215 | enum ocfs2_contig_type { | 272 | enum ocfs2_contig_type { |
216 | CONTIG_NONE = 0, | 273 | CONTIG_NONE = 0, |
217 | CONTIG_LEFT, | 274 | CONTIG_LEFT, |
218 | CONTIG_RIGHT | 275 | CONTIG_RIGHT, |
276 | CONTIG_LEFTRIGHT, | ||
219 | }; | 277 | }; |
220 | 278 | ||
221 | 279 | ||
@@ -253,6 +311,14 @@ static enum ocfs2_contig_type | |||
253 | { | 311 | { |
254 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); | 312 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); |
255 | 313 | ||
314 | /* | ||
315 | * Refuse to coalesce extent records with different flag | ||
316 | * fields - we don't want to mix unwritten extents with user | ||
317 | * data. | ||
318 | */ | ||
319 | if (ext->e_flags != insert_rec->e_flags) | ||
320 | return CONTIG_NONE; | ||
321 | |||
256 | if (ocfs2_extents_adjacent(ext, insert_rec) && | 322 | if (ocfs2_extents_adjacent(ext, insert_rec) && |
257 | ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) | 323 | ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) |
258 | return CONTIG_RIGHT; | 324 | return CONTIG_RIGHT; |
@@ -277,7 +343,14 @@ enum ocfs2_append_type { | |||
277 | APPEND_TAIL, | 343 | APPEND_TAIL, |
278 | }; | 344 | }; |
279 | 345 | ||
346 | enum ocfs2_split_type { | ||
347 | SPLIT_NONE = 0, | ||
348 | SPLIT_LEFT, | ||
349 | SPLIT_RIGHT, | ||
350 | }; | ||
351 | |||
280 | struct ocfs2_insert_type { | 352 | struct ocfs2_insert_type { |
353 | enum ocfs2_split_type ins_split; | ||
281 | enum ocfs2_append_type ins_appending; | 354 | enum ocfs2_append_type ins_appending; |
282 | enum ocfs2_contig_type ins_contig; | 355 | enum ocfs2_contig_type ins_contig; |
283 | int ins_contig_index; | 356 | int ins_contig_index; |
@@ -285,6 +358,13 @@ struct ocfs2_insert_type { | |||
285 | int ins_tree_depth; | 358 | int ins_tree_depth; |
286 | }; | 359 | }; |
287 | 360 | ||
361 | struct ocfs2_merge_ctxt { | ||
362 | enum ocfs2_contig_type c_contig_type; | ||
363 | int c_has_empty_extent; | ||
364 | int c_split_covers_rec; | ||
365 | int c_used_tail_recs; | ||
366 | }; | ||
367 | |||
288 | /* | 368 | /* |
289 | * How many free extents have we got before we need more meta data? | 369 | * How many free extents have we got before we need more meta data? |
290 | */ | 370 | */ |
@@ -384,13 +464,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
384 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); | 464 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); |
385 | eb->h_blkno = cpu_to_le64(first_blkno); | 465 | eb->h_blkno = cpu_to_le64(first_blkno); |
386 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); | 466 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); |
387 | |||
388 | #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS | ||
389 | /* we always use slot zero's suballocator */ | ||
390 | eb->h_suballoc_slot = 0; | ||
391 | #else | ||
392 | eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); | 467 | eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); |
393 | #endif | ||
394 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 468 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
395 | eb->h_list.l_count = | 469 | eb->h_list.l_count = |
396 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); | 470 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); |
@@ -461,7 +535,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
461 | struct inode *inode, | 535 | struct inode *inode, |
462 | struct buffer_head *fe_bh, | 536 | struct buffer_head *fe_bh, |
463 | struct buffer_head *eb_bh, | 537 | struct buffer_head *eb_bh, |
464 | struct buffer_head *last_eb_bh, | 538 | struct buffer_head **last_eb_bh, |
465 | struct ocfs2_alloc_context *meta_ac) | 539 | struct ocfs2_alloc_context *meta_ac) |
466 | { | 540 | { |
467 | int status, new_blocks, i; | 541 | int status, new_blocks, i; |
@@ -476,7 +550,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
476 | 550 | ||
477 | mlog_entry_void(); | 551 | mlog_entry_void(); |
478 | 552 | ||
479 | BUG_ON(!last_eb_bh); | 553 | BUG_ON(!last_eb_bh || !*last_eb_bh); |
480 | 554 | ||
481 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 555 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
482 | 556 | ||
@@ -507,7 +581,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
507 | goto bail; | 581 | goto bail; |
508 | } | 582 | } |
509 | 583 | ||
510 | eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; | 584 | eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; |
511 | new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); | 585 | new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); |
512 | 586 | ||
513 | /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be | 587 | /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be |
@@ -568,7 +642,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
568 | * journal_dirty erroring as it won't unless we've aborted the | 642 | * journal_dirty erroring as it won't unless we've aborted the |
569 | * handle (in which case we would never be here) so reserving | 643 | * handle (in which case we would never be here) so reserving |
570 | * the write with journal_access is all we need to do. */ | 644 | * the write with journal_access is all we need to do. */ |
571 | status = ocfs2_journal_access(handle, inode, last_eb_bh, | 645 | status = ocfs2_journal_access(handle, inode, *last_eb_bh, |
572 | OCFS2_JOURNAL_ACCESS_WRITE); | 646 | OCFS2_JOURNAL_ACCESS_WRITE); |
573 | if (status < 0) { | 647 | if (status < 0) { |
574 | mlog_errno(status); | 648 | mlog_errno(status); |
@@ -601,10 +675,10 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
601 | * next_leaf on the previously last-extent-block. */ | 675 | * next_leaf on the previously last-extent-block. */ |
602 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); | 676 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); |
603 | 677 | ||
604 | eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; | 678 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; |
605 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); | 679 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); |
606 | 680 | ||
607 | status = ocfs2_journal_dirty(handle, last_eb_bh); | 681 | status = ocfs2_journal_dirty(handle, *last_eb_bh); |
608 | if (status < 0) | 682 | if (status < 0) |
609 | mlog_errno(status); | 683 | mlog_errno(status); |
610 | status = ocfs2_journal_dirty(handle, fe_bh); | 684 | status = ocfs2_journal_dirty(handle, fe_bh); |
@@ -616,6 +690,14 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
616 | mlog_errno(status); | 690 | mlog_errno(status); |
617 | } | 691 | } |
618 | 692 | ||
693 | /* | ||
694 | * Some callers want to track the rightmost leaf so pass it | ||
695 | * back here. | ||
696 | */ | ||
697 | brelse(*last_eb_bh); | ||
698 | get_bh(new_eb_bhs[0]); | ||
699 | *last_eb_bh = new_eb_bhs[0]; | ||
700 | |||
619 | status = 0; | 701 | status = 0; |
620 | bail: | 702 | bail: |
621 | if (new_eb_bhs) { | 703 | if (new_eb_bhs) { |
@@ -829,6 +911,87 @@ bail: | |||
829 | } | 911 | } |
830 | 912 | ||
831 | /* | 913 | /* |
914 | * Grow a b-tree so that it has more records. | ||
915 | * | ||
916 | * We might shift the tree depth in which case existing paths should | ||
917 | * be considered invalid. | ||
918 | * | ||
919 | * Tree depth after the grow is returned via *final_depth. | ||
920 | * | ||
921 | * *last_eb_bh will be updated by ocfs2_add_branch(). | ||
922 | */ | ||
923 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | ||
924 | struct buffer_head *di_bh, int *final_depth, | ||
925 | struct buffer_head **last_eb_bh, | ||
926 | struct ocfs2_alloc_context *meta_ac) | ||
927 | { | ||
928 | int ret, shift; | ||
929 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
930 | int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); | ||
931 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
932 | struct buffer_head *bh = NULL; | ||
933 | |||
934 | BUG_ON(meta_ac == NULL); | ||
935 | |||
936 | shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); | ||
937 | if (shift < 0) { | ||
938 | ret = shift; | ||
939 | mlog_errno(ret); | ||
940 | goto out; | ||
941 | } | ||
942 | |||
943 | /* We traveled all the way to the bottom of the allocation tree | ||
944 | * and didn't find room for any more extents - we need to add | ||
945 | * another tree level */ | ||
946 | if (shift) { | ||
947 | BUG_ON(bh); | ||
948 | mlog(0, "need to shift tree depth (current = %d)\n", depth); | ||
949 | |||
950 | /* ocfs2_shift_tree_depth will return us a buffer with | ||
951 | * the new extent block (so we can pass that to | ||
952 | * ocfs2_add_branch). */ | ||
953 | ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, | ||
954 | meta_ac, &bh); | ||
955 | if (ret < 0) { | ||
956 | mlog_errno(ret); | ||
957 | goto out; | ||
958 | } | ||
959 | depth++; | ||
960 | if (depth == 1) { | ||
961 | /* | ||
962 | * Special case: we have room now if we shifted from | ||
963 | * tree_depth 0, so no more work needs to be done. | ||
964 | * | ||
965 | * We won't be calling add_branch, so pass | ||
966 | * back *last_eb_bh as the new leaf. At depth | ||
967 | * zero, it should always be null so there's | ||
968 | * no reason to brelse. | ||
969 | */ | ||
970 | BUG_ON(*last_eb_bh); | ||
971 | get_bh(bh); | ||
972 | *last_eb_bh = bh; | ||
973 | goto out; | ||
974 | } | ||
975 | } | ||
976 | |||
977 | /* call ocfs2_add_branch to add the final part of the tree with | ||
978 | * the new data. */ | ||
979 | mlog(0, "add branch. bh = %p\n", bh); | ||
980 | ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, | ||
981 | meta_ac); | ||
982 | if (ret < 0) { | ||
983 | mlog_errno(ret); | ||
984 | goto out; | ||
985 | } | ||
986 | |||
987 | out: | ||
988 | if (final_depth) | ||
989 | *final_depth = depth; | ||
990 | brelse(bh); | ||
991 | return ret; | ||
992 | } | ||
993 | |||
994 | /* | ||
832 | * This is only valid for leaf nodes, which are the only ones that can | 995 | * This is only valid for leaf nodes, which are the only ones that can |
833 | * have empty extents anyway. | 996 | * have empty extents anyway. |
834 | */ | 997 | */ |
@@ -934,6 +1097,22 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, | |||
934 | 1097 | ||
935 | } | 1098 | } |
936 | 1099 | ||
1100 | static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el) | ||
1101 | { | ||
1102 | int size, num_recs = le16_to_cpu(el->l_next_free_rec); | ||
1103 | |||
1104 | BUG_ON(num_recs == 0); | ||
1105 | |||
1106 | if (ocfs2_is_empty_extent(&el->l_recs[0])) { | ||
1107 | num_recs--; | ||
1108 | size = num_recs * sizeof(struct ocfs2_extent_rec); | ||
1109 | memmove(&el->l_recs[0], &el->l_recs[1], size); | ||
1110 | memset(&el->l_recs[num_recs], 0, | ||
1111 | sizeof(struct ocfs2_extent_rec)); | ||
1112 | el->l_next_free_rec = cpu_to_le16(num_recs); | ||
1113 | } | ||
1114 | } | ||
1115 | |||
937 | /* | 1116 | /* |
938 | * Create an empty extent record . | 1117 | * Create an empty extent record . |
939 | * | 1118 | * |
@@ -1211,6 +1390,10 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, | |||
1211 | * immediately to their right. | 1390 | * immediately to their right. |
1212 | */ | 1391 | */ |
1213 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); | 1392 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); |
1393 | if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { | ||
1394 | BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); | ||
1395 | left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); | ||
1396 | } | ||
1214 | left_clusters -= le32_to_cpu(left_rec->e_cpos); | 1397 | left_clusters -= le32_to_cpu(left_rec->e_cpos); |
1215 | left_rec->e_int_clusters = cpu_to_le32(left_clusters); | 1398 | left_rec->e_int_clusters = cpu_to_le32(left_clusters); |
1216 | 1399 | ||
@@ -1531,10 +1714,16 @@ out: | |||
1531 | return ret; | 1714 | return ret; |
1532 | } | 1715 | } |
1533 | 1716 | ||
1717 | /* | ||
1718 | * Extend the transaction by enough credits to complete the rotation, | ||
1719 | * and still leave at least the original number of credits allocated | ||
1720 | * to this transaction. | ||
1721 | */ | ||
1534 | static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, | 1722 | static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, |
1723 | int op_credits, | ||
1535 | struct ocfs2_path *path) | 1724 | struct ocfs2_path *path) |
1536 | { | 1725 | { |
1537 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; | 1726 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; |
1538 | 1727 | ||
1539 | if (handle->h_buffer_credits < credits) | 1728 | if (handle->h_buffer_credits < credits) |
1540 | return ocfs2_extend_trans(handle, credits); | 1729 | return ocfs2_extend_trans(handle, credits); |
@@ -1568,6 +1757,29 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, | |||
1568 | return 0; | 1757 | return 0; |
1569 | } | 1758 | } |
1570 | 1759 | ||
1760 | static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) | ||
1761 | { | ||
1762 | int next_free = le16_to_cpu(el->l_next_free_rec); | ||
1763 | unsigned int range; | ||
1764 | struct ocfs2_extent_rec *rec; | ||
1765 | |||
1766 | if (next_free == 0) | ||
1767 | return 0; | ||
1768 | |||
1769 | rec = &el->l_recs[0]; | ||
1770 | if (ocfs2_is_empty_extent(rec)) { | ||
1771 | /* Empty list. */ | ||
1772 | if (next_free == 1) | ||
1773 | return 0; | ||
1774 | rec = &el->l_recs[1]; | ||
1775 | } | ||
1776 | |||
1777 | range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
1778 | if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) | ||
1779 | return 1; | ||
1780 | return 0; | ||
1781 | } | ||
1782 | |||
1571 | /* | 1783 | /* |
1572 | * Rotate all the records in a btree right one record, starting at insert_cpos. | 1784 | * Rotate all the records in a btree right one record, starting at insert_cpos. |
1573 | * | 1785 | * |
@@ -1586,11 +1798,12 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, | |||
1586 | */ | 1798 | */ |
1587 | static int ocfs2_rotate_tree_right(struct inode *inode, | 1799 | static int ocfs2_rotate_tree_right(struct inode *inode, |
1588 | handle_t *handle, | 1800 | handle_t *handle, |
1801 | enum ocfs2_split_type split, | ||
1589 | u32 insert_cpos, | 1802 | u32 insert_cpos, |
1590 | struct ocfs2_path *right_path, | 1803 | struct ocfs2_path *right_path, |
1591 | struct ocfs2_path **ret_left_path) | 1804 | struct ocfs2_path **ret_left_path) |
1592 | { | 1805 | { |
1593 | int ret, start; | 1806 | int ret, start, orig_credits = handle->h_buffer_credits; |
1594 | u32 cpos; | 1807 | u32 cpos; |
1595 | struct ocfs2_path *left_path = NULL; | 1808 | struct ocfs2_path *left_path = NULL; |
1596 | 1809 | ||
@@ -1657,9 +1870,9 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
1657 | (unsigned long long) | 1870 | (unsigned long long) |
1658 | path_leaf_bh(left_path)->b_blocknr); | 1871 | path_leaf_bh(left_path)->b_blocknr); |
1659 | 1872 | ||
1660 | if (ocfs2_rotate_requires_path_adjustment(left_path, | 1873 | if (split == SPLIT_NONE && |
1874 | ocfs2_rotate_requires_path_adjustment(left_path, | ||
1661 | insert_cpos)) { | 1875 | insert_cpos)) { |
1662 | mlog(0, "Path adjustment required\n"); | ||
1663 | 1876 | ||
1664 | /* | 1877 | /* |
1665 | * We've rotated the tree as much as we | 1878 | * We've rotated the tree as much as we |
@@ -1687,7 +1900,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
1687 | right_path->p_tree_depth); | 1900 | right_path->p_tree_depth); |
1688 | 1901 | ||
1689 | ret = ocfs2_extend_rotate_transaction(handle, start, | 1902 | ret = ocfs2_extend_rotate_transaction(handle, start, |
1690 | right_path); | 1903 | orig_credits, right_path); |
1691 | if (ret) { | 1904 | if (ret) { |
1692 | mlog_errno(ret); | 1905 | mlog_errno(ret); |
1693 | goto out; | 1906 | goto out; |
@@ -1700,6 +1913,24 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
1700 | goto out; | 1913 | goto out; |
1701 | } | 1914 | } |
1702 | 1915 | ||
1916 | if (split != SPLIT_NONE && | ||
1917 | ocfs2_leftmost_rec_contains(path_leaf_el(right_path), | ||
1918 | insert_cpos)) { | ||
1919 | /* | ||
1920 | * A rotate moves the rightmost left leaf | ||
1921 | * record over to the leftmost right leaf | ||
1922 | * slot. If we're doing an extent split | ||
1923 | * instead of a real insert, then we have to | ||
1924 | * check that the extent to be split wasn't | ||
1925 | * just moved over. If it was, then we can | ||
1926 | * exit here, passing left_path back - | ||
1927 | * ocfs2_split_extent() is smart enough to | ||
1928 | * search both leaves. | ||
1929 | */ | ||
1930 | *ret_left_path = left_path; | ||
1931 | goto out_ret_path; | ||
1932 | } | ||
1933 | |||
1703 | /* | 1934 | /* |
1704 | * There is no need to re-read the next right path | 1935 | * There is no need to re-read the next right path |
1705 | * as we know that it'll be our current left | 1936 | * as we know that it'll be our current left |
@@ -1722,6 +1953,1031 @@ out_ret_path: | |||
1722 | return ret; | 1953 | return ret; |
1723 | } | 1954 | } |
1724 | 1955 | ||
1956 | static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | ||
1957 | struct ocfs2_path *path) | ||
1958 | { | ||
1959 | int i, idx; | ||
1960 | struct ocfs2_extent_rec *rec; | ||
1961 | struct ocfs2_extent_list *el; | ||
1962 | struct ocfs2_extent_block *eb; | ||
1963 | u32 range; | ||
1964 | |||
1965 | /* Path should always be rightmost. */ | ||
1966 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | ||
1967 | BUG_ON(eb->h_next_leaf_blk != 0ULL); | ||
1968 | |||
1969 | el = &eb->h_list; | ||
1970 | BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); | ||
1971 | idx = le16_to_cpu(el->l_next_free_rec) - 1; | ||
1972 | rec = &el->l_recs[idx]; | ||
1973 | range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
1974 | |||
1975 | for (i = 0; i < path->p_tree_depth; i++) { | ||
1976 | el = path->p_node[i].el; | ||
1977 | idx = le16_to_cpu(el->l_next_free_rec) - 1; | ||
1978 | rec = &el->l_recs[idx]; | ||
1979 | |||
1980 | rec->e_int_clusters = cpu_to_le32(range); | ||
1981 | le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); | ||
1982 | |||
1983 | ocfs2_journal_dirty(handle, path->p_node[i].bh); | ||
1984 | } | ||
1985 | } | ||
1986 | |||
1987 | static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | ||
1988 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
1989 | struct ocfs2_path *path, int unlink_start) | ||
1990 | { | ||
1991 | int ret, i; | ||
1992 | struct ocfs2_extent_block *eb; | ||
1993 | struct ocfs2_extent_list *el; | ||
1994 | struct buffer_head *bh; | ||
1995 | |||
1996 | for(i = unlink_start; i < path_num_items(path); i++) { | ||
1997 | bh = path->p_node[i].bh; | ||
1998 | |||
1999 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
2000 | /* | ||
2001 | * Not all nodes might have had their final count | ||
2002 | * decremented by the caller - handle this here. | ||
2003 | */ | ||
2004 | el = &eb->h_list; | ||
2005 | if (le16_to_cpu(el->l_next_free_rec) > 1) { | ||
2006 | mlog(ML_ERROR, | ||
2007 | "Inode %llu, attempted to remove extent block " | ||
2008 | "%llu with %u records\n", | ||
2009 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
2010 | (unsigned long long)le64_to_cpu(eb->h_blkno), | ||
2011 | le16_to_cpu(el->l_next_free_rec)); | ||
2012 | |||
2013 | ocfs2_journal_dirty(handle, bh); | ||
2014 | ocfs2_remove_from_cache(inode, bh); | ||
2015 | continue; | ||
2016 | } | ||
2017 | |||
2018 | el->l_next_free_rec = 0; | ||
2019 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
2020 | |||
2021 | ocfs2_journal_dirty(handle, bh); | ||
2022 | |||
2023 | ret = ocfs2_cache_extent_block_free(dealloc, eb); | ||
2024 | if (ret) | ||
2025 | mlog_errno(ret); | ||
2026 | |||
2027 | ocfs2_remove_from_cache(inode, bh); | ||
2028 | } | ||
2029 | } | ||
2030 | |||
2031 | static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, | ||
2032 | struct ocfs2_path *left_path, | ||
2033 | struct ocfs2_path *right_path, | ||
2034 | int subtree_index, | ||
2035 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2036 | { | ||
2037 | int i; | ||
2038 | struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; | ||
2039 | struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; | ||
2040 | struct ocfs2_extent_list *el; | ||
2041 | struct ocfs2_extent_block *eb; | ||
2042 | |||
2043 | el = path_leaf_el(left_path); | ||
2044 | |||
2045 | eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; | ||
2046 | |||
2047 | for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) | ||
2048 | if (root_el->l_recs[i].e_blkno == eb->h_blkno) | ||
2049 | break; | ||
2050 | |||
2051 | BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec)); | ||
2052 | |||
2053 | memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); | ||
2054 | le16_add_cpu(&root_el->l_next_free_rec, -1); | ||
2055 | |||
2056 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
2057 | eb->h_next_leaf_blk = 0; | ||
2058 | |||
2059 | ocfs2_journal_dirty(handle, root_bh); | ||
2060 | ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
2061 | |||
2062 | ocfs2_unlink_path(inode, handle, dealloc, right_path, | ||
2063 | subtree_index + 1); | ||
2064 | } | ||
2065 | |||
2066 | static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | ||
2067 | struct ocfs2_path *left_path, | ||
2068 | struct ocfs2_path *right_path, | ||
2069 | int subtree_index, | ||
2070 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
2071 | int *deleted) | ||
2072 | { | ||
2073 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | ||
2074 | struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); | ||
2075 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2076 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; | ||
2077 | struct ocfs2_extent_block *eb; | ||
2078 | |||
2079 | *deleted = 0; | ||
2080 | |||
2081 | right_leaf_el = path_leaf_el(right_path); | ||
2082 | left_leaf_el = path_leaf_el(left_path); | ||
2083 | root_bh = left_path->p_node[subtree_index].bh; | ||
2084 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
2085 | |||
2086 | if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0])) | ||
2087 | return 0; | ||
2088 | |||
2089 | eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data; | ||
2090 | if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) { | ||
2091 | /* | ||
2092 | * It's legal for us to proceed if the right leaf is | ||
2093 | * the rightmost one and it has an empty extent. There | ||
2094 | * are two cases to handle - whether the leaf will be | ||
2095 | * empty after removal or not. If the leaf isn't empty | ||
2096 | * then just remove the empty extent up front. The | ||
2097 | * next block will handle empty leaves by flagging | ||
2098 | * them for unlink. | ||
2099 | * | ||
2100 | * Non rightmost leaves will throw -EAGAIN and the | ||
2101 | * caller can manually move the subtree and retry. | ||
2102 | */ | ||
2103 | |||
2104 | if (eb->h_next_leaf_blk != 0ULL) | ||
2105 | return -EAGAIN; | ||
2106 | |||
2107 | if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { | ||
2108 | ret = ocfs2_journal_access(handle, inode, | ||
2109 | path_leaf_bh(right_path), | ||
2110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2111 | if (ret) { | ||
2112 | mlog_errno(ret); | ||
2113 | goto out; | ||
2114 | } | ||
2115 | |||
2116 | ocfs2_remove_empty_extent(right_leaf_el); | ||
2117 | } else | ||
2118 | right_has_empty = 1; | ||
2119 | } | ||
2120 | |||
2121 | if (eb->h_next_leaf_blk == 0ULL && | ||
2122 | le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) { | ||
2123 | /* | ||
2124 | * We have to update i_last_eb_blk during the meta | ||
2125 | * data delete. | ||
2126 | */ | ||
2127 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
2128 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2129 | if (ret) { | ||
2130 | mlog_errno(ret); | ||
2131 | goto out; | ||
2132 | } | ||
2133 | |||
2134 | del_right_subtree = 1; | ||
2135 | } | ||
2136 | |||
2137 | /* | ||
2138 | * Getting here with an empty extent in the right path implies | ||
2139 | * that it's the rightmost path and will be deleted. | ||
2140 | */ | ||
2141 | BUG_ON(right_has_empty && !del_right_subtree); | ||
2142 | |||
2143 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
2144 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2145 | if (ret) { | ||
2146 | mlog_errno(ret); | ||
2147 | goto out; | ||
2148 | } | ||
2149 | |||
2150 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { | ||
2151 | ret = ocfs2_journal_access(handle, inode, | ||
2152 | right_path->p_node[i].bh, | ||
2153 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2154 | if (ret) { | ||
2155 | mlog_errno(ret); | ||
2156 | goto out; | ||
2157 | } | ||
2158 | |||
2159 | ret = ocfs2_journal_access(handle, inode, | ||
2160 | left_path->p_node[i].bh, | ||
2161 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2162 | if (ret) { | ||
2163 | mlog_errno(ret); | ||
2164 | goto out; | ||
2165 | } | ||
2166 | } | ||
2167 | |||
2168 | if (!right_has_empty) { | ||
2169 | /* | ||
2170 | * Only do this if we're moving a real | ||
2171 | * record. Otherwise, the action is delayed until | ||
2172 | * after removal of the right path in which case we | ||
2173 | * can do a simple shift to remove the empty extent. | ||
2174 | */ | ||
2175 | ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]); | ||
2176 | memset(&right_leaf_el->l_recs[0], 0, | ||
2177 | sizeof(struct ocfs2_extent_rec)); | ||
2178 | } | ||
2179 | if (eb->h_next_leaf_blk == 0ULL) { | ||
2180 | /* | ||
2181 | * Move recs over to get rid of empty extent, decrease | ||
2182 | * next_free. This is allowed to remove the last | ||
2183 | * extent in our leaf (setting l_next_free_rec to | ||
2184 | * zero) - the delete code below won't care. | ||
2185 | */ | ||
2186 | ocfs2_remove_empty_extent(right_leaf_el); | ||
2187 | } | ||
2188 | |||
2189 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
2190 | if (ret) | ||
2191 | mlog_errno(ret); | ||
2192 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); | ||
2193 | if (ret) | ||
2194 | mlog_errno(ret); | ||
2195 | |||
2196 | if (del_right_subtree) { | ||
2197 | ocfs2_unlink_subtree(inode, handle, left_path, right_path, | ||
2198 | subtree_index, dealloc); | ||
2199 | ocfs2_update_edge_lengths(inode, handle, left_path); | ||
2200 | |||
2201 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
2202 | di->i_last_eb_blk = eb->h_blkno; | ||
2203 | |||
2204 | /* | ||
2205 | * Removal of the extent in the left leaf was skipped | ||
2206 | * above so we could delete the right path | ||
2207 | * 1st. | ||
2208 | */ | ||
2209 | if (right_has_empty) | ||
2210 | ocfs2_remove_empty_extent(left_leaf_el); | ||
2211 | |||
2212 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
2213 | if (ret) | ||
2214 | mlog_errno(ret); | ||
2215 | |||
2216 | *deleted = 1; | ||
2217 | } else | ||
2218 | ocfs2_complete_edge_insert(inode, handle, left_path, right_path, | ||
2219 | subtree_index); | ||
2220 | |||
2221 | out: | ||
2222 | return ret; | ||
2223 | } | ||
2224 | |||
2225 | /* | ||
2226 | * Given a full path, determine what cpos value would return us a path | ||
2227 | * containing the leaf immediately to the right of the current one. | ||
2228 | * | ||
2229 | * Will return zero if the path passed in is already the rightmost path. | ||
2230 | * | ||
2231 | * This looks similar, but is subtly different to | ||
2232 | * ocfs2_find_cpos_for_left_leaf(). | ||
2233 | */ | ||
2234 | static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | ||
2235 | struct ocfs2_path *path, u32 *cpos) | ||
2236 | { | ||
2237 | int i, j, ret = 0; | ||
2238 | u64 blkno; | ||
2239 | struct ocfs2_extent_list *el; | ||
2240 | |||
2241 | *cpos = 0; | ||
2242 | |||
2243 | if (path->p_tree_depth == 0) | ||
2244 | return 0; | ||
2245 | |||
2246 | blkno = path_leaf_bh(path)->b_blocknr; | ||
2247 | |||
2248 | /* Start at the tree node just above the leaf and work our way up. */ | ||
2249 | i = path->p_tree_depth - 1; | ||
2250 | while (i >= 0) { | ||
2251 | int next_free; | ||
2252 | |||
2253 | el = path->p_node[i].el; | ||
2254 | |||
2255 | /* | ||
2256 | * Find the extent record just after the one in our | ||
2257 | * path. | ||
2258 | */ | ||
2259 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
2260 | for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { | ||
2261 | if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { | ||
2262 | if (j == (next_free - 1)) { | ||
2263 | if (i == 0) { | ||
2264 | /* | ||
2265 | * We've determined that the | ||
2266 | * path specified is already | ||
2267 | * the rightmost one - return a | ||
2268 | * cpos of zero. | ||
2269 | */ | ||
2270 | goto out; | ||
2271 | } | ||
2272 | /* | ||
2273 | * The rightmost record points to our | ||
2274 | * leaf - we need to travel up the | ||
2275 | * tree one level. | ||
2276 | */ | ||
2277 | goto next_node; | ||
2278 | } | ||
2279 | |||
2280 | *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos); | ||
2281 | goto out; | ||
2282 | } | ||
2283 | } | ||
2284 | |||
2285 | /* | ||
2286 | * If we got here, we never found a valid node where | ||
2287 | * the tree indicated one should be. | ||
2288 | */ | ||
2289 | ocfs2_error(sb, | ||
2290 | "Invalid extent tree at extent block %llu\n", | ||
2291 | (unsigned long long)blkno); | ||
2292 | ret = -EROFS; | ||
2293 | goto out; | ||
2294 | |||
2295 | next_node: | ||
2296 | blkno = path->p_node[i].bh->b_blocknr; | ||
2297 | i--; | ||
2298 | } | ||
2299 | |||
2300 | out: | ||
2301 | return ret; | ||
2302 | } | ||
2303 | |||
2304 | static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, | ||
2305 | handle_t *handle, | ||
2306 | struct buffer_head *bh, | ||
2307 | struct ocfs2_extent_list *el) | ||
2308 | { | ||
2309 | int ret; | ||
2310 | |||
2311 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) | ||
2312 | return 0; | ||
2313 | |||
2314 | ret = ocfs2_journal_access(handle, inode, bh, | ||
2315 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2316 | if (ret) { | ||
2317 | mlog_errno(ret); | ||
2318 | goto out; | ||
2319 | } | ||
2320 | |||
2321 | ocfs2_remove_empty_extent(el); | ||
2322 | |||
2323 | ret = ocfs2_journal_dirty(handle, bh); | ||
2324 | if (ret) | ||
2325 | mlog_errno(ret); | ||
2326 | |||
2327 | out: | ||
2328 | return ret; | ||
2329 | } | ||
2330 | |||
2331 | static int __ocfs2_rotate_tree_left(struct inode *inode, | ||
2332 | handle_t *handle, int orig_credits, | ||
2333 | struct ocfs2_path *path, | ||
2334 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
2335 | struct ocfs2_path **empty_extent_path) | ||
2336 | { | ||
2337 | int ret, subtree_root, deleted; | ||
2338 | u32 right_cpos; | ||
2339 | struct ocfs2_path *left_path = NULL; | ||
2340 | struct ocfs2_path *right_path = NULL; | ||
2341 | |||
2342 | BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); | ||
2343 | |||
2344 | *empty_extent_path = NULL; | ||
2345 | |||
2346 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, | ||
2347 | &right_cpos); | ||
2348 | if (ret) { | ||
2349 | mlog_errno(ret); | ||
2350 | goto out; | ||
2351 | } | ||
2352 | |||
2353 | left_path = ocfs2_new_path(path_root_bh(path), | ||
2354 | path_root_el(path)); | ||
2355 | if (!left_path) { | ||
2356 | ret = -ENOMEM; | ||
2357 | mlog_errno(ret); | ||
2358 | goto out; | ||
2359 | } | ||
2360 | |||
2361 | ocfs2_cp_path(left_path, path); | ||
2362 | |||
2363 | right_path = ocfs2_new_path(path_root_bh(path), | ||
2364 | path_root_el(path)); | ||
2365 | if (!right_path) { | ||
2366 | ret = -ENOMEM; | ||
2367 | mlog_errno(ret); | ||
2368 | goto out; | ||
2369 | } | ||
2370 | |||
2371 | while (right_cpos) { | ||
2372 | ret = ocfs2_find_path(inode, right_path, right_cpos); | ||
2373 | if (ret) { | ||
2374 | mlog_errno(ret); | ||
2375 | goto out; | ||
2376 | } | ||
2377 | |||
2378 | subtree_root = ocfs2_find_subtree_root(inode, left_path, | ||
2379 | right_path); | ||
2380 | |||
2381 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", | ||
2382 | subtree_root, | ||
2383 | (unsigned long long) | ||
2384 | right_path->p_node[subtree_root].bh->b_blocknr, | ||
2385 | right_path->p_tree_depth); | ||
2386 | |||
2387 | ret = ocfs2_extend_rotate_transaction(handle, subtree_root, | ||
2388 | orig_credits, left_path); | ||
2389 | if (ret) { | ||
2390 | mlog_errno(ret); | ||
2391 | goto out; | ||
2392 | } | ||
2393 | |||
2394 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | ||
2395 | right_path, subtree_root, | ||
2396 | dealloc, &deleted); | ||
2397 | if (ret == -EAGAIN) { | ||
2398 | /* | ||
2399 | * The rotation has to temporarily stop due to | ||
2400 | * the right subtree having an empty | ||
2401 | * extent. Pass it back to the caller for a | ||
2402 | * fixup. | ||
2403 | */ | ||
2404 | *empty_extent_path = right_path; | ||
2405 | right_path = NULL; | ||
2406 | goto out; | ||
2407 | } | ||
2408 | if (ret) { | ||
2409 | mlog_errno(ret); | ||
2410 | goto out; | ||
2411 | } | ||
2412 | |||
2413 | /* | ||
2414 | * The subtree rotate might have removed records on | ||
2415 | * the rightmost edge. If so, then rotation is | ||
2416 | * complete. | ||
2417 | */ | ||
2418 | if (deleted) | ||
2419 | break; | ||
2420 | |||
2421 | ocfs2_mv_path(left_path, right_path); | ||
2422 | |||
2423 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | ||
2424 | &right_cpos); | ||
2425 | if (ret) { | ||
2426 | mlog_errno(ret); | ||
2427 | goto out; | ||
2428 | } | ||
2429 | } | ||
2430 | |||
2431 | out: | ||
2432 | ocfs2_free_path(right_path); | ||
2433 | ocfs2_free_path(left_path); | ||
2434 | |||
2435 | return ret; | ||
2436 | } | ||
2437 | |||
2438 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | ||
2439 | struct ocfs2_path *path, | ||
2440 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2441 | { | ||
2442 | int ret, subtree_index; | ||
2443 | u32 cpos; | ||
2444 | struct ocfs2_path *left_path = NULL; | ||
2445 | struct ocfs2_dinode *di; | ||
2446 | struct ocfs2_extent_block *eb; | ||
2447 | struct ocfs2_extent_list *el; | ||
2448 | |||
2449 | /* | ||
2450 | * XXX: This code assumes that the root is an inode, which is | ||
2451 | * true for now but may change as tree code gets generic. | ||
2452 | */ | ||
2453 | di = (struct ocfs2_dinode *)path_root_bh(path)->b_data; | ||
2454 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
2455 | ret = -EIO; | ||
2456 | ocfs2_error(inode->i_sb, | ||
2457 | "Inode %llu has invalid path root", | ||
2458 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
2459 | goto out; | ||
2460 | } | ||
2461 | |||
2462 | /* | ||
2463 | * There's two ways we handle this depending on | ||
2464 | * whether path is the only existing one. | ||
2465 | */ | ||
2466 | ret = ocfs2_extend_rotate_transaction(handle, 0, | ||
2467 | handle->h_buffer_credits, | ||
2468 | path); | ||
2469 | if (ret) { | ||
2470 | mlog_errno(ret); | ||
2471 | goto out; | ||
2472 | } | ||
2473 | |||
2474 | ret = ocfs2_journal_access_path(inode, handle, path); | ||
2475 | if (ret) { | ||
2476 | mlog_errno(ret); | ||
2477 | goto out; | ||
2478 | } | ||
2479 | |||
2480 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); | ||
2481 | if (ret) { | ||
2482 | mlog_errno(ret); | ||
2483 | goto out; | ||
2484 | } | ||
2485 | |||
2486 | if (cpos) { | ||
2487 | /* | ||
2488 | * We have a path to the left of this one - it needs | ||
2489 | * an update too. | ||
2490 | */ | ||
2491 | left_path = ocfs2_new_path(path_root_bh(path), | ||
2492 | path_root_el(path)); | ||
2493 | if (!left_path) { | ||
2494 | ret = -ENOMEM; | ||
2495 | mlog_errno(ret); | ||
2496 | goto out; | ||
2497 | } | ||
2498 | |||
2499 | ret = ocfs2_find_path(inode, left_path, cpos); | ||
2500 | if (ret) { | ||
2501 | mlog_errno(ret); | ||
2502 | goto out; | ||
2503 | } | ||
2504 | |||
2505 | ret = ocfs2_journal_access_path(inode, handle, left_path); | ||
2506 | if (ret) { | ||
2507 | mlog_errno(ret); | ||
2508 | goto out; | ||
2509 | } | ||
2510 | |||
2511 | subtree_index = ocfs2_find_subtree_root(inode, left_path, path); | ||
2512 | |||
2513 | ocfs2_unlink_subtree(inode, handle, left_path, path, | ||
2514 | subtree_index, dealloc); | ||
2515 | ocfs2_update_edge_lengths(inode, handle, left_path); | ||
2516 | |||
2517 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
2518 | di->i_last_eb_blk = eb->h_blkno; | ||
2519 | } else { | ||
2520 | /* | ||
2521 | * 'path' is also the leftmost path which | ||
2522 | * means it must be the only one. This gets | ||
2523 | * handled differently because we want to | ||
2524 | * revert the inode back to having extents | ||
2525 | * in-line. | ||
2526 | */ | ||
2527 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | ||
2528 | |||
2529 | el = &di->id2.i_list; | ||
2530 | el->l_tree_depth = 0; | ||
2531 | el->l_next_free_rec = 0; | ||
2532 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
2533 | |||
2534 | di->i_last_eb_blk = 0; | ||
2535 | } | ||
2536 | |||
2537 | ocfs2_journal_dirty(handle, path_root_bh(path)); | ||
2538 | |||
2539 | out: | ||
2540 | ocfs2_free_path(left_path); | ||
2541 | return ret; | ||
2542 | } | ||
2543 | |||
2544 | /* | ||
2545 | * Left rotation of btree records. | ||
2546 | * | ||
2547 | * In many ways, this is (unsurprisingly) the opposite of right | ||
2548 | * rotation. We start at some non-rightmost path containing an empty | ||
2549 | * extent in the leaf block. The code works its way to the rightmost | ||
2550 | * path by rotating records to the left in every subtree. | ||
2551 | * | ||
2552 | * This is used by any code which reduces the number of extent records | ||
2553 | * in a leaf. After removal, an empty record should be placed in the | ||
2554 | * leftmost list position. | ||
2555 | * | ||
2556 | * This won't handle a length update of the rightmost path records if | ||
2557 | * the rightmost tree leaf record is removed so the caller is | ||
2558 | * responsible for detecting and correcting that. | ||
2559 | */ | ||
2560 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | ||
2561 | struct ocfs2_path *path, | ||
2562 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2563 | { | ||
2564 | int ret, orig_credits = handle->h_buffer_credits; | ||
2565 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | ||
2566 | struct ocfs2_extent_block *eb; | ||
2567 | struct ocfs2_extent_list *el; | ||
2568 | |||
2569 | el = path_leaf_el(path); | ||
2570 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) | ||
2571 | return 0; | ||
2572 | |||
2573 | if (path->p_tree_depth == 0) { | ||
2574 | rightmost_no_delete: | ||
2575 | /* | ||
2576 | * In-inode extents. This is trivially handled, so do | ||
2577 | * it up front. | ||
2578 | */ | ||
2579 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | ||
2580 | path_leaf_bh(path), | ||
2581 | path_leaf_el(path)); | ||
2582 | if (ret) | ||
2583 | mlog_errno(ret); | ||
2584 | goto out; | ||
2585 | } | ||
2586 | |||
2587 | /* | ||
2588 | * Handle rightmost branch now. There's several cases: | ||
2589 | * 1) simple rotation leaving records in there. That's trivial. | ||
2590 | * 2) rotation requiring a branch delete - there's no more | ||
2591 | * records left. Two cases of this: | ||
2592 | * a) There are branches to the left. | ||
2593 | * b) This is also the leftmost (the only) branch. | ||
2594 | * | ||
2595 | * 1) is handled via ocfs2_rotate_rightmost_leaf_left() | ||
2596 | * 2a) we need the left branch so that we can update it with the unlink | ||
2597 | * 2b) we need to bring the inode back to inline extents. | ||
2598 | */ | ||
2599 | |||
2600 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | ||
2601 | el = &eb->h_list; | ||
2602 | if (eb->h_next_leaf_blk == 0) { | ||
2603 | /* | ||
2604 | * This gets a bit tricky if we're going to delete the | ||
2605 | * rightmost path. Get the other cases out of the way | ||
2606 | * 1st. | ||
2607 | */ | ||
2608 | if (le16_to_cpu(el->l_next_free_rec) > 1) | ||
2609 | goto rightmost_no_delete; | ||
2610 | |||
2611 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | ||
2612 | ret = -EIO; | ||
2613 | ocfs2_error(inode->i_sb, | ||
2614 | "Inode %llu has empty extent block at %llu", | ||
2615 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
2616 | (unsigned long long)le64_to_cpu(eb->h_blkno)); | ||
2617 | goto out; | ||
2618 | } | ||
2619 | |||
2620 | /* | ||
2621 | * XXX: The caller can not trust "path" any more after | ||
2622 | * this as it will have been deleted. What do we do? | ||
2623 | * | ||
2624 | * In theory the rotate-for-merge code will never get | ||
2625 | * here because it'll always ask for a rotate in a | ||
2626 | * nonempty list. | ||
2627 | */ | ||
2628 | |||
2629 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | ||
2630 | dealloc); | ||
2631 | if (ret) | ||
2632 | mlog_errno(ret); | ||
2633 | goto out; | ||
2634 | } | ||
2635 | |||
2636 | /* | ||
2637 | * Now we can loop, remembering the path we get from -EAGAIN | ||
2638 | * and restarting from there. | ||
2639 | */ | ||
2640 | try_rotate: | ||
2641 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | ||
2642 | dealloc, &restart_path); | ||
2643 | if (ret && ret != -EAGAIN) { | ||
2644 | mlog_errno(ret); | ||
2645 | goto out; | ||
2646 | } | ||
2647 | |||
2648 | while (ret == -EAGAIN) { | ||
2649 | tmp_path = restart_path; | ||
2650 | restart_path = NULL; | ||
2651 | |||
2652 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | ||
2653 | tmp_path, dealloc, | ||
2654 | &restart_path); | ||
2655 | if (ret && ret != -EAGAIN) { | ||
2656 | mlog_errno(ret); | ||
2657 | goto out; | ||
2658 | } | ||
2659 | |||
2660 | ocfs2_free_path(tmp_path); | ||
2661 | tmp_path = NULL; | ||
2662 | |||
2663 | if (ret == 0) | ||
2664 | goto try_rotate; | ||
2665 | } | ||
2666 | |||
2667 | out: | ||
2668 | ocfs2_free_path(tmp_path); | ||
2669 | ocfs2_free_path(restart_path); | ||
2670 | return ret; | ||
2671 | } | ||
2672 | |||
2673 | static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | ||
2674 | int index) | ||
2675 | { | ||
2676 | struct ocfs2_extent_rec *rec = &el->l_recs[index]; | ||
2677 | unsigned int size; | ||
2678 | |||
2679 | if (rec->e_leaf_clusters == 0) { | ||
2680 | /* | ||
2681 | * We consumed all of the merged-from record. An empty | ||
2682 | * extent cannot exist anywhere but the 1st array | ||
2683 | * position, so move things over if the merged-from | ||
2684 | * record doesn't occupy that position. | ||
2685 | * | ||
2686 | * This creates a new empty extent so the caller | ||
2687 | * should be smart enough to have removed any existing | ||
2688 | * ones. | ||
2689 | */ | ||
2690 | if (index > 0) { | ||
2691 | BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); | ||
2692 | size = index * sizeof(struct ocfs2_extent_rec); | ||
2693 | memmove(&el->l_recs[1], &el->l_recs[0], size); | ||
2694 | } | ||
2695 | |||
2696 | /* | ||
2697 | * Always memset - the caller doesn't check whether it | ||
2698 | * created an empty extent, so there could be junk in | ||
2699 | * the other fields. | ||
2700 | */ | ||
2701 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
2702 | } | ||
2703 | } | ||
2704 | |||
2705 | /* | ||
2706 | * Remove split_rec clusters from the record at index and merge them | ||
2707 | * onto the beginning of the record at index + 1. | ||
2708 | */ | ||
2709 | static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | ||
2710 | handle_t *handle, | ||
2711 | struct ocfs2_extent_rec *split_rec, | ||
2712 | struct ocfs2_extent_list *el, int index) | ||
2713 | { | ||
2714 | int ret; | ||
2715 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | ||
2716 | struct ocfs2_extent_rec *left_rec; | ||
2717 | struct ocfs2_extent_rec *right_rec; | ||
2718 | |||
2719 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); | ||
2720 | |||
2721 | left_rec = &el->l_recs[index]; | ||
2722 | right_rec = &el->l_recs[index + 1]; | ||
2723 | |||
2724 | ret = ocfs2_journal_access(handle, inode, bh, | ||
2725 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2726 | if (ret) { | ||
2727 | mlog_errno(ret); | ||
2728 | goto out; | ||
2729 | } | ||
2730 | |||
2731 | le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters); | ||
2732 | |||
2733 | le32_add_cpu(&right_rec->e_cpos, -split_clusters); | ||
2734 | le64_add_cpu(&right_rec->e_blkno, | ||
2735 | -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | ||
2736 | le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); | ||
2737 | |||
2738 | ocfs2_cleanup_merge(el, index); | ||
2739 | |||
2740 | ret = ocfs2_journal_dirty(handle, bh); | ||
2741 | if (ret) | ||
2742 | mlog_errno(ret); | ||
2743 | |||
2744 | out: | ||
2745 | return ret; | ||
2746 | } | ||
2747 | |||
2748 | /* | ||
2749 | * Remove split_rec clusters from the record at index and merge them | ||
2750 | * onto the tail of the record at index - 1. | ||
2751 | */ | ||
2752 | static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | ||
2753 | handle_t *handle, | ||
2754 | struct ocfs2_extent_rec *split_rec, | ||
2755 | struct ocfs2_extent_list *el, int index) | ||
2756 | { | ||
2757 | int ret, has_empty_extent = 0; | ||
2758 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | ||
2759 | struct ocfs2_extent_rec *left_rec; | ||
2760 | struct ocfs2_extent_rec *right_rec; | ||
2761 | |||
2762 | BUG_ON(index <= 0); | ||
2763 | |||
2764 | left_rec = &el->l_recs[index - 1]; | ||
2765 | right_rec = &el->l_recs[index]; | ||
2766 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
2767 | has_empty_extent = 1; | ||
2768 | |||
2769 | ret = ocfs2_journal_access(handle, inode, bh, | ||
2770 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2771 | if (ret) { | ||
2772 | mlog_errno(ret); | ||
2773 | goto out; | ||
2774 | } | ||
2775 | |||
2776 | if (has_empty_extent && index == 1) { | ||
2777 | /* | ||
2778 | * The easy case - we can just plop the record right in. | ||
2779 | */ | ||
2780 | *left_rec = *split_rec; | ||
2781 | |||
2782 | has_empty_extent = 0; | ||
2783 | } else { | ||
2784 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | ||
2785 | } | ||
2786 | |||
2787 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | ||
2788 | le64_add_cpu(&right_rec->e_blkno, | ||
2789 | ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | ||
2790 | le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); | ||
2791 | |||
2792 | ocfs2_cleanup_merge(el, index); | ||
2793 | |||
2794 | ret = ocfs2_journal_dirty(handle, bh); | ||
2795 | if (ret) | ||
2796 | mlog_errno(ret); | ||
2797 | |||
2798 | out: | ||
2799 | return ret; | ||
2800 | } | ||
2801 | |||
2802 | static int ocfs2_try_to_merge_extent(struct inode *inode, | ||
2803 | handle_t *handle, | ||
2804 | struct ocfs2_path *left_path, | ||
2805 | int split_index, | ||
2806 | struct ocfs2_extent_rec *split_rec, | ||
2807 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
2808 | struct ocfs2_merge_ctxt *ctxt) | ||
2809 | |||
2810 | { | ||
2811 | int ret = 0, delete_tail_recs = 0; | ||
2812 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | ||
2813 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | ||
2814 | |||
2815 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); | ||
2816 | |||
2817 | if (ctxt->c_split_covers_rec) { | ||
2818 | delete_tail_recs++; | ||
2819 | |||
2820 | if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || | ||
2821 | ctxt->c_has_empty_extent) | ||
2822 | delete_tail_recs++; | ||
2823 | |||
2824 | if (ctxt->c_has_empty_extent) { | ||
2825 | /* | ||
2826 | * The merge code will need to create an empty | ||
2827 | * extent to take the place of the newly | ||
2828 | * emptied slot. Remove any pre-existing empty | ||
2829 | * extents - having more than one in a leaf is | ||
2830 | * illegal. | ||
2831 | */ | ||
2832 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
2833 | dealloc); | ||
2834 | if (ret) { | ||
2835 | mlog_errno(ret); | ||
2836 | goto out; | ||
2837 | } | ||
2838 | split_index--; | ||
2839 | rec = &el->l_recs[split_index]; | ||
2840 | } | ||
2841 | } | ||
2842 | |||
2843 | if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { | ||
2844 | /* | ||
2845 | * Left-right contig implies this. | ||
2846 | */ | ||
2847 | BUG_ON(!ctxt->c_split_covers_rec); | ||
2848 | BUG_ON(split_index == 0); | ||
2849 | |||
2850 | /* | ||
2851 | * Since the leftright insert always covers the entire | ||
2852 | * extent, this call will delete the insert record | ||
2853 | * entirely, resulting in an empty extent record added to | ||
2854 | * the extent block. | ||
2855 | * | ||
2856 | * Since the adding of an empty extent shifts | ||
2857 | * everything back to the right, there's no need to | ||
2858 | * update split_index here. | ||
2859 | */ | ||
2860 | ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), | ||
2861 | handle, split_rec, el, split_index); | ||
2862 | if (ret) { | ||
2863 | mlog_errno(ret); | ||
2864 | goto out; | ||
2865 | } | ||
2866 | |||
2867 | /* | ||
2868 | * We can only get this from logic error above. | ||
2869 | */ | ||
2870 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | ||
2871 | |||
2872 | /* | ||
2873 | * The left merge left us with an empty extent, remove | ||
2874 | * it. | ||
2875 | */ | ||
2876 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); | ||
2877 | if (ret) { | ||
2878 | mlog_errno(ret); | ||
2879 | goto out; | ||
2880 | } | ||
2881 | split_index--; | ||
2882 | rec = &el->l_recs[split_index]; | ||
2883 | |||
2884 | /* | ||
2885 | * Note that we don't pass split_rec here on purpose - | ||
2886 | * we've merged it into the left side. | ||
2887 | */ | ||
2888 | ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), | ||
2889 | handle, rec, el, split_index); | ||
2890 | if (ret) { | ||
2891 | mlog_errno(ret); | ||
2892 | goto out; | ||
2893 | } | ||
2894 | |||
2895 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | ||
2896 | |||
2897 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
2898 | dealloc); | ||
2899 | /* | ||
2900 | * Error from this last rotate is not critical, so | ||
2901 | * print but don't bubble it up. | ||
2902 | */ | ||
2903 | if (ret) | ||
2904 | mlog_errno(ret); | ||
2905 | ret = 0; | ||
2906 | } else { | ||
2907 | /* | ||
2908 | * Merge a record to the left or right. | ||
2909 | * | ||
2910 | * 'contig_type' is relative to the existing record, | ||
2911 | * so for example, if we're "right contig", it's to | ||
2912 | * the record on the left (hence the left merge). | ||
2913 | */ | ||
2914 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | ||
2915 | ret = ocfs2_merge_rec_left(inode, | ||
2916 | path_leaf_bh(left_path), | ||
2917 | handle, split_rec, el, | ||
2918 | split_index); | ||
2919 | if (ret) { | ||
2920 | mlog_errno(ret); | ||
2921 | goto out; | ||
2922 | } | ||
2923 | } else { | ||
2924 | ret = ocfs2_merge_rec_right(inode, | ||
2925 | path_leaf_bh(left_path), | ||
2926 | handle, split_rec, el, | ||
2927 | split_index); | ||
2928 | if (ret) { | ||
2929 | mlog_errno(ret); | ||
2930 | goto out; | ||
2931 | } | ||
2932 | } | ||
2933 | |||
2934 | if (ctxt->c_split_covers_rec) { | ||
2935 | /* | ||
2936 | * The merge may have left an empty extent in | ||
2937 | * our leaf. Try to rotate it away. | ||
2938 | */ | ||
2939 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
2940 | dealloc); | ||
2941 | if (ret) | ||
2942 | mlog_errno(ret); | ||
2943 | ret = 0; | ||
2944 | } | ||
2945 | } | ||
2946 | |||
2947 | out: | ||
2948 | return ret; | ||
2949 | } | ||
2950 | |||
2951 | static void ocfs2_subtract_from_rec(struct super_block *sb, | ||
2952 | enum ocfs2_split_type split, | ||
2953 | struct ocfs2_extent_rec *rec, | ||
2954 | struct ocfs2_extent_rec *split_rec) | ||
2955 | { | ||
2956 | u64 len_blocks; | ||
2957 | |||
2958 | len_blocks = ocfs2_clusters_to_blocks(sb, | ||
2959 | le16_to_cpu(split_rec->e_leaf_clusters)); | ||
2960 | |||
2961 | if (split == SPLIT_LEFT) { | ||
2962 | /* | ||
2963 | * Region is on the left edge of the existing | ||
2964 | * record. | ||
2965 | */ | ||
2966 | le32_add_cpu(&rec->e_cpos, | ||
2967 | le16_to_cpu(split_rec->e_leaf_clusters)); | ||
2968 | le64_add_cpu(&rec->e_blkno, len_blocks); | ||
2969 | le16_add_cpu(&rec->e_leaf_clusters, | ||
2970 | -le16_to_cpu(split_rec->e_leaf_clusters)); | ||
2971 | } else { | ||
2972 | /* | ||
2973 | * Region is on the right edge of the existing | ||
2974 | * record. | ||
2975 | */ | ||
2976 | le16_add_cpu(&rec->e_leaf_clusters, | ||
2977 | -le16_to_cpu(split_rec->e_leaf_clusters)); | ||
2978 | } | ||
2979 | } | ||
2980 | |||
1725 | /* | 2981 | /* |
1726 | * Do the final bits of extent record insertion at the target leaf | 2982 | * Do the final bits of extent record insertion at the target leaf |
1727 | * list. If this leaf is part of an allocation tree, it is assumed | 2983 | * list. If this leaf is part of an allocation tree, it is assumed |
@@ -1738,6 +2994,15 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
1738 | 2994 | ||
1739 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); | 2995 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); |
1740 | 2996 | ||
2997 | if (insert->ins_split != SPLIT_NONE) { | ||
2998 | i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); | ||
2999 | BUG_ON(i == -1); | ||
3000 | rec = &el->l_recs[i]; | ||
3001 | ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, | ||
3002 | insert_rec); | ||
3003 | goto rotate; | ||
3004 | } | ||
3005 | |||
1741 | /* | 3006 | /* |
1742 | * Contiguous insert - either left or right. | 3007 | * Contiguous insert - either left or right. |
1743 | */ | 3008 | */ |
@@ -1792,6 +3057,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
1792 | return; | 3057 | return; |
1793 | } | 3058 | } |
1794 | 3059 | ||
3060 | rotate: | ||
1795 | /* | 3061 | /* |
1796 | * Ok, we have to rotate. | 3062 | * Ok, we have to rotate. |
1797 | * | 3063 | * |
@@ -1815,13 +3081,53 @@ static inline void ocfs2_update_dinode_clusters(struct inode *inode, | |||
1815 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 3081 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1816 | } | 3082 | } |
1817 | 3083 | ||
3084 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | ||
3085 | handle_t *handle, | ||
3086 | struct ocfs2_path *path, | ||
3087 | struct ocfs2_extent_rec *insert_rec) | ||
3088 | { | ||
3089 | int ret, i, next_free; | ||
3090 | struct buffer_head *bh; | ||
3091 | struct ocfs2_extent_list *el; | ||
3092 | struct ocfs2_extent_rec *rec; | ||
3093 | |||
3094 | /* | ||
3095 | * Update everything except the leaf block. | ||
3096 | */ | ||
3097 | for (i = 0; i < path->p_tree_depth; i++) { | ||
3098 | bh = path->p_node[i].bh; | ||
3099 | el = path->p_node[i].el; | ||
3100 | |||
3101 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
3102 | if (next_free == 0) { | ||
3103 | ocfs2_error(inode->i_sb, | ||
3104 | "Dinode %llu has a bad extent list", | ||
3105 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
3106 | ret = -EIO; | ||
3107 | return; | ||
3108 | } | ||
3109 | |||
3110 | rec = &el->l_recs[next_free - 1]; | ||
3111 | |||
3112 | rec->e_int_clusters = insert_rec->e_cpos; | ||
3113 | le32_add_cpu(&rec->e_int_clusters, | ||
3114 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
3115 | le32_add_cpu(&rec->e_int_clusters, | ||
3116 | -le32_to_cpu(rec->e_cpos)); | ||
3117 | |||
3118 | ret = ocfs2_journal_dirty(handle, bh); | ||
3119 | if (ret) | ||
3120 | mlog_errno(ret); | ||
3121 | |||
3122 | } | ||
3123 | } | ||
3124 | |||
1818 | static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | 3125 | static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, |
1819 | struct ocfs2_extent_rec *insert_rec, | 3126 | struct ocfs2_extent_rec *insert_rec, |
1820 | struct ocfs2_path *right_path, | 3127 | struct ocfs2_path *right_path, |
1821 | struct ocfs2_path **ret_left_path) | 3128 | struct ocfs2_path **ret_left_path) |
1822 | { | 3129 | { |
1823 | int ret, i, next_free; | 3130 | int ret, next_free; |
1824 | struct buffer_head *bh; | ||
1825 | struct ocfs2_extent_list *el; | 3131 | struct ocfs2_extent_list *el; |
1826 | struct ocfs2_path *left_path = NULL; | 3132 | struct ocfs2_path *left_path = NULL; |
1827 | 3133 | ||
@@ -1887,40 +3193,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | |||
1887 | goto out; | 3193 | goto out; |
1888 | } | 3194 | } |
1889 | 3195 | ||
1890 | el = path_root_el(right_path); | 3196 | ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); |
1891 | bh = path_root_bh(right_path); | ||
1892 | i = 0; | ||
1893 | while (1) { | ||
1894 | struct ocfs2_extent_rec *rec; | ||
1895 | |||
1896 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
1897 | if (next_free == 0) { | ||
1898 | ocfs2_error(inode->i_sb, | ||
1899 | "Dinode %llu has a bad extent list", | ||
1900 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
1901 | ret = -EIO; | ||
1902 | goto out; | ||
1903 | } | ||
1904 | |||
1905 | rec = &el->l_recs[next_free - 1]; | ||
1906 | |||
1907 | rec->e_int_clusters = insert_rec->e_cpos; | ||
1908 | le32_add_cpu(&rec->e_int_clusters, | ||
1909 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
1910 | le32_add_cpu(&rec->e_int_clusters, | ||
1911 | -le32_to_cpu(rec->e_cpos)); | ||
1912 | |||
1913 | ret = ocfs2_journal_dirty(handle, bh); | ||
1914 | if (ret) | ||
1915 | mlog_errno(ret); | ||
1916 | |||
1917 | /* Don't touch the leaf node */ | ||
1918 | if (++i >= right_path->p_tree_depth) | ||
1919 | break; | ||
1920 | |||
1921 | bh = right_path->p_node[i].bh; | ||
1922 | el = right_path->p_node[i].el; | ||
1923 | } | ||
1924 | 3197 | ||
1925 | *ret_left_path = left_path; | 3198 | *ret_left_path = left_path; |
1926 | ret = 0; | 3199 | ret = 0; |
@@ -1931,6 +3204,83 @@ out: | |||
1931 | return ret; | 3204 | return ret; |
1932 | } | 3205 | } |
1933 | 3206 | ||
3207 | static void ocfs2_split_record(struct inode *inode, | ||
3208 | struct ocfs2_path *left_path, | ||
3209 | struct ocfs2_path *right_path, | ||
3210 | struct ocfs2_extent_rec *split_rec, | ||
3211 | enum ocfs2_split_type split) | ||
3212 | { | ||
3213 | int index; | ||
3214 | u32 cpos = le32_to_cpu(split_rec->e_cpos); | ||
3215 | struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; | ||
3216 | struct ocfs2_extent_rec *rec, *tmprec; | ||
3217 | |||
3218 | right_el = path_leaf_el(right_path);; | ||
3219 | if (left_path) | ||
3220 | left_el = path_leaf_el(left_path); | ||
3221 | |||
3222 | el = right_el; | ||
3223 | insert_el = right_el; | ||
3224 | index = ocfs2_search_extent_list(el, cpos); | ||
3225 | if (index != -1) { | ||
3226 | if (index == 0 && left_path) { | ||
3227 | BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); | ||
3228 | |||
3229 | /* | ||
3230 | * This typically means that the record | ||
3231 | * started in the left path but moved to the | ||
3232 | * right as a result of rotation. We either | ||
3233 | * move the existing record to the left, or we | ||
3234 | * do the later insert there. | ||
3235 | * | ||
3236 | * In this case, the left path should always | ||
3237 | * exist as the rotate code will have passed | ||
3238 | * it back for a post-insert update. | ||
3239 | */ | ||
3240 | |||
3241 | if (split == SPLIT_LEFT) { | ||
3242 | /* | ||
3243 | * It's a left split. Since we know | ||
3244 | * that the rotate code gave us an | ||
3245 | * empty extent in the left path, we | ||
3246 | * can just do the insert there. | ||
3247 | */ | ||
3248 | insert_el = left_el; | ||
3249 | } else { | ||
3250 | /* | ||
3251 | * Right split - we have to move the | ||
3252 | * existing record over to the left | ||
3253 | * leaf. The insert will be into the | ||
3254 | * newly created empty extent in the | ||
3255 | * right leaf. | ||
3256 | */ | ||
3257 | tmprec = &right_el->l_recs[index]; | ||
3258 | ocfs2_rotate_leaf(left_el, tmprec); | ||
3259 | el = left_el; | ||
3260 | |||
3261 | memset(tmprec, 0, sizeof(*tmprec)); | ||
3262 | index = ocfs2_search_extent_list(left_el, cpos); | ||
3263 | BUG_ON(index == -1); | ||
3264 | } | ||
3265 | } | ||
3266 | } else { | ||
3267 | BUG_ON(!left_path); | ||
3268 | BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0])); | ||
3269 | /* | ||
3270 | * Left path is easy - we can just allow the insert to | ||
3271 | * happen. | ||
3272 | */ | ||
3273 | el = left_el; | ||
3274 | insert_el = left_el; | ||
3275 | index = ocfs2_search_extent_list(el, cpos); | ||
3276 | BUG_ON(index == -1); | ||
3277 | } | ||
3278 | |||
3279 | rec = &el->l_recs[index]; | ||
3280 | ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); | ||
3281 | ocfs2_rotate_leaf(insert_el, split_rec); | ||
3282 | } | ||
3283 | |||
1934 | /* | 3284 | /* |
1935 | * This function only does inserts on an allocation b-tree. For dinode | 3285 | * This function only does inserts on an allocation b-tree. For dinode |
1936 | * lists, ocfs2_insert_at_leaf() is called directly. | 3286 | * lists, ocfs2_insert_at_leaf() is called directly. |
@@ -1948,7 +3298,6 @@ static int ocfs2_insert_path(struct inode *inode, | |||
1948 | { | 3298 | { |
1949 | int ret, subtree_index; | 3299 | int ret, subtree_index; |
1950 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); | 3300 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); |
1951 | struct ocfs2_extent_list *el; | ||
1952 | 3301 | ||
1953 | /* | 3302 | /* |
1954 | * Pass both paths to the journal. The majority of inserts | 3303 | * Pass both paths to the journal. The majority of inserts |
@@ -1984,9 +3333,18 @@ static int ocfs2_insert_path(struct inode *inode, | |||
1984 | } | 3333 | } |
1985 | } | 3334 | } |
1986 | 3335 | ||
1987 | el = path_leaf_el(right_path); | 3336 | if (insert->ins_split != SPLIT_NONE) { |
3337 | /* | ||
3338 | * We could call ocfs2_insert_at_leaf() for some types | ||
3339 | * of splits, but it's easier to just let one seperate | ||
3340 | * function sort it all out. | ||
3341 | */ | ||
3342 | ocfs2_split_record(inode, left_path, right_path, | ||
3343 | insert_rec, insert->ins_split); | ||
3344 | } else | ||
3345 | ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), | ||
3346 | insert, inode); | ||
1988 | 3347 | ||
1989 | ocfs2_insert_at_leaf(insert_rec, el, insert, inode); | ||
1990 | ret = ocfs2_journal_dirty(handle, leaf_bh); | 3348 | ret = ocfs2_journal_dirty(handle, leaf_bh); |
1991 | if (ret) | 3349 | if (ret) |
1992 | mlog_errno(ret); | 3350 | mlog_errno(ret); |
@@ -2075,7 +3433,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
2075 | * can wind up skipping both of these two special cases... | 3433 | * can wind up skipping both of these two special cases... |
2076 | */ | 3434 | */ |
2077 | if (rotate) { | 3435 | if (rotate) { |
2078 | ret = ocfs2_rotate_tree_right(inode, handle, | 3436 | ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, |
2079 | le32_to_cpu(insert_rec->e_cpos), | 3437 | le32_to_cpu(insert_rec->e_cpos), |
2080 | right_path, &left_path); | 3438 | right_path, &left_path); |
2081 | if (ret) { | 3439 | if (ret) { |
@@ -2100,8 +3458,9 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
2100 | } | 3458 | } |
2101 | 3459 | ||
2102 | out_update_clusters: | 3460 | out_update_clusters: |
2103 | ocfs2_update_dinode_clusters(inode, di, | 3461 | if (type->ins_split == SPLIT_NONE) |
2104 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 3462 | ocfs2_update_dinode_clusters(inode, di, |
3463 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
2105 | 3464 | ||
2106 | ret = ocfs2_journal_dirty(handle, di_bh); | 3465 | ret = ocfs2_journal_dirty(handle, di_bh); |
2107 | if (ret) | 3466 | if (ret) |
@@ -2114,6 +3473,44 @@ out: | |||
2114 | return ret; | 3473 | return ret; |
2115 | } | 3474 | } |
2116 | 3475 | ||
3476 | static enum ocfs2_contig_type | ||
3477 | ocfs2_figure_merge_contig_type(struct inode *inode, | ||
3478 | struct ocfs2_extent_list *el, int index, | ||
3479 | struct ocfs2_extent_rec *split_rec) | ||
3480 | { | ||
3481 | struct ocfs2_extent_rec *rec; | ||
3482 | enum ocfs2_contig_type ret = CONTIG_NONE; | ||
3483 | |||
3484 | /* | ||
3485 | * We're careful to check for an empty extent record here - | ||
3486 | * the merge code will know what to do if it sees one. | ||
3487 | */ | ||
3488 | |||
3489 | if (index > 0) { | ||
3490 | rec = &el->l_recs[index - 1]; | ||
3491 | if (index == 1 && ocfs2_is_empty_extent(rec)) { | ||
3492 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | ||
3493 | ret = CONTIG_RIGHT; | ||
3494 | } else { | ||
3495 | ret = ocfs2_extent_contig(inode, rec, split_rec); | ||
3496 | } | ||
3497 | } | ||
3498 | |||
3499 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
3500 | enum ocfs2_contig_type contig_type; | ||
3501 | |||
3502 | rec = &el->l_recs[index + 1]; | ||
3503 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | ||
3504 | |||
3505 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | ||
3506 | ret = CONTIG_LEFTRIGHT; | ||
3507 | else if (ret == CONTIG_NONE) | ||
3508 | ret = contig_type; | ||
3509 | } | ||
3510 | |||
3511 | return ret; | ||
3512 | } | ||
3513 | |||
2117 | static void ocfs2_figure_contig_type(struct inode *inode, | 3514 | static void ocfs2_figure_contig_type(struct inode *inode, |
2118 | struct ocfs2_insert_type *insert, | 3515 | struct ocfs2_insert_type *insert, |
2119 | struct ocfs2_extent_list *el, | 3516 | struct ocfs2_extent_list *el, |
@@ -2205,6 +3602,8 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
2205 | struct ocfs2_path *path = NULL; | 3602 | struct ocfs2_path *path = NULL; |
2206 | struct buffer_head *bh = NULL; | 3603 | struct buffer_head *bh = NULL; |
2207 | 3604 | ||
3605 | insert->ins_split = SPLIT_NONE; | ||
3606 | |||
2208 | el = &di->id2.i_list; | 3607 | el = &di->id2.i_list; |
2209 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); | 3608 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); |
2210 | 3609 | ||
@@ -2327,9 +3726,10 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
2327 | u32 cpos, | 3726 | u32 cpos, |
2328 | u64 start_blk, | 3727 | u64 start_blk, |
2329 | u32 new_clusters, | 3728 | u32 new_clusters, |
3729 | u8 flags, | ||
2330 | struct ocfs2_alloc_context *meta_ac) | 3730 | struct ocfs2_alloc_context *meta_ac) |
2331 | { | 3731 | { |
2332 | int status, shift; | 3732 | int status; |
2333 | struct buffer_head *last_eb_bh = NULL; | 3733 | struct buffer_head *last_eb_bh = NULL; |
2334 | struct buffer_head *bh = NULL; | 3734 | struct buffer_head *bh = NULL; |
2335 | struct ocfs2_insert_type insert = {0, }; | 3735 | struct ocfs2_insert_type insert = {0, }; |
@@ -2350,6 +3750,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
2350 | rec.e_cpos = cpu_to_le32(cpos); | 3750 | rec.e_cpos = cpu_to_le32(cpos); |
2351 | rec.e_blkno = cpu_to_le64(start_blk); | 3751 | rec.e_blkno = cpu_to_le64(start_blk); |
2352 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 3752 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
3753 | rec.e_flags = flags; | ||
2353 | 3754 | ||
2354 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, | 3755 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, |
2355 | &insert); | 3756 | &insert); |
@@ -2364,55 +3765,16 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
2364 | insert.ins_appending, insert.ins_contig, insert.ins_contig_index, | 3765 | insert.ins_appending, insert.ins_contig, insert.ins_contig_index, |
2365 | insert.ins_free_records, insert.ins_tree_depth); | 3766 | insert.ins_free_records, insert.ins_tree_depth); |
2366 | 3767 | ||
2367 | /* | 3768 | if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { |
2368 | * Avoid growing the tree unless we're out of records and the | 3769 | status = ocfs2_grow_tree(inode, handle, fe_bh, |
2369 | * insert type requres one. | 3770 | &insert.ins_tree_depth, &last_eb_bh, |
2370 | */ | 3771 | meta_ac); |
2371 | if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) | 3772 | if (status) { |
2372 | goto out_add; | ||
2373 | |||
2374 | shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); | ||
2375 | if (shift < 0) { | ||
2376 | status = shift; | ||
2377 | mlog_errno(status); | ||
2378 | goto bail; | ||
2379 | } | ||
2380 | |||
2381 | /* We traveled all the way to the bottom of the allocation tree | ||
2382 | * and didn't find room for any more extents - we need to add | ||
2383 | * another tree level */ | ||
2384 | if (shift) { | ||
2385 | BUG_ON(bh); | ||
2386 | mlog(0, "need to shift tree depth " | ||
2387 | "(current = %d)\n", insert.ins_tree_depth); | ||
2388 | |||
2389 | /* ocfs2_shift_tree_depth will return us a buffer with | ||
2390 | * the new extent block (so we can pass that to | ||
2391 | * ocfs2_add_branch). */ | ||
2392 | status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, | ||
2393 | meta_ac, &bh); | ||
2394 | if (status < 0) { | ||
2395 | mlog_errno(status); | 3773 | mlog_errno(status); |
2396 | goto bail; | 3774 | goto bail; |
2397 | } | 3775 | } |
2398 | insert.ins_tree_depth++; | ||
2399 | /* Special case: we have room now if we shifted from | ||
2400 | * tree_depth 0 */ | ||
2401 | if (insert.ins_tree_depth == 1) | ||
2402 | goto out_add; | ||
2403 | } | ||
2404 | |||
2405 | /* call ocfs2_add_branch to add the final part of the tree with | ||
2406 | * the new data. */ | ||
2407 | mlog(0, "add branch. bh = %p\n", bh); | ||
2408 | status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, | ||
2409 | meta_ac); | ||
2410 | if (status < 0) { | ||
2411 | mlog_errno(status); | ||
2412 | goto bail; | ||
2413 | } | 3776 | } |
2414 | 3777 | ||
2415 | out_add: | ||
2416 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 3778 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
2417 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); | 3779 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); |
2418 | if (status < 0) | 3780 | if (status < 0) |
@@ -2431,7 +3793,720 @@ bail: | |||
2431 | return status; | 3793 | return status; |
2432 | } | 3794 | } |
2433 | 3795 | ||
2434 | static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | 3796 | static void ocfs2_make_right_split_rec(struct super_block *sb, |
3797 | struct ocfs2_extent_rec *split_rec, | ||
3798 | u32 cpos, | ||
3799 | struct ocfs2_extent_rec *rec) | ||
3800 | { | ||
3801 | u32 rec_cpos = le32_to_cpu(rec->e_cpos); | ||
3802 | u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters); | ||
3803 | |||
3804 | memset(split_rec, 0, sizeof(struct ocfs2_extent_rec)); | ||
3805 | |||
3806 | split_rec->e_cpos = cpu_to_le32(cpos); | ||
3807 | split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos); | ||
3808 | |||
3809 | split_rec->e_blkno = rec->e_blkno; | ||
3810 | le64_add_cpu(&split_rec->e_blkno, | ||
3811 | ocfs2_clusters_to_blocks(sb, cpos - rec_cpos)); | ||
3812 | |||
3813 | split_rec->e_flags = rec->e_flags; | ||
3814 | } | ||
3815 | |||
3816 | static int ocfs2_split_and_insert(struct inode *inode, | ||
3817 | handle_t *handle, | ||
3818 | struct ocfs2_path *path, | ||
3819 | struct buffer_head *di_bh, | ||
3820 | struct buffer_head **last_eb_bh, | ||
3821 | int split_index, | ||
3822 | struct ocfs2_extent_rec *orig_split_rec, | ||
3823 | struct ocfs2_alloc_context *meta_ac) | ||
3824 | { | ||
3825 | int ret = 0, depth; | ||
3826 | unsigned int insert_range, rec_range, do_leftright = 0; | ||
3827 | struct ocfs2_extent_rec tmprec; | ||
3828 | struct ocfs2_extent_list *rightmost_el; | ||
3829 | struct ocfs2_extent_rec rec; | ||
3830 | struct ocfs2_extent_rec split_rec = *orig_split_rec; | ||
3831 | struct ocfs2_insert_type insert; | ||
3832 | struct ocfs2_extent_block *eb; | ||
3833 | struct ocfs2_dinode *di; | ||
3834 | |||
3835 | leftright: | ||
3836 | /* | ||
3837 | * Store a copy of the record on the stack - it might move | ||
3838 | * around as the tree is manipulated below. | ||
3839 | */ | ||
3840 | rec = path_leaf_el(path)->l_recs[split_index]; | ||
3841 | |||
3842 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3843 | rightmost_el = &di->id2.i_list; | ||
3844 | |||
3845 | depth = le16_to_cpu(rightmost_el->l_tree_depth); | ||
3846 | if (depth) { | ||
3847 | BUG_ON(!(*last_eb_bh)); | ||
3848 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; | ||
3849 | rightmost_el = &eb->h_list; | ||
3850 | } | ||
3851 | |||
3852 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | ||
3853 | le16_to_cpu(rightmost_el->l_count)) { | ||
3854 | int old_depth = depth; | ||
3855 | |||
3856 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, | ||
3857 | meta_ac); | ||
3858 | if (ret) { | ||
3859 | mlog_errno(ret); | ||
3860 | goto out; | ||
3861 | } | ||
3862 | |||
3863 | if (old_depth != depth) { | ||
3864 | eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; | ||
3865 | rightmost_el = &eb->h_list; | ||
3866 | } | ||
3867 | } | ||
3868 | |||
3869 | memset(&insert, 0, sizeof(struct ocfs2_insert_type)); | ||
3870 | insert.ins_appending = APPEND_NONE; | ||
3871 | insert.ins_contig = CONTIG_NONE; | ||
3872 | insert.ins_free_records = le16_to_cpu(rightmost_el->l_count) | ||
3873 | - le16_to_cpu(rightmost_el->l_next_free_rec); | ||
3874 | insert.ins_tree_depth = depth; | ||
3875 | |||
3876 | insert_range = le32_to_cpu(split_rec.e_cpos) + | ||
3877 | le16_to_cpu(split_rec.e_leaf_clusters); | ||
3878 | rec_range = le32_to_cpu(rec.e_cpos) + | ||
3879 | le16_to_cpu(rec.e_leaf_clusters); | ||
3880 | |||
3881 | if (split_rec.e_cpos == rec.e_cpos) { | ||
3882 | insert.ins_split = SPLIT_LEFT; | ||
3883 | } else if (insert_range == rec_range) { | ||
3884 | insert.ins_split = SPLIT_RIGHT; | ||
3885 | } else { | ||
3886 | /* | ||
3887 | * Left/right split. We fake this as a right split | ||
3888 | * first and then make a second pass as a left split. | ||
3889 | */ | ||
3890 | insert.ins_split = SPLIT_RIGHT; | ||
3891 | |||
3892 | ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, | ||
3893 | &rec); | ||
3894 | |||
3895 | split_rec = tmprec; | ||
3896 | |||
3897 | BUG_ON(do_leftright); | ||
3898 | do_leftright = 1; | ||
3899 | } | ||
3900 | |||
3901 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, | ||
3902 | &insert); | ||
3903 | if (ret) { | ||
3904 | mlog_errno(ret); | ||
3905 | goto out; | ||
3906 | } | ||
3907 | |||
3908 | if (do_leftright == 1) { | ||
3909 | u32 cpos; | ||
3910 | struct ocfs2_extent_list *el; | ||
3911 | |||
3912 | do_leftright++; | ||
3913 | split_rec = *orig_split_rec; | ||
3914 | |||
3915 | ocfs2_reinit_path(path, 1); | ||
3916 | |||
3917 | cpos = le32_to_cpu(split_rec.e_cpos); | ||
3918 | ret = ocfs2_find_path(inode, path, cpos); | ||
3919 | if (ret) { | ||
3920 | mlog_errno(ret); | ||
3921 | goto out; | ||
3922 | } | ||
3923 | |||
3924 | el = path_leaf_el(path); | ||
3925 | split_index = ocfs2_search_extent_list(el, cpos); | ||
3926 | goto leftright; | ||
3927 | } | ||
3928 | out: | ||
3929 | |||
3930 | return ret; | ||
3931 | } | ||
3932 | |||
3933 | /* | ||
3934 | * Mark part or all of the extent record at split_index in the leaf | ||
3935 | * pointed to by path as written. This removes the unwritten | ||
3936 | * extent flag. | ||
3937 | * | ||
3938 | * Care is taken to handle contiguousness so as to not grow the tree. | ||
3939 | * | ||
3940 | * meta_ac is not strictly necessary - we only truly need it if growth | ||
3941 | * of the tree is required. All other cases will degrade into a less | ||
3942 | * optimal tree layout. | ||
3943 | * | ||
3944 | * last_eb_bh should be the rightmost leaf block for any inode with a | ||
3945 | * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. | ||
3946 | * | ||
3947 | * This code is optimized for readability - several passes might be | ||
3948 | * made over certain portions of the tree. All of those blocks will | ||
3949 | * have been brought into cache (and pinned via the journal), so the | ||
3950 | * extra overhead is not expressed in terms of disk reads. | ||
3951 | */ | ||
3952 | static int __ocfs2_mark_extent_written(struct inode *inode, | ||
3953 | struct buffer_head *di_bh, | ||
3954 | handle_t *handle, | ||
3955 | struct ocfs2_path *path, | ||
3956 | int split_index, | ||
3957 | struct ocfs2_extent_rec *split_rec, | ||
3958 | struct ocfs2_alloc_context *meta_ac, | ||
3959 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
3960 | { | ||
3961 | int ret = 0; | ||
3962 | struct ocfs2_extent_list *el = path_leaf_el(path); | ||
3963 | struct buffer_head *eb_bh, *last_eb_bh = NULL; | ||
3964 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | ||
3965 | struct ocfs2_merge_ctxt ctxt; | ||
3966 | struct ocfs2_extent_list *rightmost_el; | ||
3967 | |||
3968 | if (!rec->e_flags & OCFS2_EXT_UNWRITTEN) { | ||
3969 | ret = -EIO; | ||
3970 | mlog_errno(ret); | ||
3971 | goto out; | ||
3972 | } | ||
3973 | |||
3974 | if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || | ||
3975 | ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < | ||
3976 | (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { | ||
3977 | ret = -EIO; | ||
3978 | mlog_errno(ret); | ||
3979 | goto out; | ||
3980 | } | ||
3981 | |||
3982 | eb_bh = path_leaf_bh(path); | ||
3983 | ret = ocfs2_journal_access(handle, inode, eb_bh, | ||
3984 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3985 | if (ret) { | ||
3986 | mlog_errno(ret); | ||
3987 | goto out; | ||
3988 | } | ||
3989 | |||
3990 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | ||
3991 | split_index, | ||
3992 | split_rec); | ||
3993 | |||
3994 | /* | ||
3995 | * The core merge / split code wants to know how much room is | ||
3996 | * left in this inodes allocation tree, so we pass the | ||
3997 | * rightmost extent list. | ||
3998 | */ | ||
3999 | if (path->p_tree_depth) { | ||
4000 | struct ocfs2_extent_block *eb; | ||
4001 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4002 | |||
4003 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
4004 | le64_to_cpu(di->i_last_eb_blk), | ||
4005 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4006 | if (ret) { | ||
4007 | mlog_exit(ret); | ||
4008 | goto out; | ||
4009 | } | ||
4010 | |||
4011 | eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; | ||
4012 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
4013 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
4014 | ret = -EROFS; | ||
4015 | goto out; | ||
4016 | } | ||
4017 | |||
4018 | rightmost_el = &eb->h_list; | ||
4019 | } else | ||
4020 | rightmost_el = path_root_el(path); | ||
4021 | |||
4022 | ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec); | ||
4023 | if (ctxt.c_used_tail_recs > 0 && | ||
4024 | ocfs2_is_empty_extent(&rightmost_el->l_recs[0])) | ||
4025 | ctxt.c_used_tail_recs--; | ||
4026 | |||
4027 | if (rec->e_cpos == split_rec->e_cpos && | ||
4028 | rec->e_leaf_clusters == split_rec->e_leaf_clusters) | ||
4029 | ctxt.c_split_covers_rec = 1; | ||
4030 | else | ||
4031 | ctxt.c_split_covers_rec = 0; | ||
4032 | |||
4033 | ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); | ||
4034 | |||
4035 | mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " | ||
4036 | "has_empty: %u, split_covers: %u\n", split_index, | ||
4037 | ctxt.c_contig_type, ctxt.c_used_tail_recs, | ||
4038 | ctxt.c_has_empty_extent, ctxt.c_split_covers_rec); | ||
4039 | |||
4040 | if (ctxt.c_contig_type == CONTIG_NONE) { | ||
4041 | if (ctxt.c_split_covers_rec) | ||
4042 | el->l_recs[split_index] = *split_rec; | ||
4043 | else | ||
4044 | ret = ocfs2_split_and_insert(inode, handle, path, di_bh, | ||
4045 | &last_eb_bh, split_index, | ||
4046 | split_rec, meta_ac); | ||
4047 | if (ret) | ||
4048 | mlog_errno(ret); | ||
4049 | } else { | ||
4050 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | ||
4051 | split_index, split_rec, | ||
4052 | dealloc, &ctxt); | ||
4053 | if (ret) | ||
4054 | mlog_errno(ret); | ||
4055 | } | ||
4056 | |||
4057 | ocfs2_journal_dirty(handle, eb_bh); | ||
4058 | |||
4059 | out: | ||
4060 | brelse(last_eb_bh); | ||
4061 | return ret; | ||
4062 | } | ||
4063 | |||
4064 | /* | ||
4065 | * Mark the already-existing extent at cpos as written for len clusters. | ||
4066 | * | ||
4067 | * If the existing extent is larger than the request, initiate a | ||
4068 | * split. An attempt will be made at merging with adjacent extents. | ||
4069 | * | ||
4070 | * The caller is responsible for passing down meta_ac if we'll need it. | ||
4071 | */ | ||
4072 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | ||
4073 | handle_t *handle, u32 cpos, u32 len, u32 phys, | ||
4074 | struct ocfs2_alloc_context *meta_ac, | ||
4075 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
4076 | { | ||
4077 | int ret, index; | ||
4078 | u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); | ||
4079 | struct ocfs2_extent_rec split_rec; | ||
4080 | struct ocfs2_path *left_path = NULL; | ||
4081 | struct ocfs2_extent_list *el; | ||
4082 | |||
4083 | mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", | ||
4084 | inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); | ||
4085 | |||
4086 | if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { | ||
4087 | ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " | ||
4088 | "that are being written to, but the feature bit " | ||
4089 | "is not set in the super block.", | ||
4090 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
4091 | ret = -EROFS; | ||
4092 | goto out; | ||
4093 | } | ||
4094 | |||
4095 | /* | ||
4096 | * XXX: This should be fixed up so that we just re-insert the | ||
4097 | * next extent records. | ||
4098 | */ | ||
4099 | ocfs2_extent_map_trunc(inode, 0); | ||
4100 | |||
4101 | left_path = ocfs2_new_inode_path(di_bh); | ||
4102 | if (!left_path) { | ||
4103 | ret = -ENOMEM; | ||
4104 | mlog_errno(ret); | ||
4105 | goto out; | ||
4106 | } | ||
4107 | |||
4108 | ret = ocfs2_find_path(inode, left_path, cpos); | ||
4109 | if (ret) { | ||
4110 | mlog_errno(ret); | ||
4111 | goto out; | ||
4112 | } | ||
4113 | el = path_leaf_el(left_path); | ||
4114 | |||
4115 | index = ocfs2_search_extent_list(el, cpos); | ||
4116 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
4117 | ocfs2_error(inode->i_sb, | ||
4118 | "Inode %llu has an extent at cpos %u which can no " | ||
4119 | "longer be found.\n", | ||
4120 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | ||
4121 | ret = -EROFS; | ||
4122 | goto out; | ||
4123 | } | ||
4124 | |||
4125 | memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); | ||
4126 | split_rec.e_cpos = cpu_to_le32(cpos); | ||
4127 | split_rec.e_leaf_clusters = cpu_to_le16(len); | ||
4128 | split_rec.e_blkno = cpu_to_le64(start_blkno); | ||
4129 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | ||
4130 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | ||
4131 | |||
4132 | ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, | ||
4133 | index, &split_rec, meta_ac, dealloc); | ||
4134 | if (ret) | ||
4135 | mlog_errno(ret); | ||
4136 | |||
4137 | out: | ||
4138 | ocfs2_free_path(left_path); | ||
4139 | return ret; | ||
4140 | } | ||
4141 | |||
4142 | static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | ||
4143 | handle_t *handle, struct ocfs2_path *path, | ||
4144 | int index, u32 new_range, | ||
4145 | struct ocfs2_alloc_context *meta_ac) | ||
4146 | { | ||
4147 | int ret, depth, credits = handle->h_buffer_credits; | ||
4148 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4149 | struct buffer_head *last_eb_bh = NULL; | ||
4150 | struct ocfs2_extent_block *eb; | ||
4151 | struct ocfs2_extent_list *rightmost_el, *el; | ||
4152 | struct ocfs2_extent_rec split_rec; | ||
4153 | struct ocfs2_extent_rec *rec; | ||
4154 | struct ocfs2_insert_type insert; | ||
4155 | |||
4156 | /* | ||
4157 | * Setup the record to split before we grow the tree. | ||
4158 | */ | ||
4159 | el = path_leaf_el(path); | ||
4160 | rec = &el->l_recs[index]; | ||
4161 | ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec); | ||
4162 | |||
4163 | depth = path->p_tree_depth; | ||
4164 | if (depth > 0) { | ||
4165 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
4166 | le64_to_cpu(di->i_last_eb_blk), | ||
4167 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4168 | if (ret < 0) { | ||
4169 | mlog_errno(ret); | ||
4170 | goto out; | ||
4171 | } | ||
4172 | |||
4173 | eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; | ||
4174 | rightmost_el = &eb->h_list; | ||
4175 | } else | ||
4176 | rightmost_el = path_leaf_el(path); | ||
4177 | |||
4178 | credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); | ||
4179 | ret = ocfs2_extend_trans(handle, credits); | ||
4180 | if (ret) { | ||
4181 | mlog_errno(ret); | ||
4182 | goto out; | ||
4183 | } | ||
4184 | |||
4185 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | ||
4186 | le16_to_cpu(rightmost_el->l_count)) { | ||
4187 | int old_depth = depth; | ||
4188 | |||
4189 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, | ||
4190 | meta_ac); | ||
4191 | if (ret) { | ||
4192 | mlog_errno(ret); | ||
4193 | goto out; | ||
4194 | } | ||
4195 | |||
4196 | if (old_depth != depth) { | ||
4197 | eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; | ||
4198 | rightmost_el = &eb->h_list; | ||
4199 | } | ||
4200 | } | ||
4201 | |||
4202 | memset(&insert, 0, sizeof(struct ocfs2_insert_type)); | ||
4203 | insert.ins_appending = APPEND_NONE; | ||
4204 | insert.ins_contig = CONTIG_NONE; | ||
4205 | insert.ins_split = SPLIT_RIGHT; | ||
4206 | insert.ins_free_records = le16_to_cpu(rightmost_el->l_count) | ||
4207 | - le16_to_cpu(rightmost_el->l_next_free_rec); | ||
4208 | insert.ins_tree_depth = depth; | ||
4209 | |||
4210 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); | ||
4211 | if (ret) | ||
4212 | mlog_errno(ret); | ||
4213 | |||
4214 | out: | ||
4215 | brelse(last_eb_bh); | ||
4216 | return ret; | ||
4217 | } | ||
4218 | |||
4219 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | ||
4220 | struct ocfs2_path *path, int index, | ||
4221 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
4222 | u32 cpos, u32 len) | ||
4223 | { | ||
4224 | int ret; | ||
4225 | u32 left_cpos, rec_range, trunc_range; | ||
4226 | int wants_rotate = 0, is_rightmost_tree_rec = 0; | ||
4227 | struct super_block *sb = inode->i_sb; | ||
4228 | struct ocfs2_path *left_path = NULL; | ||
4229 | struct ocfs2_extent_list *el = path_leaf_el(path); | ||
4230 | struct ocfs2_extent_rec *rec; | ||
4231 | struct ocfs2_extent_block *eb; | ||
4232 | |||
4233 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { | ||
4234 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | ||
4235 | if (ret) { | ||
4236 | mlog_errno(ret); | ||
4237 | goto out; | ||
4238 | } | ||
4239 | |||
4240 | index--; | ||
4241 | } | ||
4242 | |||
4243 | if (index == (le16_to_cpu(el->l_next_free_rec) - 1) && | ||
4244 | path->p_tree_depth) { | ||
4245 | /* | ||
4246 | * Check whether this is the rightmost tree record. If | ||
4247 | * we remove all of this record or part of its right | ||
4248 | * edge then an update of the record lengths above it | ||
4249 | * will be required. | ||
4250 | */ | ||
4251 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | ||
4252 | if (eb->h_next_leaf_blk == 0) | ||
4253 | is_rightmost_tree_rec = 1; | ||
4254 | } | ||
4255 | |||
4256 | rec = &el->l_recs[index]; | ||
4257 | if (index == 0 && path->p_tree_depth && | ||
4258 | le32_to_cpu(rec->e_cpos) == cpos) { | ||
4259 | /* | ||
4260 | * Changing the leftmost offset (via partial or whole | ||
4261 | * record truncate) of an interior (or rightmost) path | ||
4262 | * means we have to update the subtree that is formed | ||
4263 | * by this leaf and the one to it's left. | ||
4264 | * | ||
4265 | * There are two cases we can skip: | ||
4266 | * 1) Path is the leftmost one in our inode tree. | ||
4267 | * 2) The leaf is rightmost and will be empty after | ||
4268 | * we remove the extent record - the rotate code | ||
4269 | * knows how to update the newly formed edge. | ||
4270 | */ | ||
4271 | |||
4272 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, | ||
4273 | &left_cpos); | ||
4274 | if (ret) { | ||
4275 | mlog_errno(ret); | ||
4276 | goto out; | ||
4277 | } | ||
4278 | |||
4279 | if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) { | ||
4280 | left_path = ocfs2_new_path(path_root_bh(path), | ||
4281 | path_root_el(path)); | ||
4282 | if (!left_path) { | ||
4283 | ret = -ENOMEM; | ||
4284 | mlog_errno(ret); | ||
4285 | goto out; | ||
4286 | } | ||
4287 | |||
4288 | ret = ocfs2_find_path(inode, left_path, left_cpos); | ||
4289 | if (ret) { | ||
4290 | mlog_errno(ret); | ||
4291 | goto out; | ||
4292 | } | ||
4293 | } | ||
4294 | } | ||
4295 | |||
4296 | ret = ocfs2_extend_rotate_transaction(handle, 0, | ||
4297 | handle->h_buffer_credits, | ||
4298 | path); | ||
4299 | if (ret) { | ||
4300 | mlog_errno(ret); | ||
4301 | goto out; | ||
4302 | } | ||
4303 | |||
4304 | ret = ocfs2_journal_access_path(inode, handle, path); | ||
4305 | if (ret) { | ||
4306 | mlog_errno(ret); | ||
4307 | goto out; | ||
4308 | } | ||
4309 | |||
4310 | ret = ocfs2_journal_access_path(inode, handle, left_path); | ||
4311 | if (ret) { | ||
4312 | mlog_errno(ret); | ||
4313 | goto out; | ||
4314 | } | ||
4315 | |||
4316 | rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
4317 | trunc_range = cpos + len; | ||
4318 | |||
4319 | if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) { | ||
4320 | int next_free; | ||
4321 | |||
4322 | memset(rec, 0, sizeof(*rec)); | ||
4323 | ocfs2_cleanup_merge(el, index); | ||
4324 | wants_rotate = 1; | ||
4325 | |||
4326 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
4327 | if (is_rightmost_tree_rec && next_free > 1) { | ||
4328 | /* | ||
4329 | * We skip the edge update if this path will | ||
4330 | * be deleted by the rotate code. | ||
4331 | */ | ||
4332 | rec = &el->l_recs[next_free - 1]; | ||
4333 | ocfs2_adjust_rightmost_records(inode, handle, path, | ||
4334 | rec); | ||
4335 | } | ||
4336 | } else if (le32_to_cpu(rec->e_cpos) == cpos) { | ||
4337 | /* Remove leftmost portion of the record. */ | ||
4338 | le32_add_cpu(&rec->e_cpos, len); | ||
4339 | le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len)); | ||
4340 | le16_add_cpu(&rec->e_leaf_clusters, -len); | ||
4341 | } else if (rec_range == trunc_range) { | ||
4342 | /* Remove rightmost portion of the record */ | ||
4343 | le16_add_cpu(&rec->e_leaf_clusters, -len); | ||
4344 | if (is_rightmost_tree_rec) | ||
4345 | ocfs2_adjust_rightmost_records(inode, handle, path, rec); | ||
4346 | } else { | ||
4347 | /* Caller should have trapped this. */ | ||
4348 | mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) " | ||
4349 | "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
4350 | le32_to_cpu(rec->e_cpos), | ||
4351 | le16_to_cpu(rec->e_leaf_clusters), cpos, len); | ||
4352 | BUG(); | ||
4353 | } | ||
4354 | |||
4355 | if (left_path) { | ||
4356 | int subtree_index; | ||
4357 | |||
4358 | subtree_index = ocfs2_find_subtree_root(inode, left_path, path); | ||
4359 | ocfs2_complete_edge_insert(inode, handle, left_path, path, | ||
4360 | subtree_index); | ||
4361 | } | ||
4362 | |||
4363 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); | ||
4364 | |||
4365 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | ||
4366 | if (ret) { | ||
4367 | mlog_errno(ret); | ||
4368 | goto out; | ||
4369 | } | ||
4370 | |||
4371 | out: | ||
4372 | ocfs2_free_path(left_path); | ||
4373 | return ret; | ||
4374 | } | ||
4375 | |||
4376 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
4377 | u32 cpos, u32 len, handle_t *handle, | ||
4378 | struct ocfs2_alloc_context *meta_ac, | ||
4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
4380 | { | ||
4381 | int ret, index; | ||
4382 | u32 rec_range, trunc_range; | ||
4383 | struct ocfs2_extent_rec *rec; | ||
4384 | struct ocfs2_extent_list *el; | ||
4385 | struct ocfs2_path *path; | ||
4386 | |||
4387 | ocfs2_extent_map_trunc(inode, 0); | ||
4388 | |||
4389 | path = ocfs2_new_inode_path(di_bh); | ||
4390 | if (!path) { | ||
4391 | ret = -ENOMEM; | ||
4392 | mlog_errno(ret); | ||
4393 | goto out; | ||
4394 | } | ||
4395 | |||
4396 | ret = ocfs2_find_path(inode, path, cpos); | ||
4397 | if (ret) { | ||
4398 | mlog_errno(ret); | ||
4399 | goto out; | ||
4400 | } | ||
4401 | |||
4402 | el = path_leaf_el(path); | ||
4403 | index = ocfs2_search_extent_list(el, cpos); | ||
4404 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
4405 | ocfs2_error(inode->i_sb, | ||
4406 | "Inode %llu has an extent at cpos %u which can no " | ||
4407 | "longer be found.\n", | ||
4408 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | ||
4409 | ret = -EROFS; | ||
4410 | goto out; | ||
4411 | } | ||
4412 | |||
4413 | /* | ||
4414 | * We have 3 cases of extent removal: | ||
4415 | * 1) Range covers the entire extent rec | ||
4416 | * 2) Range begins or ends on one edge of the extent rec | ||
4417 | * 3) Range is in the middle of the extent rec (no shared edges) | ||
4418 | * | ||
4419 | * For case 1 we remove the extent rec and left rotate to | ||
4420 | * fill the hole. | ||
4421 | * | ||
4422 | * For case 2 we just shrink the existing extent rec, with a | ||
4423 | * tree update if the shrinking edge is also the edge of an | ||
4424 | * extent block. | ||
4425 | * | ||
4426 | * For case 3 we do a right split to turn the extent rec into | ||
4427 | * something case 2 can handle. | ||
4428 | */ | ||
4429 | rec = &el->l_recs[index]; | ||
4430 | rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
4431 | trunc_range = cpos + len; | ||
4432 | |||
4433 | BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); | ||
4434 | |||
4435 | mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " | ||
4436 | "(cpos %u, len %u)\n", | ||
4437 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, | ||
4438 | le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); | ||
4439 | |||
4440 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | ||
4441 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | ||
4442 | cpos, len); | ||
4443 | if (ret) { | ||
4444 | mlog_errno(ret); | ||
4445 | goto out; | ||
4446 | } | ||
4447 | } else { | ||
4448 | ret = ocfs2_split_tree(inode, di_bh, handle, path, index, | ||
4449 | trunc_range, meta_ac); | ||
4450 | if (ret) { | ||
4451 | mlog_errno(ret); | ||
4452 | goto out; | ||
4453 | } | ||
4454 | |||
4455 | /* | ||
4456 | * The split could have manipulated the tree enough to | ||
4457 | * move the record location, so we have to look for it again. | ||
4458 | */ | ||
4459 | ocfs2_reinit_path(path, 1); | ||
4460 | |||
4461 | ret = ocfs2_find_path(inode, path, cpos); | ||
4462 | if (ret) { | ||
4463 | mlog_errno(ret); | ||
4464 | goto out; | ||
4465 | } | ||
4466 | |||
4467 | el = path_leaf_el(path); | ||
4468 | index = ocfs2_search_extent_list(el, cpos); | ||
4469 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
4470 | ocfs2_error(inode->i_sb, | ||
4471 | "Inode %llu: split at cpos %u lost record.", | ||
4472 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
4473 | cpos); | ||
4474 | ret = -EROFS; | ||
4475 | goto out; | ||
4476 | } | ||
4477 | |||
4478 | /* | ||
4479 | * Double check our values here. If anything is fishy, | ||
4480 | * it's easier to catch it at the top level. | ||
4481 | */ | ||
4482 | rec = &el->l_recs[index]; | ||
4483 | rec_range = le32_to_cpu(rec->e_cpos) + | ||
4484 | ocfs2_rec_clusters(el, rec); | ||
4485 | if (rec_range != trunc_range) { | ||
4486 | ocfs2_error(inode->i_sb, | ||
4487 | "Inode %llu: error after split at cpos %u" | ||
4488 | "trunc len %u, existing record is (%u,%u)", | ||
4489 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
4490 | cpos, len, le32_to_cpu(rec->e_cpos), | ||
4491 | ocfs2_rec_clusters(el, rec)); | ||
4492 | ret = -EROFS; | ||
4493 | goto out; | ||
4494 | } | ||
4495 | |||
4496 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | ||
4497 | cpos, len); | ||
4498 | if (ret) { | ||
4499 | mlog_errno(ret); | ||
4500 | goto out; | ||
4501 | } | ||
4502 | } | ||
4503 | |||
4504 | out: | ||
4505 | ocfs2_free_path(path); | ||
4506 | return ret; | ||
4507 | } | ||
4508 | |||
4509 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | ||
2435 | { | 4510 | { |
2436 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
2437 | struct ocfs2_dinode *di; | 4512 | struct ocfs2_dinode *di; |
@@ -2464,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, | |||
2464 | return current_tail == new_start; | 4539 | return current_tail == new_start; |
2465 | } | 4540 | } |
2466 | 4541 | ||
2467 | static int ocfs2_truncate_log_append(struct ocfs2_super *osb, | 4542 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, |
2468 | handle_t *handle, | 4543 | handle_t *handle, |
2469 | u64 start_blk, | 4544 | u64 start_blk, |
2470 | unsigned int num_clusters) | 4545 | unsigned int num_clusters) |
2471 | { | 4546 | { |
2472 | int status, index; | 4547 | int status, index; |
2473 | unsigned int start_cluster, tl_count; | 4548 | unsigned int start_cluster, tl_count; |
@@ -2623,7 +4698,7 @@ bail: | |||
2623 | } | 4698 | } |
2624 | 4699 | ||
2625 | /* Expects you to already be holding tl_inode->i_mutex */ | 4700 | /* Expects you to already be holding tl_inode->i_mutex */ |
2626 | static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | 4701 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) |
2627 | { | 4702 | { |
2628 | int status; | 4703 | int status; |
2629 | unsigned int num_to_flush; | 4704 | unsigned int num_to_flush; |
@@ -2957,6 +5032,219 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) | |||
2957 | return status; | 5032 | return status; |
2958 | } | 5033 | } |
2959 | 5034 | ||
5035 | /* | ||
5036 | * Delayed de-allocation of suballocator blocks. | ||
5037 | * | ||
5038 | * Some sets of block de-allocations might involve multiple suballocator inodes. | ||
5039 | * | ||
5040 | * The locking for this can get extremely complicated, especially when | ||
5041 | * the suballocator inodes to delete from aren't known until deep | ||
5042 | * within an unrelated codepath. | ||
5043 | * | ||
5044 | * ocfs2_extent_block structures are a good example of this - an inode | ||
5045 | * btree could have been grown by any number of nodes each allocating | ||
5046 | * out of their own suballoc inode. | ||
5047 | * | ||
5048 | * These structures allow the delay of block de-allocation until a | ||
5049 | * later time, when locking of multiple cluster inodes won't cause | ||
5050 | * deadlock. | ||
5051 | */ | ||
5052 | |||
5053 | /* | ||
5054 | * Describes a single block free from a suballocator | ||
5055 | */ | ||
5056 | struct ocfs2_cached_block_free { | ||
5057 | struct ocfs2_cached_block_free *free_next; | ||
5058 | u64 free_blk; | ||
5059 | unsigned int free_bit; | ||
5060 | }; | ||
5061 | |||
5062 | struct ocfs2_per_slot_free_list { | ||
5063 | struct ocfs2_per_slot_free_list *f_next_suballocator; | ||
5064 | int f_inode_type; | ||
5065 | int f_slot; | ||
5066 | struct ocfs2_cached_block_free *f_first; | ||
5067 | }; | ||
5068 | |||
5069 | static int ocfs2_free_cached_items(struct ocfs2_super *osb, | ||
5070 | int sysfile_type, | ||
5071 | int slot, | ||
5072 | struct ocfs2_cached_block_free *head) | ||
5073 | { | ||
5074 | int ret; | ||
5075 | u64 bg_blkno; | ||
5076 | handle_t *handle; | ||
5077 | struct inode *inode; | ||
5078 | struct buffer_head *di_bh = NULL; | ||
5079 | struct ocfs2_cached_block_free *tmp; | ||
5080 | |||
5081 | inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); | ||
5082 | if (!inode) { | ||
5083 | ret = -EINVAL; | ||
5084 | mlog_errno(ret); | ||
5085 | goto out; | ||
5086 | } | ||
5087 | |||
5088 | mutex_lock(&inode->i_mutex); | ||
5089 | |||
5090 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
5091 | if (ret) { | ||
5092 | mlog_errno(ret); | ||
5093 | goto out_mutex; | ||
5094 | } | ||
5095 | |||
5096 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
5097 | if (IS_ERR(handle)) { | ||
5098 | ret = PTR_ERR(handle); | ||
5099 | mlog_errno(ret); | ||
5100 | goto out_unlock; | ||
5101 | } | ||
5102 | |||
5103 | while (head) { | ||
5104 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, | ||
5105 | head->free_bit); | ||
5106 | mlog(0, "Free bit: (bit %u, blkno %llu)\n", | ||
5107 | head->free_bit, (unsigned long long)head->free_blk); | ||
5108 | |||
5109 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, | ||
5110 | head->free_bit, bg_blkno, 1); | ||
5111 | if (ret) { | ||
5112 | mlog_errno(ret); | ||
5113 | goto out_journal; | ||
5114 | } | ||
5115 | |||
5116 | ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); | ||
5117 | if (ret) { | ||
5118 | mlog_errno(ret); | ||
5119 | goto out_journal; | ||
5120 | } | ||
5121 | |||
5122 | tmp = head; | ||
5123 | head = head->free_next; | ||
5124 | kfree(tmp); | ||
5125 | } | ||
5126 | |||
5127 | out_journal: | ||
5128 | ocfs2_commit_trans(osb, handle); | ||
5129 | |||
5130 | out_unlock: | ||
5131 | ocfs2_meta_unlock(inode, 1); | ||
5132 | brelse(di_bh); | ||
5133 | out_mutex: | ||
5134 | mutex_unlock(&inode->i_mutex); | ||
5135 | iput(inode); | ||
5136 | out: | ||
5137 | while(head) { | ||
5138 | /* Premature exit may have left some dangling items. */ | ||
5139 | tmp = head; | ||
5140 | head = head->free_next; | ||
5141 | kfree(tmp); | ||
5142 | } | ||
5143 | |||
5144 | return ret; | ||
5145 | } | ||
5146 | |||
5147 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
5148 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
5149 | { | ||
5150 | int ret = 0, ret2; | ||
5151 | struct ocfs2_per_slot_free_list *fl; | ||
5152 | |||
5153 | if (!ctxt) | ||
5154 | return 0; | ||
5155 | |||
5156 | while (ctxt->c_first_suballocator) { | ||
5157 | fl = ctxt->c_first_suballocator; | ||
5158 | |||
5159 | if (fl->f_first) { | ||
5160 | mlog(0, "Free items: (type %u, slot %d)\n", | ||
5161 | fl->f_inode_type, fl->f_slot); | ||
5162 | ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, | ||
5163 | fl->f_slot, fl->f_first); | ||
5164 | if (ret2) | ||
5165 | mlog_errno(ret2); | ||
5166 | if (!ret) | ||
5167 | ret = ret2; | ||
5168 | } | ||
5169 | |||
5170 | ctxt->c_first_suballocator = fl->f_next_suballocator; | ||
5171 | kfree(fl); | ||
5172 | } | ||
5173 | |||
5174 | return ret; | ||
5175 | } | ||
5176 | |||
5177 | static struct ocfs2_per_slot_free_list * | ||
5178 | ocfs2_find_per_slot_free_list(int type, | ||
5179 | int slot, | ||
5180 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
5181 | { | ||
5182 | struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; | ||
5183 | |||
5184 | while (fl) { | ||
5185 | if (fl->f_inode_type == type && fl->f_slot == slot) | ||
5186 | return fl; | ||
5187 | |||
5188 | fl = fl->f_next_suballocator; | ||
5189 | } | ||
5190 | |||
5191 | fl = kmalloc(sizeof(*fl), GFP_NOFS); | ||
5192 | if (fl) { | ||
5193 | fl->f_inode_type = type; | ||
5194 | fl->f_slot = slot; | ||
5195 | fl->f_first = NULL; | ||
5196 | fl->f_next_suballocator = ctxt->c_first_suballocator; | ||
5197 | |||
5198 | ctxt->c_first_suballocator = fl; | ||
5199 | } | ||
5200 | return fl; | ||
5201 | } | ||
5202 | |||
5203 | static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
5204 | int type, int slot, u64 blkno, | ||
5205 | unsigned int bit) | ||
5206 | { | ||
5207 | int ret; | ||
5208 | struct ocfs2_per_slot_free_list *fl; | ||
5209 | struct ocfs2_cached_block_free *item; | ||
5210 | |||
5211 | fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); | ||
5212 | if (fl == NULL) { | ||
5213 | ret = -ENOMEM; | ||
5214 | mlog_errno(ret); | ||
5215 | goto out; | ||
5216 | } | ||
5217 | |||
5218 | item = kmalloc(sizeof(*item), GFP_NOFS); | ||
5219 | if (item == NULL) { | ||
5220 | ret = -ENOMEM; | ||
5221 | mlog_errno(ret); | ||
5222 | goto out; | ||
5223 | } | ||
5224 | |||
5225 | mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", | ||
5226 | type, slot, bit, (unsigned long long)blkno); | ||
5227 | |||
5228 | item->free_blk = blkno; | ||
5229 | item->free_bit = bit; | ||
5230 | item->free_next = fl->f_first; | ||
5231 | |||
5232 | fl->f_first = item; | ||
5233 | |||
5234 | ret = 0; | ||
5235 | out: | ||
5236 | return ret; | ||
5237 | } | ||
5238 | |||
5239 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
5240 | struct ocfs2_extent_block *eb) | ||
5241 | { | ||
5242 | return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, | ||
5243 | le16_to_cpu(eb->h_suballoc_slot), | ||
5244 | le64_to_cpu(eb->h_blkno), | ||
5245 | le16_to_cpu(eb->h_suballoc_bit)); | ||
5246 | } | ||
5247 | |||
2960 | /* This function will figure out whether the currently last extent | 5248 | /* This function will figure out whether the currently last extent |
2961 | * block will be deleted, and if it will, what the new last extent | 5249 | * block will be deleted, and if it will, what the new last extent |
2962 | * block will be so we can update his h_next_leaf_blk field, as well | 5250 | * block will be so we can update his h_next_leaf_blk field, as well |
@@ -3238,27 +5526,10 @@ delete: | |||
3238 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); | 5526 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); |
3239 | BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); | 5527 | BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); |
3240 | 5528 | ||
3241 | if (le16_to_cpu(eb->h_suballoc_slot) == 0) { | 5529 | ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); |
3242 | /* | 5530 | /* An error here is not fatal. */ |
3243 | * This code only understands how to | 5531 | if (ret < 0) |
3244 | * lock the suballocator in slot 0, | 5532 | mlog_errno(ret); |
3245 | * which is fine because allocation is | ||
3246 | * only ever done out of that | ||
3247 | * suballocator too. A future version | ||
3248 | * might change that however, so avoid | ||
3249 | * a free if we don't know how to | ||
3250 | * handle it. This way an fs incompat | ||
3251 | * bit will not be necessary. | ||
3252 | */ | ||
3253 | ret = ocfs2_free_extent_block(handle, | ||
3254 | tc->tc_ext_alloc_inode, | ||
3255 | tc->tc_ext_alloc_bh, | ||
3256 | eb); | ||
3257 | |||
3258 | /* An error here is not fatal. */ | ||
3259 | if (ret < 0) | ||
3260 | mlog_errno(ret); | ||
3261 | } | ||
3262 | } else { | 5533 | } else { |
3263 | deleted_eb = 0; | 5534 | deleted_eb = 0; |
3264 | } | 5535 | } |
@@ -3397,9 +5668,9 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | |||
3397 | return ocfs2_journal_dirty_data(handle, bh); | 5668 | return ocfs2_journal_dirty_data(handle, bh); |
3398 | } | 5669 | } |
3399 | 5670 | ||
3400 | static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | 5671 | static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, |
3401 | struct page **pages, int numpages, | 5672 | loff_t end, struct page **pages, |
3402 | u64 phys, handle_t *handle) | 5673 | int numpages, u64 phys, handle_t *handle) |
3403 | { | 5674 | { |
3404 | int i, ret, partial = 0; | 5675 | int i, ret, partial = 0; |
3405 | void *kaddr; | 5676 | void *kaddr; |
@@ -3412,26 +5683,14 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | |||
3412 | if (numpages == 0) | 5683 | if (numpages == 0) |
3413 | goto out; | 5684 | goto out; |
3414 | 5685 | ||
3415 | from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */ | 5686 | to = PAGE_CACHE_SIZE; |
3416 | if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) { | ||
3417 | /* | ||
3418 | * Since 'from' has been capped to a value below page | ||
3419 | * size, this calculation won't be able to overflow | ||
3420 | * 'to' | ||
3421 | */ | ||
3422 | to = ocfs2_align_bytes_to_clusters(sb, from); | ||
3423 | |||
3424 | /* | ||
3425 | * The truncate tail in this case should never contain | ||
3426 | * more than one page at maximum. The loop below also | ||
3427 | * assumes this. | ||
3428 | */ | ||
3429 | BUG_ON(numpages != 1); | ||
3430 | } | ||
3431 | |||
3432 | for(i = 0; i < numpages; i++) { | 5687 | for(i = 0; i < numpages; i++) { |
3433 | page = pages[i]; | 5688 | page = pages[i]; |
3434 | 5689 | ||
5690 | from = start & (PAGE_CACHE_SIZE - 1); | ||
5691 | if ((end >> PAGE_CACHE_SHIFT) == page->index) | ||
5692 | to = end & (PAGE_CACHE_SIZE - 1); | ||
5693 | |||
3435 | BUG_ON(from > PAGE_CACHE_SIZE); | 5694 | BUG_ON(from > PAGE_CACHE_SIZE); |
3436 | BUG_ON(to > PAGE_CACHE_SIZE); | 5695 | BUG_ON(to > PAGE_CACHE_SIZE); |
3437 | 5696 | ||
@@ -3468,10 +5727,7 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | |||
3468 | 5727 | ||
3469 | flush_dcache_page(page); | 5728 | flush_dcache_page(page); |
3470 | 5729 | ||
3471 | /* | 5730 | start = (page->index + 1) << PAGE_CACHE_SHIFT; |
3472 | * Every page after the 1st one should be completely zero'd. | ||
3473 | */ | ||
3474 | from = 0; | ||
3475 | } | 5731 | } |
3476 | out: | 5732 | out: |
3477 | if (pages) { | 5733 | if (pages) { |
@@ -3484,24 +5740,26 @@ out: | |||
3484 | } | 5740 | } |
3485 | } | 5741 | } |
3486 | 5742 | ||
3487 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages, | 5743 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, |
3488 | int *num, u64 *phys) | 5744 | struct page **pages, int *num, u64 *phys) |
3489 | { | 5745 | { |
3490 | int i, numpages = 0, ret = 0; | 5746 | int i, numpages = 0, ret = 0; |
3491 | unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize; | ||
3492 | unsigned int ext_flags; | 5747 | unsigned int ext_flags; |
3493 | struct super_block *sb = inode->i_sb; | 5748 | struct super_block *sb = inode->i_sb; |
3494 | struct address_space *mapping = inode->i_mapping; | 5749 | struct address_space *mapping = inode->i_mapping; |
3495 | unsigned long index; | 5750 | unsigned long index; |
3496 | u64 next_cluster_bytes; | 5751 | loff_t last_page_bytes; |
3497 | 5752 | ||
3498 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); | 5753 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); |
5754 | BUG_ON(start > end); | ||
3499 | 5755 | ||
3500 | /* Cluster boundary, so we don't need to grab any pages. */ | 5756 | if (start == end) |
3501 | if ((isize & (csize - 1)) == 0) | ||
3502 | goto out; | 5757 | goto out; |
3503 | 5758 | ||
3504 | ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits, | 5759 | BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != |
5760 | (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); | ||
5761 | |||
5762 | ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits, | ||
3505 | phys, NULL, &ext_flags); | 5763 | phys, NULL, &ext_flags); |
3506 | if (ret) { | 5764 | if (ret) { |
3507 | mlog_errno(ret); | 5765 | mlog_errno(ret); |
@@ -3517,8 +5775,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page * | |||
3517 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | 5775 | if (ext_flags & OCFS2_EXT_UNWRITTEN) |
3518 | goto out; | 5776 | goto out; |
3519 | 5777 | ||
3520 | next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize); | 5778 | last_page_bytes = PAGE_ALIGN(end); |
3521 | index = isize >> PAGE_CACHE_SHIFT; | 5779 | index = start >> PAGE_CACHE_SHIFT; |
3522 | do { | 5780 | do { |
3523 | pages[numpages] = grab_cache_page(mapping, index); | 5781 | pages[numpages] = grab_cache_page(mapping, index); |
3524 | if (!pages[numpages]) { | 5782 | if (!pages[numpages]) { |
@@ -3529,7 +5787,7 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page * | |||
3529 | 5787 | ||
3530 | numpages++; | 5788 | numpages++; |
3531 | index++; | 5789 | index++; |
3532 | } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT)); | 5790 | } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT)); |
3533 | 5791 | ||
3534 | out: | 5792 | out: |
3535 | if (ret != 0) { | 5793 | if (ret != 0) { |
@@ -3558,11 +5816,10 @@ out: | |||
3558 | * otherwise block_write_full_page() will skip writeout of pages past | 5816 | * otherwise block_write_full_page() will skip writeout of pages past |
3559 | * i_size. The new_i_size parameter is passed for this reason. | 5817 | * i_size. The new_i_size parameter is passed for this reason. |
3560 | */ | 5818 | */ |
3561 | int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | 5819 | int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, |
3562 | u64 new_i_size) | 5820 | u64 range_start, u64 range_end) |
3563 | { | 5821 | { |
3564 | int ret, numpages; | 5822 | int ret, numpages; |
3565 | loff_t endbyte; | ||
3566 | struct page **pages = NULL; | 5823 | struct page **pages = NULL; |
3567 | u64 phys; | 5824 | u64 phys; |
3568 | 5825 | ||
@@ -3581,7 +5838,8 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | |||
3581 | goto out; | 5838 | goto out; |
3582 | } | 5839 | } |
3583 | 5840 | ||
3584 | ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys); | 5841 | ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, |
5842 | &numpages, &phys); | ||
3585 | if (ret) { | 5843 | if (ret) { |
3586 | mlog_errno(ret); | 5844 | mlog_errno(ret); |
3587 | goto out; | 5845 | goto out; |
@@ -3590,17 +5848,16 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | |||
3590 | if (numpages == 0) | 5848 | if (numpages == 0) |
3591 | goto out; | 5849 | goto out; |
3592 | 5850 | ||
3593 | ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys, | 5851 | ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, |
3594 | handle); | 5852 | numpages, phys, handle); |
3595 | 5853 | ||
3596 | /* | 5854 | /* |
3597 | * Initiate writeout of the pages we zero'd here. We don't | 5855 | * Initiate writeout of the pages we zero'd here. We don't |
3598 | * wait on them - the truncate_inode_pages() call later will | 5856 | * wait on them - the truncate_inode_pages() call later will |
3599 | * do that for us. | 5857 | * do that for us. |
3600 | */ | 5858 | */ |
3601 | endbyte = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); | 5859 | ret = do_sync_mapping_range(inode->i_mapping, range_start, |
3602 | ret = do_sync_mapping_range(inode->i_mapping, new_i_size, | 5860 | range_end - 1, SYNC_FILE_RANGE_WRITE); |
3603 | endbyte - 1, SYNC_FILE_RANGE_WRITE); | ||
3604 | if (ret) | 5861 | if (ret) |
3605 | mlog_errno(ret); | 5862 | mlog_errno(ret); |
3606 | 5863 | ||
@@ -3631,8 +5888,6 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
3631 | 5888 | ||
3632 | mlog_entry_void(); | 5889 | mlog_entry_void(); |
3633 | 5890 | ||
3634 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
3635 | |||
3636 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, | 5891 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, |
3637 | i_size_read(inode)); | 5892 | i_size_read(inode)); |
3638 | 5893 | ||
@@ -3754,7 +6009,6 @@ start: | |||
3754 | goto start; | 6009 | goto start; |
3755 | 6010 | ||
3756 | bail: | 6011 | bail: |
3757 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
3758 | 6012 | ||
3759 | ocfs2_schedule_truncate_log_flush(osb, 1); | 6013 | ocfs2_schedule_truncate_log_flush(osb, 1); |
3760 | 6014 | ||
@@ -3764,6 +6018,8 @@ bail: | |||
3764 | if (handle) | 6018 | if (handle) |
3765 | ocfs2_commit_trans(osb, handle); | 6019 | ocfs2_commit_trans(osb, handle); |
3766 | 6020 | ||
6021 | ocfs2_run_deallocs(osb, &tc->tc_dealloc); | ||
6022 | |||
3767 | ocfs2_free_path(path); | 6023 | ocfs2_free_path(path); |
3768 | 6024 | ||
3769 | /* This will drop the ext_alloc cluster lock for us */ | 6025 | /* This will drop the ext_alloc cluster lock for us */ |
@@ -3774,23 +6030,18 @@ bail: | |||
3774 | } | 6030 | } |
3775 | 6031 | ||
3776 | /* | 6032 | /* |
3777 | * Expects the inode to already be locked. This will figure out which | 6033 | * Expects the inode to already be locked. |
3778 | * inodes need to be locked and will put them on the returned truncate | ||
3779 | * context. | ||
3780 | */ | 6034 | */ |
3781 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, | 6035 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, |
3782 | struct inode *inode, | 6036 | struct inode *inode, |
3783 | struct buffer_head *fe_bh, | 6037 | struct buffer_head *fe_bh, |
3784 | struct ocfs2_truncate_context **tc) | 6038 | struct ocfs2_truncate_context **tc) |
3785 | { | 6039 | { |
3786 | int status, metadata_delete, i; | 6040 | int status; |
3787 | unsigned int new_i_clusters; | 6041 | unsigned int new_i_clusters; |
3788 | struct ocfs2_dinode *fe; | 6042 | struct ocfs2_dinode *fe; |
3789 | struct ocfs2_extent_block *eb; | 6043 | struct ocfs2_extent_block *eb; |
3790 | struct ocfs2_extent_list *el; | ||
3791 | struct buffer_head *last_eb_bh = NULL; | 6044 | struct buffer_head *last_eb_bh = NULL; |
3792 | struct inode *ext_alloc_inode = NULL; | ||
3793 | struct buffer_head *ext_alloc_bh = NULL; | ||
3794 | 6045 | ||
3795 | mlog_entry_void(); | 6046 | mlog_entry_void(); |
3796 | 6047 | ||
@@ -3810,12 +6061,9 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
3810 | mlog_errno(status); | 6061 | mlog_errno(status); |
3811 | goto bail; | 6062 | goto bail; |
3812 | } | 6063 | } |
6064 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | ||
3813 | 6065 | ||
3814 | metadata_delete = 0; | ||
3815 | if (fe->id2.i_list.l_tree_depth) { | 6066 | if (fe->id2.i_list.l_tree_depth) { |
3816 | /* If we have a tree, then the truncate may result in | ||
3817 | * metadata deletes. Figure this out from the | ||
3818 | * rightmost leaf block.*/ | ||
3819 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 6067 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), |
3820 | &last_eb_bh, OCFS2_BH_CACHED, inode); | 6068 | &last_eb_bh, OCFS2_BH_CACHED, inode); |
3821 | if (status < 0) { | 6069 | if (status < 0) { |
@@ -3830,43 +6078,10 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
3830 | status = -EIO; | 6078 | status = -EIO; |
3831 | goto bail; | 6079 | goto bail; |
3832 | } | 6080 | } |
3833 | el = &(eb->h_list); | ||
3834 | |||
3835 | i = 0; | ||
3836 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
3837 | i = 1; | ||
3838 | /* | ||
3839 | * XXX: Should we check that next_free_rec contains | ||
3840 | * the extent? | ||
3841 | */ | ||
3842 | if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) | ||
3843 | metadata_delete = 1; | ||
3844 | } | 6081 | } |
3845 | 6082 | ||
3846 | (*tc)->tc_last_eb_bh = last_eb_bh; | 6083 | (*tc)->tc_last_eb_bh = last_eb_bh; |
3847 | 6084 | ||
3848 | if (metadata_delete) { | ||
3849 | mlog(0, "Will have to delete metadata for this trunc. " | ||
3850 | "locking allocator.\n"); | ||
3851 | ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); | ||
3852 | if (!ext_alloc_inode) { | ||
3853 | status = -ENOMEM; | ||
3854 | mlog_errno(status); | ||
3855 | goto bail; | ||
3856 | } | ||
3857 | |||
3858 | mutex_lock(&ext_alloc_inode->i_mutex); | ||
3859 | (*tc)->tc_ext_alloc_inode = ext_alloc_inode; | ||
3860 | |||
3861 | status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); | ||
3862 | if (status < 0) { | ||
3863 | mlog_errno(status); | ||
3864 | goto bail; | ||
3865 | } | ||
3866 | (*tc)->tc_ext_alloc_bh = ext_alloc_bh; | ||
3867 | (*tc)->tc_ext_alloc_locked = 1; | ||
3868 | } | ||
3869 | |||
3870 | status = 0; | 6085 | status = 0; |
3871 | bail: | 6086 | bail: |
3872 | if (status < 0) { | 6087 | if (status < 0) { |
@@ -3880,16 +6095,13 @@ bail: | |||
3880 | 6095 | ||
3881 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) | 6096 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) |
3882 | { | 6097 | { |
3883 | if (tc->tc_ext_alloc_inode) { | 6098 | /* |
3884 | if (tc->tc_ext_alloc_locked) | 6099 | * The caller is responsible for completing deallocation |
3885 | ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); | 6100 | * before freeing the context. |
3886 | 6101 | */ | |
3887 | mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); | 6102 | if (tc->tc_dealloc.c_first_suballocator != NULL) |
3888 | iput(tc->tc_ext_alloc_inode); | 6103 | mlog(ML_NOTICE, |
3889 | } | 6104 | "Truncate completion has non-empty dealloc context\n"); |
3890 | |||
3891 | if (tc->tc_ext_alloc_bh) | ||
3892 | brelse(tc->tc_ext_alloc_bh); | ||
3893 | 6105 | ||
3894 | if (tc->tc_last_eb_bh) | 6106 | if (tc->tc_last_eb_bh) |
3895 | brelse(tc->tc_last_eb_bh); | 6107 | brelse(tc->tc_last_eb_bh); |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index fbcb5934a081..990df48ae8d3 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -34,7 +34,17 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
34 | u32 cpos, | 34 | u32 cpos, |
35 | u64 start_blk, | 35 | u64 start_blk, |
36 | u32 new_clusters, | 36 | u32 new_clusters, |
37 | u8 flags, | ||
37 | struct ocfs2_alloc_context *meta_ac); | 38 | struct ocfs2_alloc_context *meta_ac); |
39 | struct ocfs2_cached_dealloc_ctxt; | ||
40 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | ||
41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | ||
42 | struct ocfs2_alloc_context *meta_ac, | ||
43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
45 | u32 cpos, u32 len, handle_t *handle, | ||
46 | struct ocfs2_alloc_context *meta_ac, | ||
47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
38 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
39 | struct inode *inode, | 49 | struct inode *inode, |
40 | struct ocfs2_dinode *fe); | 50 | struct ocfs2_dinode *fe); |
@@ -62,17 +72,41 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
62 | struct ocfs2_dinode **tl_copy); | 72 | struct ocfs2_dinode **tl_copy); |
63 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | 73 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, |
64 | struct ocfs2_dinode *tl_copy); | 74 | struct ocfs2_dinode *tl_copy); |
75 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); | ||
76 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||
77 | handle_t *handle, | ||
78 | u64 start_blk, | ||
79 | unsigned int num_clusters); | ||
80 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); | ||
81 | |||
82 | /* | ||
83 | * Process local structure which describes the block unlinks done | ||
84 | * during an operation. This is populated via | ||
85 | * ocfs2_cache_block_dealloc(). | ||
86 | * | ||
87 | * ocfs2_run_deallocs() should be called after the potentially | ||
88 | * de-allocating routines. No journal handles should be open, and most | ||
89 | * locks should have been dropped. | ||
90 | */ | ||
91 | struct ocfs2_cached_dealloc_ctxt { | ||
92 | struct ocfs2_per_slot_free_list *c_first_suballocator; | ||
93 | }; | ||
94 | static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) | ||
95 | { | ||
96 | c->c_first_suballocator = NULL; | ||
97 | } | ||
98 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
99 | struct ocfs2_cached_dealloc_ctxt *ctxt); | ||
65 | 100 | ||
66 | struct ocfs2_truncate_context { | 101 | struct ocfs2_truncate_context { |
67 | struct inode *tc_ext_alloc_inode; | 102 | struct ocfs2_cached_dealloc_ctxt tc_dealloc; |
68 | struct buffer_head *tc_ext_alloc_bh; | ||
69 | int tc_ext_alloc_locked; /* is it cluster locked? */ | 103 | int tc_ext_alloc_locked; /* is it cluster locked? */ |
70 | /* these get destroyed once it's passed to ocfs2_commit_truncate. */ | 104 | /* these get destroyed once it's passed to ocfs2_commit_truncate. */ |
71 | struct buffer_head *tc_last_eb_bh; | 105 | struct buffer_head *tc_last_eb_bh; |
72 | }; | 106 | }; |
73 | 107 | ||
74 | int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | 108 | int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, |
75 | u64 new_i_size); | 109 | u64 range_start, u64 range_end); |
76 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, | 110 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, |
77 | struct inode *inode, | 111 | struct inode *inode, |
78 | struct buffer_head *fe_bh, | 112 | struct buffer_head *fe_bh, |
@@ -84,6 +118,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
84 | 118 | ||
85 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, | 119 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, |
86 | u32 cpos, struct buffer_head **leaf_bh); | 120 | u32 cpos, struct buffer_head **leaf_bh); |
121 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); | ||
87 | 122 | ||
88 | /* | 123 | /* |
89 | * Helper function to look at the # of clusters in an extent record. | 124 | * Helper function to look at the # of clusters in an extent record. |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index a480b09c79b9..84bf6e79de23 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -684,6 +684,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
684 | bh = bh->b_this_page, block_start += bsize) { | 684 | bh = bh->b_this_page, block_start += bsize) { |
685 | block_end = block_start + bsize; | 685 | block_end = block_start + bsize; |
686 | 686 | ||
687 | clear_buffer_new(bh); | ||
688 | |||
687 | /* | 689 | /* |
688 | * Ignore blocks outside of our i/o range - | 690 | * Ignore blocks outside of our i/o range - |
689 | * they may belong to unallocated clusters. | 691 | * they may belong to unallocated clusters. |
@@ -698,9 +700,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
698 | * For an allocating write with cluster size >= page | 700 | * For an allocating write with cluster size >= page |
699 | * size, we always write the entire page. | 701 | * size, we always write the entire page. |
700 | */ | 702 | */ |
701 | 703 | if (new) | |
702 | if (buffer_new(bh)) | 704 | set_buffer_new(bh); |
703 | clear_buffer_new(bh); | ||
704 | 705 | ||
705 | if (!buffer_mapped(bh)) { | 706 | if (!buffer_mapped(bh)) { |
706 | map_bh(bh, inode->i_sb, *p_blkno); | 707 | map_bh(bh, inode->i_sb, *p_blkno); |
@@ -711,7 +712,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
711 | if (!buffer_uptodate(bh)) | 712 | if (!buffer_uptodate(bh)) |
712 | set_buffer_uptodate(bh); | 713 | set_buffer_uptodate(bh); |
713 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && | 714 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && |
714 | (block_start < from || block_end > to)) { | 715 | !buffer_new(bh) && |
716 | (block_start < from || block_end > to)) { | ||
715 | ll_rw_block(READ, 1, &bh); | 717 | ll_rw_block(READ, 1, &bh); |
716 | *wait_bh++=bh; | 718 | *wait_bh++=bh; |
717 | } | 719 | } |
@@ -738,18 +740,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
738 | bh = head; | 740 | bh = head; |
739 | block_start = 0; | 741 | block_start = 0; |
740 | do { | 742 | do { |
741 | void *kaddr; | ||
742 | |||
743 | block_end = block_start + bsize; | 743 | block_end = block_start + bsize; |
744 | if (block_end <= from) | 744 | if (block_end <= from) |
745 | goto next_bh; | 745 | goto next_bh; |
746 | if (block_start >= to) | 746 | if (block_start >= to) |
747 | break; | 747 | break; |
748 | 748 | ||
749 | kaddr = kmap_atomic(page, KM_USER0); | 749 | zero_user_page(page, block_start, bh->b_size, KM_USER0); |
750 | memset(kaddr+block_start, 0, bh->b_size); | ||
751 | flush_dcache_page(page); | ||
752 | kunmap_atomic(kaddr, KM_USER0); | ||
753 | set_buffer_uptodate(bh); | 750 | set_buffer_uptodate(bh); |
754 | mark_buffer_dirty(bh); | 751 | mark_buffer_dirty(bh); |
755 | 752 | ||
@@ -761,217 +758,240 @@ next_bh: | |||
761 | return ret; | 758 | return ret; |
762 | } | 759 | } |
763 | 760 | ||
761 | #if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) | ||
762 | #define OCFS2_MAX_CTXT_PAGES 1 | ||
763 | #else | ||
764 | #define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE) | ||
765 | #endif | ||
766 | |||
767 | #define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) | ||
768 | |||
764 | /* | 769 | /* |
765 | * This will copy user data from the buffer page in the splice | 770 | * Describe the state of a single cluster to be written to. |
766 | * context. | ||
767 | * | ||
768 | * For now, we ignore SPLICE_F_MOVE as that would require some extra | ||
769 | * communication out all the way to ocfs2_write(). | ||
770 | */ | 771 | */ |
771 | int ocfs2_map_and_write_splice_data(struct inode *inode, | 772 | struct ocfs2_write_cluster_desc { |
772 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | 773 | u32 c_cpos; |
773 | unsigned int *ret_from, unsigned int *ret_to) | 774 | u32 c_phys; |
775 | /* | ||
776 | * Give this a unique field because c_phys eventually gets | ||
777 | * filled. | ||
778 | */ | ||
779 | unsigned c_new; | ||
780 | unsigned c_unwritten; | ||
781 | }; | ||
782 | |||
783 | static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) | ||
774 | { | 784 | { |
775 | int ret; | 785 | return d->c_new || d->c_unwritten; |
776 | unsigned int to, from, cluster_start, cluster_end; | 786 | } |
777 | char *src, *dst; | ||
778 | struct ocfs2_splice_write_priv *sp = wc->w_private; | ||
779 | struct pipe_buffer *buf = sp->s_buf; | ||
780 | unsigned long bytes, src_from; | ||
781 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
782 | 787 | ||
783 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 788 | struct ocfs2_write_ctxt { |
784 | &cluster_end); | 789 | /* Logical cluster position / len of write */ |
790 | u32 w_cpos; | ||
791 | u32 w_clen; | ||
785 | 792 | ||
786 | from = sp->s_offset; | 793 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; |
787 | src_from = sp->s_buf_offset; | ||
788 | bytes = wc->w_count; | ||
789 | 794 | ||
790 | if (wc->w_large_pages) { | 795 | /* |
791 | /* | 796 | * This is true if page_size > cluster_size. |
792 | * For cluster size < page size, we have to | 797 | * |
793 | * calculate pos within the cluster and obey | 798 | * It triggers a set of special cases during write which might |
794 | * the rightmost boundary. | 799 | * have to deal with allocating writes to partial pages. |
795 | */ | 800 | */ |
796 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | 801 | unsigned int w_large_pages; |
797 | - (wc->w_pos & (osb->s_clustersize - 1)))); | 802 | |
798 | } | 803 | /* |
799 | to = from + bytes; | 804 | * Pages involved in this write. |
805 | * | ||
806 | * w_target_page is the page being written to by the user. | ||
807 | * | ||
808 | * w_pages is an array of pages which always contains | ||
809 | * w_target_page, and in the case of an allocating write with | ||
810 | * page_size < cluster size, it will contain zero'd and mapped | ||
811 | * pages adjacent to w_target_page which need to be written | ||
812 | * out in so that future reads from that region will get | ||
813 | * zero's. | ||
814 | */ | ||
815 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
816 | unsigned int w_num_pages; | ||
817 | struct page *w_target_page; | ||
800 | 818 | ||
801 | BUG_ON(from > PAGE_CACHE_SIZE); | 819 | /* |
802 | BUG_ON(to > PAGE_CACHE_SIZE); | 820 | * ocfs2_write_end() uses this to know what the real range to |
803 | BUG_ON(from < cluster_start); | 821 | * write in the target should be. |
804 | BUG_ON(to > cluster_end); | 822 | */ |
823 | unsigned int w_target_from; | ||
824 | unsigned int w_target_to; | ||
805 | 825 | ||
806 | if (wc->w_this_page_new) | 826 | /* |
807 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 827 | * We could use journal_current_handle() but this is cleaner, |
808 | cluster_start, cluster_end, 1); | 828 | * IMHO -Mark |
809 | else | 829 | */ |
810 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 830 | handle_t *w_handle; |
811 | from, to, 0); | 831 | |
812 | if (ret) { | 832 | struct buffer_head *w_di_bh; |
813 | mlog_errno(ret); | 833 | |
814 | goto out; | 834 | struct ocfs2_cached_dealloc_ctxt w_dealloc; |
835 | }; | ||
836 | |||
837 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | ||
838 | { | ||
839 | int i; | ||
840 | |||
841 | for(i = 0; i < wc->w_num_pages; i++) { | ||
842 | if (wc->w_pages[i] == NULL) | ||
843 | continue; | ||
844 | |||
845 | unlock_page(wc->w_pages[i]); | ||
846 | mark_page_accessed(wc->w_pages[i]); | ||
847 | page_cache_release(wc->w_pages[i]); | ||
815 | } | 848 | } |
816 | 849 | ||
817 | src = buf->ops->map(sp->s_pipe, buf, 1); | 850 | brelse(wc->w_di_bh); |
818 | dst = kmap_atomic(wc->w_this_page, KM_USER1); | 851 | kfree(wc); |
819 | memcpy(dst + from, src + src_from, bytes); | 852 | } |
820 | kunmap_atomic(wc->w_this_page, KM_USER1); | 853 | |
821 | buf->ops->unmap(sp->s_pipe, buf, src); | 854 | static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, |
855 | struct ocfs2_super *osb, loff_t pos, | ||
856 | unsigned len, struct buffer_head *di_bh) | ||
857 | { | ||
858 | struct ocfs2_write_ctxt *wc; | ||
859 | |||
860 | wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); | ||
861 | if (!wc) | ||
862 | return -ENOMEM; | ||
822 | 863 | ||
823 | wc->w_finished_copy = 1; | 864 | wc->w_cpos = pos >> osb->s_clustersize_bits; |
865 | wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len); | ||
866 | get_bh(di_bh); | ||
867 | wc->w_di_bh = di_bh; | ||
824 | 868 | ||
825 | *ret_from = from; | 869 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) |
826 | *ret_to = to; | 870 | wc->w_large_pages = 1; |
827 | out: | 871 | else |
872 | wc->w_large_pages = 0; | ||
873 | |||
874 | ocfs2_init_dealloc_ctxt(&wc->w_dealloc); | ||
875 | |||
876 | *wcp = wc; | ||
828 | 877 | ||
829 | return bytes ? (unsigned int)bytes : ret; | 878 | return 0; |
830 | } | 879 | } |
831 | 880 | ||
832 | /* | 881 | /* |
833 | * This will copy user data from the iovec in the buffered write | 882 | * If a page has any new buffers, zero them out here, and mark them uptodate |
834 | * context. | 883 | * and dirty so they'll be written out (in order to prevent uninitialised |
884 | * block data from leaking). And clear the new bit. | ||
835 | */ | 885 | */ |
836 | int ocfs2_map_and_write_user_data(struct inode *inode, | 886 | static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) |
837 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | ||
838 | unsigned int *ret_from, unsigned int *ret_to) | ||
839 | { | 887 | { |
840 | int ret; | 888 | unsigned int block_start, block_end; |
841 | unsigned int to, from, cluster_start, cluster_end; | 889 | struct buffer_head *head, *bh; |
842 | unsigned long bytes, src_from; | ||
843 | char *dst; | ||
844 | struct ocfs2_buffered_write_priv *bp = wc->w_private; | ||
845 | const struct iovec *cur_iov = bp->b_cur_iov; | ||
846 | char __user *buf; | ||
847 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
848 | 890 | ||
849 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 891 | BUG_ON(!PageLocked(page)); |
850 | &cluster_end); | 892 | if (!page_has_buffers(page)) |
893 | return; | ||
851 | 894 | ||
852 | buf = cur_iov->iov_base + bp->b_cur_off; | 895 | bh = head = page_buffers(page); |
853 | src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; | 896 | block_start = 0; |
897 | do { | ||
898 | block_end = block_start + bh->b_size; | ||
854 | 899 | ||
855 | from = wc->w_pos & (PAGE_CACHE_SIZE - 1); | 900 | if (buffer_new(bh)) { |
901 | if (block_end > from && block_start < to) { | ||
902 | if (!PageUptodate(page)) { | ||
903 | unsigned start, end; | ||
856 | 904 | ||
857 | /* | 905 | start = max(from, block_start); |
858 | * This is a lot of comparisons, but it reads quite | 906 | end = min(to, block_end); |
859 | * easily, which is important here. | ||
860 | */ | ||
861 | /* Stay within the src page */ | ||
862 | bytes = PAGE_SIZE - src_from; | ||
863 | /* Stay within the vector */ | ||
864 | bytes = min(bytes, | ||
865 | (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); | ||
866 | /* Stay within count */ | ||
867 | bytes = min(bytes, (unsigned long)wc->w_count); | ||
868 | /* | ||
869 | * For clustersize > page size, just stay within | ||
870 | * target page, otherwise we have to calculate pos | ||
871 | * within the cluster and obey the rightmost | ||
872 | * boundary. | ||
873 | */ | ||
874 | if (wc->w_large_pages) { | ||
875 | /* | ||
876 | * For cluster size < page size, we have to | ||
877 | * calculate pos within the cluster and obey | ||
878 | * the rightmost boundary. | ||
879 | */ | ||
880 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | ||
881 | - (wc->w_pos & (osb->s_clustersize - 1)))); | ||
882 | } else { | ||
883 | /* | ||
884 | * cluster size > page size is the most common | ||
885 | * case - we just stay within the target page | ||
886 | * boundary. | ||
887 | */ | ||
888 | bytes = min(bytes, PAGE_CACHE_SIZE - from); | ||
889 | } | ||
890 | 907 | ||
891 | to = from + bytes; | 908 | zero_user_page(page, start, end - start, KM_USER0); |
909 | set_buffer_uptodate(bh); | ||
910 | } | ||
892 | 911 | ||
893 | BUG_ON(from > PAGE_CACHE_SIZE); | 912 | clear_buffer_new(bh); |
894 | BUG_ON(to > PAGE_CACHE_SIZE); | 913 | mark_buffer_dirty(bh); |
895 | BUG_ON(from < cluster_start); | 914 | } |
896 | BUG_ON(to > cluster_end); | 915 | } |
897 | 916 | ||
898 | if (wc->w_this_page_new) | 917 | block_start = block_end; |
899 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 918 | bh = bh->b_this_page; |
900 | cluster_start, cluster_end, 1); | 919 | } while (bh != head); |
901 | else | 920 | } |
902 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | ||
903 | from, to, 0); | ||
904 | if (ret) { | ||
905 | mlog_errno(ret); | ||
906 | goto out; | ||
907 | } | ||
908 | 921 | ||
909 | dst = kmap(wc->w_this_page); | 922 | /* |
910 | memcpy(dst + from, bp->b_src_buf + src_from, bytes); | 923 | * Only called when we have a failure during allocating write to write |
911 | kunmap(wc->w_this_page); | 924 | * zero's to the newly allocated region. |
925 | */ | ||
926 | static void ocfs2_write_failure(struct inode *inode, | ||
927 | struct ocfs2_write_ctxt *wc, | ||
928 | loff_t user_pos, unsigned user_len) | ||
929 | { | ||
930 | int i; | ||
931 | unsigned from, to; | ||
932 | struct page *tmppage; | ||
912 | 933 | ||
913 | /* | 934 | ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); |
914 | * XXX: This is slow, but simple. The caller of | ||
915 | * ocfs2_buffered_write_cluster() is responsible for | ||
916 | * passing through the iovecs, so it's difficult to | ||
917 | * predict what our next step is in here after our | ||
918 | * initial write. A future version should be pushing | ||
919 | * that iovec manipulation further down. | ||
920 | * | ||
921 | * By setting this, we indicate that a copy from user | ||
922 | * data was done, and subsequent calls for this | ||
923 | * cluster will skip copying more data. | ||
924 | */ | ||
925 | wc->w_finished_copy = 1; | ||
926 | 935 | ||
927 | *ret_from = from; | 936 | if (wc->w_large_pages) { |
928 | *ret_to = to; | 937 | from = wc->w_target_from; |
929 | out: | 938 | to = wc->w_target_to; |
939 | } else { | ||
940 | from = 0; | ||
941 | to = PAGE_CACHE_SIZE; | ||
942 | } | ||
943 | |||
944 | for(i = 0; i < wc->w_num_pages; i++) { | ||
945 | tmppage = wc->w_pages[i]; | ||
930 | 946 | ||
931 | return bytes ? (unsigned int)bytes : ret; | 947 | if (ocfs2_should_order_data(inode)) |
948 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | ||
949 | from, to, NULL, | ||
950 | ocfs2_journal_dirty_data); | ||
951 | |||
952 | block_commit_write(tmppage, from, to); | ||
953 | } | ||
932 | } | 954 | } |
933 | 955 | ||
934 | /* | 956 | static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, |
935 | * Map, fill and write a page to disk. | 957 | struct ocfs2_write_ctxt *wc, |
936 | * | 958 | struct page *page, u32 cpos, |
937 | * The work of copying data is done via callback. Newly allocated | 959 | loff_t user_pos, unsigned user_len, |
938 | * pages which don't take user data will be zero'd (set 'new' to | 960 | int new) |
939 | * indicate an allocating write) | ||
940 | * | ||
941 | * Returns a negative error code or the number of bytes copied into | ||
942 | * the page. | ||
943 | */ | ||
944 | static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | ||
945 | u64 *p_blkno, struct page *page, | ||
946 | struct ocfs2_write_ctxt *wc, int new) | ||
947 | { | 961 | { |
948 | int ret, copied = 0; | 962 | int ret; |
949 | unsigned int from = 0, to = 0; | 963 | unsigned int map_from = 0, map_to = 0; |
950 | unsigned int cluster_start, cluster_end; | 964 | unsigned int cluster_start, cluster_end; |
951 | unsigned int zero_from = 0, zero_to = 0; | 965 | unsigned int user_data_from = 0, user_data_to = 0; |
952 | 966 | ||
953 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, | 967 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, |
954 | &cluster_start, &cluster_end); | 968 | &cluster_start, &cluster_end); |
955 | 969 | ||
956 | if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index | 970 | if (page == wc->w_target_page) { |
957 | && !wc->w_finished_copy) { | 971 | map_from = user_pos & (PAGE_CACHE_SIZE - 1); |
958 | 972 | map_to = map_from + user_len; | |
959 | wc->w_this_page = page; | 973 | |
960 | wc->w_this_page_new = new; | 974 | if (new) |
961 | ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); | 975 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, |
962 | if (ret < 0) { | 976 | cluster_start, cluster_end, |
977 | new); | ||
978 | else | ||
979 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, | ||
980 | map_from, map_to, new); | ||
981 | if (ret) { | ||
963 | mlog_errno(ret); | 982 | mlog_errno(ret); |
964 | goto out; | 983 | goto out; |
965 | } | 984 | } |
966 | 985 | ||
967 | copied = ret; | 986 | user_data_from = map_from; |
968 | 987 | user_data_to = map_to; | |
969 | zero_from = from; | ||
970 | zero_to = to; | ||
971 | if (new) { | 988 | if (new) { |
972 | from = cluster_start; | 989 | map_from = cluster_start; |
973 | to = cluster_end; | 990 | map_to = cluster_end; |
974 | } | 991 | } |
992 | |||
993 | wc->w_target_from = map_from; | ||
994 | wc->w_target_to = map_to; | ||
975 | } else { | 995 | } else { |
976 | /* | 996 | /* |
977 | * If we haven't allocated the new page yet, we | 997 | * If we haven't allocated the new page yet, we |
@@ -980,11 +1000,11 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | |||
980 | */ | 1000 | */ |
981 | BUG_ON(!new); | 1001 | BUG_ON(!new); |
982 | 1002 | ||
983 | from = cluster_start; | 1003 | map_from = cluster_start; |
984 | to = cluster_end; | 1004 | map_to = cluster_end; |
985 | 1005 | ||
986 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, | 1006 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, |
987 | cluster_start, cluster_end, 1); | 1007 | cluster_start, cluster_end, new); |
988 | if (ret) { | 1008 | if (ret) { |
989 | mlog_errno(ret); | 1009 | mlog_errno(ret); |
990 | goto out; | 1010 | goto out; |
@@ -1003,108 +1023,113 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | |||
1003 | */ | 1023 | */ |
1004 | if (new && !PageUptodate(page)) | 1024 | if (new && !PageUptodate(page)) |
1005 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), | 1025 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), |
1006 | wc->w_cpos, zero_from, zero_to); | 1026 | cpos, user_data_from, user_data_to); |
1007 | 1027 | ||
1008 | flush_dcache_page(page); | 1028 | flush_dcache_page(page); |
1009 | 1029 | ||
1010 | if (ocfs2_should_order_data(inode)) { | ||
1011 | ret = walk_page_buffers(handle, | ||
1012 | page_buffers(page), | ||
1013 | from, to, NULL, | ||
1014 | ocfs2_journal_dirty_data); | ||
1015 | if (ret < 0) | ||
1016 | mlog_errno(ret); | ||
1017 | } | ||
1018 | |||
1019 | /* | ||
1020 | * We don't use generic_commit_write() because we need to | ||
1021 | * handle our own i_size update. | ||
1022 | */ | ||
1023 | ret = block_commit_write(page, from, to); | ||
1024 | if (ret) | ||
1025 | mlog_errno(ret); | ||
1026 | out: | 1030 | out: |
1027 | 1031 | return ret; | |
1028 | return copied ? copied : ret; | ||
1029 | } | 1032 | } |
1030 | 1033 | ||
1031 | /* | 1034 | /* |
1032 | * Do the actual write of some data into an inode. Optionally allocate | 1035 | * This function will only grab one clusters worth of pages. |
1033 | * in order to fulfill the write. | ||
1034 | * | ||
1035 | * cpos is the logical cluster offset within the file to write at | ||
1036 | * | ||
1037 | * 'phys' is the physical mapping of that offset. a 'phys' value of | ||
1038 | * zero indicates that allocation is required. In this case, data_ac | ||
1039 | * and meta_ac should be valid (meta_ac can be null if metadata | ||
1040 | * allocation isn't required). | ||
1041 | */ | 1036 | */ |
1042 | static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, | 1037 | static int ocfs2_grab_pages_for_write(struct address_space *mapping, |
1043 | struct buffer_head *di_bh, | 1038 | struct ocfs2_write_ctxt *wc, |
1044 | struct ocfs2_alloc_context *data_ac, | 1039 | u32 cpos, loff_t user_pos, int new, |
1045 | struct ocfs2_alloc_context *meta_ac, | 1040 | struct page *mmap_page) |
1046 | struct ocfs2_write_ctxt *wc) | ||
1047 | { | 1041 | { |
1048 | int ret, i, numpages = 1, new; | 1042 | int ret = 0, i; |
1049 | unsigned int copied = 0; | 1043 | unsigned long start, target_index, index; |
1050 | u32 tmp_pos; | ||
1051 | u64 v_blkno, p_blkno; | ||
1052 | struct address_space *mapping = file->f_mapping; | ||
1053 | struct inode *inode = mapping->host; | 1044 | struct inode *inode = mapping->host; |
1054 | unsigned long index, start; | ||
1055 | struct page **cpages; | ||
1056 | 1045 | ||
1057 | new = phys == 0 ? 1 : 0; | 1046 | target_index = user_pos >> PAGE_CACHE_SHIFT; |
1058 | 1047 | ||
1059 | /* | 1048 | /* |
1060 | * Figure out how many pages we'll be manipulating here. For | 1049 | * Figure out how many pages we'll be manipulating here. For |
1061 | * non allocating write, we just change the one | 1050 | * non allocating write, we just change the one |
1062 | * page. Otherwise, we'll need a whole clusters worth. | 1051 | * page. Otherwise, we'll need a whole clusters worth. |
1063 | */ | 1052 | */ |
1064 | if (new) | ||
1065 | numpages = ocfs2_pages_per_cluster(inode->i_sb); | ||
1066 | |||
1067 | cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); | ||
1068 | if (!cpages) { | ||
1069 | ret = -ENOMEM; | ||
1070 | mlog_errno(ret); | ||
1071 | return ret; | ||
1072 | } | ||
1073 | |||
1074 | /* | ||
1075 | * Fill our page array first. That way we've grabbed enough so | ||
1076 | * that we can zero and flush if we error after adding the | ||
1077 | * extent. | ||
1078 | */ | ||
1079 | if (new) { | 1053 | if (new) { |
1080 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, | 1054 | wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); |
1081 | wc->w_cpos); | 1055 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); |
1082 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); | ||
1083 | } else { | 1056 | } else { |
1084 | start = wc->w_pos >> PAGE_CACHE_SHIFT; | 1057 | wc->w_num_pages = 1; |
1085 | v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; | 1058 | start = target_index; |
1086 | } | 1059 | } |
1087 | 1060 | ||
1088 | for(i = 0; i < numpages; i++) { | 1061 | for(i = 0; i < wc->w_num_pages; i++) { |
1089 | index = start + i; | 1062 | index = start + i; |
1090 | 1063 | ||
1091 | cpages[i] = find_or_create_page(mapping, index, GFP_NOFS); | 1064 | if (index == target_index && mmap_page) { |
1092 | if (!cpages[i]) { | 1065 | /* |
1093 | ret = -ENOMEM; | 1066 | * ocfs2_pagemkwrite() is a little different |
1094 | mlog_errno(ret); | 1067 | * and wants us to directly use the page |
1095 | goto out; | 1068 | * passed in. |
1069 | */ | ||
1070 | lock_page(mmap_page); | ||
1071 | |||
1072 | if (mmap_page->mapping != mapping) { | ||
1073 | unlock_page(mmap_page); | ||
1074 | /* | ||
1075 | * Sanity check - the locking in | ||
1076 | * ocfs2_pagemkwrite() should ensure | ||
1077 | * that this code doesn't trigger. | ||
1078 | */ | ||
1079 | ret = -EINVAL; | ||
1080 | mlog_errno(ret); | ||
1081 | goto out; | ||
1082 | } | ||
1083 | |||
1084 | page_cache_get(mmap_page); | ||
1085 | wc->w_pages[i] = mmap_page; | ||
1086 | } else { | ||
1087 | wc->w_pages[i] = find_or_create_page(mapping, index, | ||
1088 | GFP_NOFS); | ||
1089 | if (!wc->w_pages[i]) { | ||
1090 | ret = -ENOMEM; | ||
1091 | mlog_errno(ret); | ||
1092 | goto out; | ||
1093 | } | ||
1096 | } | 1094 | } |
1095 | |||
1096 | if (index == target_index) | ||
1097 | wc->w_target_page = wc->w_pages[i]; | ||
1097 | } | 1098 | } |
1099 | out: | ||
1100 | return ret; | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * Prepare a single cluster for write one cluster into the file. | ||
1105 | */ | ||
1106 | static int ocfs2_write_cluster(struct address_space *mapping, | ||
1107 | u32 phys, unsigned int unwritten, | ||
1108 | struct ocfs2_alloc_context *data_ac, | ||
1109 | struct ocfs2_alloc_context *meta_ac, | ||
1110 | struct ocfs2_write_ctxt *wc, u32 cpos, | ||
1111 | loff_t user_pos, unsigned user_len) | ||
1112 | { | ||
1113 | int ret, i, new, should_zero = 0; | ||
1114 | u64 v_blkno, p_blkno; | ||
1115 | struct inode *inode = mapping->host; | ||
1116 | |||
1117 | new = phys == 0 ? 1 : 0; | ||
1118 | if (new || unwritten) | ||
1119 | should_zero = 1; | ||
1098 | 1120 | ||
1099 | if (new) { | 1121 | if (new) { |
1122 | u32 tmp_pos; | ||
1123 | |||
1100 | /* | 1124 | /* |
1101 | * This is safe to call with the page locks - it won't take | 1125 | * This is safe to call with the page locks - it won't take |
1102 | * any additional semaphores or cluster locks. | 1126 | * any additional semaphores or cluster locks. |
1103 | */ | 1127 | */ |
1104 | tmp_pos = wc->w_cpos; | 1128 | tmp_pos = cpos; |
1105 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1129 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, |
1106 | &tmp_pos, 1, di_bh, handle, | 1130 | &tmp_pos, 1, 0, wc->w_di_bh, |
1107 | data_ac, meta_ac, NULL); | 1131 | wc->w_handle, data_ac, |
1132 | meta_ac, NULL); | ||
1108 | /* | 1133 | /* |
1109 | * This shouldn't happen because we must have already | 1134 | * This shouldn't happen because we must have already |
1110 | * calculated the correct meta data allocation required. The | 1135 | * calculated the correct meta data allocation required. The |
@@ -1121,159 +1146,433 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, | |||
1121 | mlog_errno(ret); | 1146 | mlog_errno(ret); |
1122 | goto out; | 1147 | goto out; |
1123 | } | 1148 | } |
1149 | } else if (unwritten) { | ||
1150 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | ||
1151 | wc->w_handle, cpos, 1, phys, | ||
1152 | meta_ac, &wc->w_dealloc); | ||
1153 | if (ret < 0) { | ||
1154 | mlog_errno(ret); | ||
1155 | goto out; | ||
1156 | } | ||
1124 | } | 1157 | } |
1125 | 1158 | ||
1159 | if (should_zero) | ||
1160 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); | ||
1161 | else | ||
1162 | v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; | ||
1163 | |||
1164 | /* | ||
1165 | * The only reason this should fail is due to an inability to | ||
1166 | * find the extent added. | ||
1167 | */ | ||
1126 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, | 1168 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, |
1127 | NULL); | 1169 | NULL); |
1128 | if (ret < 0) { | 1170 | if (ret < 0) { |
1129 | 1171 | ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, " | |
1130 | /* | 1172 | "at logical block %llu", |
1131 | * XXX: Should we go readonly here? | 1173 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1132 | */ | 1174 | (unsigned long long)v_blkno); |
1133 | |||
1134 | mlog_errno(ret); | ||
1135 | goto out; | 1175 | goto out; |
1136 | } | 1176 | } |
1137 | 1177 | ||
1138 | BUG_ON(p_blkno == 0); | 1178 | BUG_ON(p_blkno == 0); |
1139 | 1179 | ||
1140 | for(i = 0; i < numpages; i++) { | 1180 | for(i = 0; i < wc->w_num_pages; i++) { |
1141 | ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], | 1181 | int tmpret; |
1142 | wc, new); | 1182 | |
1143 | if (ret < 0) { | 1183 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, |
1144 | mlog_errno(ret); | 1184 | wc->w_pages[i], cpos, |
1145 | goto out; | 1185 | user_pos, user_len, |
1186 | should_zero); | ||
1187 | if (tmpret) { | ||
1188 | mlog_errno(tmpret); | ||
1189 | if (ret == 0) | ||
1190 | tmpret = ret; | ||
1146 | } | 1191 | } |
1147 | |||
1148 | copied += ret; | ||
1149 | } | 1192 | } |
1150 | 1193 | ||
1194 | /* | ||
1195 | * We only have cleanup to do in case of allocating write. | ||
1196 | */ | ||
1197 | if (ret && new) | ||
1198 | ocfs2_write_failure(inode, wc, user_pos, user_len); | ||
1199 | |||
1151 | out: | 1200 | out: |
1152 | for(i = 0; i < numpages; i++) { | 1201 | |
1153 | unlock_page(cpages[i]); | 1202 | return ret; |
1154 | mark_page_accessed(cpages[i]); | 1203 | } |
1155 | page_cache_release(cpages[i]); | 1204 | |
1205 | static int ocfs2_write_cluster_by_desc(struct address_space *mapping, | ||
1206 | struct ocfs2_alloc_context *data_ac, | ||
1207 | struct ocfs2_alloc_context *meta_ac, | ||
1208 | struct ocfs2_write_ctxt *wc, | ||
1209 | loff_t pos, unsigned len) | ||
1210 | { | ||
1211 | int ret, i; | ||
1212 | struct ocfs2_write_cluster_desc *desc; | ||
1213 | |||
1214 | for (i = 0; i < wc->w_clen; i++) { | ||
1215 | desc = &wc->w_desc[i]; | ||
1216 | |||
1217 | ret = ocfs2_write_cluster(mapping, desc->c_phys, | ||
1218 | desc->c_unwritten, data_ac, meta_ac, | ||
1219 | wc, desc->c_cpos, pos, len); | ||
1220 | if (ret) { | ||
1221 | mlog_errno(ret); | ||
1222 | goto out; | ||
1223 | } | ||
1156 | } | 1224 | } |
1157 | kfree(cpages); | ||
1158 | 1225 | ||
1159 | return copied ? copied : ret; | 1226 | ret = 0; |
1227 | out: | ||
1228 | return ret; | ||
1160 | } | 1229 | } |
1161 | 1230 | ||
1162 | static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, | 1231 | /* |
1163 | struct ocfs2_super *osb, loff_t pos, | 1232 | * ocfs2_write_end() wants to know which parts of the target page it |
1164 | size_t count, ocfs2_page_writer *cb, | 1233 | * should complete the write on. It's easiest to compute them ahead of |
1165 | void *cb_priv) | 1234 | * time when a more complete view of the write is available. |
1235 | */ | ||
1236 | static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | ||
1237 | struct ocfs2_write_ctxt *wc, | ||
1238 | loff_t pos, unsigned len, int alloc) | ||
1166 | { | 1239 | { |
1167 | wc->w_count = count; | 1240 | struct ocfs2_write_cluster_desc *desc; |
1168 | wc->w_pos = pos; | ||
1169 | wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; | ||
1170 | wc->w_finished_copy = 0; | ||
1171 | 1241 | ||
1172 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) | 1242 | wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1); |
1173 | wc->w_large_pages = 1; | 1243 | wc->w_target_to = wc->w_target_from + len; |
1174 | else | ||
1175 | wc->w_large_pages = 0; | ||
1176 | 1244 | ||
1177 | wc->w_write_data_page = cb; | 1245 | if (alloc == 0) |
1178 | wc->w_private = cb_priv; | 1246 | return; |
1247 | |||
1248 | /* | ||
1249 | * Allocating write - we may have different boundaries based | ||
1250 | * on page size and cluster size. | ||
1251 | * | ||
1252 | * NOTE: We can no longer compute one value from the other as | ||
1253 | * the actual write length and user provided length may be | ||
1254 | * different. | ||
1255 | */ | ||
1256 | |||
1257 | if (wc->w_large_pages) { | ||
1258 | /* | ||
1259 | * We only care about the 1st and last cluster within | ||
1260 | * our range and whether they should be zero'd or not. Either | ||
1261 | * value may be extended out to the start/end of a | ||
1262 | * newly allocated cluster. | ||
1263 | */ | ||
1264 | desc = &wc->w_desc[0]; | ||
1265 | if (ocfs2_should_zero_cluster(desc)) | ||
1266 | ocfs2_figure_cluster_boundaries(osb, | ||
1267 | desc->c_cpos, | ||
1268 | &wc->w_target_from, | ||
1269 | NULL); | ||
1270 | |||
1271 | desc = &wc->w_desc[wc->w_clen - 1]; | ||
1272 | if (ocfs2_should_zero_cluster(desc)) | ||
1273 | ocfs2_figure_cluster_boundaries(osb, | ||
1274 | desc->c_cpos, | ||
1275 | NULL, | ||
1276 | &wc->w_target_to); | ||
1277 | } else { | ||
1278 | wc->w_target_from = 0; | ||
1279 | wc->w_target_to = PAGE_CACHE_SIZE; | ||
1280 | } | ||
1179 | } | 1281 | } |
1180 | 1282 | ||
1181 | /* | 1283 | /* |
1182 | * Write a cluster to an inode. The cluster may not be allocated yet, | 1284 | * Populate each single-cluster write descriptor in the write context |
1183 | * in which case it will be. This only exists for buffered writes - | 1285 | * with information about the i/o to be done. |
1184 | * O_DIRECT takes a more "traditional" path through the kernel. | ||
1185 | * | ||
1186 | * The caller is responsible for incrementing pos, written counts, etc | ||
1187 | * | 1286 | * |
1188 | * For file systems that don't support sparse files, pre-allocation | 1287 | * Returns the number of clusters that will have to be allocated, as |
1189 | * and page zeroing up until cpos should be done prior to this | 1288 | * well as a worst case estimate of the number of extent records that |
1190 | * function call. | 1289 | * would have to be created during a write to an unwritten region. |
1191 | * | ||
1192 | * Callers should be holding i_sem, and the rw cluster lock. | ||
1193 | * | ||
1194 | * Returns the number of user bytes written, or less than zero for | ||
1195 | * error. | ||
1196 | */ | 1290 | */ |
1197 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | 1291 | static int ocfs2_populate_write_desc(struct inode *inode, |
1198 | size_t count, ocfs2_page_writer *actor, | 1292 | struct ocfs2_write_ctxt *wc, |
1199 | void *priv) | 1293 | unsigned int *clusters_to_alloc, |
1294 | unsigned int *extents_to_split) | ||
1295 | { | ||
1296 | int ret; | ||
1297 | struct ocfs2_write_cluster_desc *desc; | ||
1298 | unsigned int num_clusters = 0; | ||
1299 | unsigned int ext_flags = 0; | ||
1300 | u32 phys = 0; | ||
1301 | int i; | ||
1302 | |||
1303 | *clusters_to_alloc = 0; | ||
1304 | *extents_to_split = 0; | ||
1305 | |||
1306 | for (i = 0; i < wc->w_clen; i++) { | ||
1307 | desc = &wc->w_desc[i]; | ||
1308 | desc->c_cpos = wc->w_cpos + i; | ||
1309 | |||
1310 | if (num_clusters == 0) { | ||
1311 | /* | ||
1312 | * Need to look up the next extent record. | ||
1313 | */ | ||
1314 | ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, | ||
1315 | &num_clusters, &ext_flags); | ||
1316 | if (ret) { | ||
1317 | mlog_errno(ret); | ||
1318 | goto out; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * Assume worst case - that we're writing in | ||
1323 | * the middle of the extent. | ||
1324 | * | ||
1325 | * We can assume that the write proceeds from | ||
1326 | * left to right, in which case the extent | ||
1327 | * insert code is smart enough to coalesce the | ||
1328 | * next splits into the previous records created. | ||
1329 | */ | ||
1330 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
1331 | *extents_to_split = *extents_to_split + 2; | ||
1332 | } else if (phys) { | ||
1333 | /* | ||
1334 | * Only increment phys if it doesn't describe | ||
1335 | * a hole. | ||
1336 | */ | ||
1337 | phys++; | ||
1338 | } | ||
1339 | |||
1340 | desc->c_phys = phys; | ||
1341 | if (phys == 0) { | ||
1342 | desc->c_new = 1; | ||
1343 | *clusters_to_alloc = *clusters_to_alloc + 1; | ||
1344 | } | ||
1345 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
1346 | desc->c_unwritten = 1; | ||
1347 | |||
1348 | num_clusters--; | ||
1349 | } | ||
1350 | |||
1351 | ret = 0; | ||
1352 | out: | ||
1353 | return ret; | ||
1354 | } | ||
1355 | |||
1356 | int ocfs2_write_begin_nolock(struct address_space *mapping, | ||
1357 | loff_t pos, unsigned len, unsigned flags, | ||
1358 | struct page **pagep, void **fsdata, | ||
1359 | struct buffer_head *di_bh, struct page *mmap_page) | ||
1200 | { | 1360 | { |
1201 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; | 1361 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; |
1202 | ssize_t written = 0; | 1362 | unsigned int clusters_to_alloc, extents_to_split; |
1203 | u32 phys; | 1363 | struct ocfs2_write_ctxt *wc; |
1204 | struct inode *inode = file->f_mapping->host; | 1364 | struct inode *inode = mapping->host; |
1205 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1206 | struct buffer_head *di_bh = NULL; | ||
1207 | struct ocfs2_dinode *di; | 1366 | struct ocfs2_dinode *di; |
1208 | struct ocfs2_alloc_context *data_ac = NULL; | 1367 | struct ocfs2_alloc_context *data_ac = NULL; |
1209 | struct ocfs2_alloc_context *meta_ac = NULL; | 1368 | struct ocfs2_alloc_context *meta_ac = NULL; |
1210 | handle_t *handle; | 1369 | handle_t *handle; |
1211 | struct ocfs2_write_ctxt wc; | ||
1212 | |||
1213 | ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); | ||
1214 | 1370 | ||
1215 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1371 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); |
1216 | if (ret) { | 1372 | if (ret) { |
1217 | mlog_errno(ret); | 1373 | mlog_errno(ret); |
1218 | goto out; | 1374 | return ret; |
1219 | } | 1375 | } |
1220 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1221 | |||
1222 | /* | ||
1223 | * Take alloc sem here to prevent concurrent lookups. That way | ||
1224 | * the mapping, zeroing and tree manipulation within | ||
1225 | * ocfs2_write() will be safe against ->readpage(). This | ||
1226 | * should also serve to lock out allocation from a shared | ||
1227 | * writeable region. | ||
1228 | */ | ||
1229 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1230 | 1376 | ||
1231 | ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); | 1377 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, |
1378 | &extents_to_split); | ||
1232 | if (ret) { | 1379 | if (ret) { |
1233 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
1234 | goto out_meta; | 1381 | goto out; |
1235 | } | 1382 | } |
1236 | 1383 | ||
1237 | /* phys == 0 means that allocation is required. */ | 1384 | di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; |
1238 | if (phys == 0) { | 1385 | |
1239 | ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); | 1386 | /* |
1387 | * We set w_target_from, w_target_to here so that | ||
1388 | * ocfs2_write_end() knows which range in the target page to | ||
1389 | * write out. An allocation requires that we write the entire | ||
1390 | * cluster range. | ||
1391 | */ | ||
1392 | if (clusters_to_alloc || extents_to_split) { | ||
1393 | /* | ||
1394 | * XXX: We are stretching the limits of | ||
1395 | * ocfs2_lock_allocators(). It greatly over-estimates | ||
1396 | * the work to be done. | ||
1397 | */ | ||
1398 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | ||
1399 | extents_to_split, &data_ac, &meta_ac); | ||
1240 | if (ret) { | 1400 | if (ret) { |
1241 | mlog_errno(ret); | 1401 | mlog_errno(ret); |
1242 | goto out_meta; | 1402 | goto out; |
1243 | } | 1403 | } |
1244 | 1404 | ||
1245 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); | 1405 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, |
1246 | } | 1406 | clusters_to_alloc); |
1247 | 1407 | ||
1248 | ret = ocfs2_data_lock(inode, 1); | ||
1249 | if (ret) { | ||
1250 | mlog_errno(ret); | ||
1251 | goto out_meta; | ||
1252 | } | 1408 | } |
1253 | 1409 | ||
1410 | ocfs2_set_target_boundaries(osb, wc, pos, len, | ||
1411 | clusters_to_alloc + extents_to_split); | ||
1412 | |||
1254 | handle = ocfs2_start_trans(osb, credits); | 1413 | handle = ocfs2_start_trans(osb, credits); |
1255 | if (IS_ERR(handle)) { | 1414 | if (IS_ERR(handle)) { |
1256 | ret = PTR_ERR(handle); | 1415 | ret = PTR_ERR(handle); |
1257 | mlog_errno(ret); | 1416 | mlog_errno(ret); |
1258 | goto out_data; | 1417 | goto out; |
1259 | } | 1418 | } |
1260 | 1419 | ||
1261 | written = ocfs2_write(file, phys, handle, di_bh, data_ac, | 1420 | wc->w_handle = handle; |
1262 | meta_ac, &wc); | 1421 | |
1263 | if (written < 0) { | 1422 | /* |
1264 | ret = written; | 1423 | * We don't want this to fail in ocfs2_write_end(), so do it |
1424 | * here. | ||
1425 | */ | ||
1426 | ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, | ||
1427 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1428 | if (ret) { | ||
1265 | mlog_errno(ret); | 1429 | mlog_errno(ret); |
1266 | goto out_commit; | 1430 | goto out_commit; |
1267 | } | 1431 | } |
1268 | 1432 | ||
1269 | ret = ocfs2_journal_access(handle, inode, di_bh, | 1433 | /* |
1270 | OCFS2_JOURNAL_ACCESS_WRITE); | 1434 | * Fill our page array first. That way we've grabbed enough so |
1435 | * that we can zero and flush if we error after adding the | ||
1436 | * extent. | ||
1437 | */ | ||
1438 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, | ||
1439 | clusters_to_alloc + extents_to_split, | ||
1440 | mmap_page); | ||
1271 | if (ret) { | 1441 | if (ret) { |
1272 | mlog_errno(ret); | 1442 | mlog_errno(ret); |
1273 | goto out_commit; | 1443 | goto out_commit; |
1274 | } | 1444 | } |
1275 | 1445 | ||
1276 | pos += written; | 1446 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, |
1447 | len); | ||
1448 | if (ret) { | ||
1449 | mlog_errno(ret); | ||
1450 | goto out_commit; | ||
1451 | } | ||
1452 | |||
1453 | if (data_ac) | ||
1454 | ocfs2_free_alloc_context(data_ac); | ||
1455 | if (meta_ac) | ||
1456 | ocfs2_free_alloc_context(meta_ac); | ||
1457 | |||
1458 | *pagep = wc->w_target_page; | ||
1459 | *fsdata = wc; | ||
1460 | return 0; | ||
1461 | out_commit: | ||
1462 | ocfs2_commit_trans(osb, handle); | ||
1463 | |||
1464 | out: | ||
1465 | ocfs2_free_write_ctxt(wc); | ||
1466 | |||
1467 | if (data_ac) | ||
1468 | ocfs2_free_alloc_context(data_ac); | ||
1469 | if (meta_ac) | ||
1470 | ocfs2_free_alloc_context(meta_ac); | ||
1471 | return ret; | ||
1472 | } | ||
1473 | |||
1474 | int ocfs2_write_begin(struct file *file, struct address_space *mapping, | ||
1475 | loff_t pos, unsigned len, unsigned flags, | ||
1476 | struct page **pagep, void **fsdata) | ||
1477 | { | ||
1478 | int ret; | ||
1479 | struct buffer_head *di_bh = NULL; | ||
1480 | struct inode *inode = mapping->host; | ||
1481 | |||
1482 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
1483 | if (ret) { | ||
1484 | mlog_errno(ret); | ||
1485 | return ret; | ||
1486 | } | ||
1487 | |||
1488 | /* | ||
1489 | * Take alloc sem here to prevent concurrent lookups. That way | ||
1490 | * the mapping, zeroing and tree manipulation within | ||
1491 | * ocfs2_write() will be safe against ->readpage(). This | ||
1492 | * should also serve to lock out allocation from a shared | ||
1493 | * writeable region. | ||
1494 | */ | ||
1495 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1496 | |||
1497 | ret = ocfs2_data_lock(inode, 1); | ||
1498 | if (ret) { | ||
1499 | mlog_errno(ret); | ||
1500 | goto out_fail; | ||
1501 | } | ||
1502 | |||
1503 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | ||
1504 | fsdata, di_bh, NULL); | ||
1505 | if (ret) { | ||
1506 | mlog_errno(ret); | ||
1507 | goto out_fail_data; | ||
1508 | } | ||
1509 | |||
1510 | brelse(di_bh); | ||
1511 | |||
1512 | return 0; | ||
1513 | |||
1514 | out_fail_data: | ||
1515 | ocfs2_data_unlock(inode, 1); | ||
1516 | out_fail: | ||
1517 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1518 | |||
1519 | brelse(di_bh); | ||
1520 | ocfs2_meta_unlock(inode, 1); | ||
1521 | |||
1522 | return ret; | ||
1523 | } | ||
1524 | |||
1525 | int ocfs2_write_end_nolock(struct address_space *mapping, | ||
1526 | loff_t pos, unsigned len, unsigned copied, | ||
1527 | struct page *page, void *fsdata) | ||
1528 | { | ||
1529 | int i; | ||
1530 | unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); | ||
1531 | struct inode *inode = mapping->host; | ||
1532 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1533 | struct ocfs2_write_ctxt *wc = fsdata; | ||
1534 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; | ||
1535 | handle_t *handle = wc->w_handle; | ||
1536 | struct page *tmppage; | ||
1537 | |||
1538 | if (unlikely(copied < len)) { | ||
1539 | if (!PageUptodate(wc->w_target_page)) | ||
1540 | copied = 0; | ||
1541 | |||
1542 | ocfs2_zero_new_buffers(wc->w_target_page, start+copied, | ||
1543 | start+len); | ||
1544 | } | ||
1545 | flush_dcache_page(wc->w_target_page); | ||
1546 | |||
1547 | for(i = 0; i < wc->w_num_pages; i++) { | ||
1548 | tmppage = wc->w_pages[i]; | ||
1549 | |||
1550 | if (tmppage == wc->w_target_page) { | ||
1551 | from = wc->w_target_from; | ||
1552 | to = wc->w_target_to; | ||
1553 | |||
1554 | BUG_ON(from > PAGE_CACHE_SIZE || | ||
1555 | to > PAGE_CACHE_SIZE || | ||
1556 | to < from); | ||
1557 | } else { | ||
1558 | /* | ||
1559 | * Pages adjacent to the target (if any) imply | ||
1560 | * a hole-filling write in which case we want | ||
1561 | * to flush their entire range. | ||
1562 | */ | ||
1563 | from = 0; | ||
1564 | to = PAGE_CACHE_SIZE; | ||
1565 | } | ||
1566 | |||
1567 | if (ocfs2_should_order_data(inode)) | ||
1568 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | ||
1569 | from, to, NULL, | ||
1570 | ocfs2_journal_dirty_data); | ||
1571 | |||
1572 | block_commit_write(tmppage, from, to); | ||
1573 | } | ||
1574 | |||
1575 | pos += copied; | ||
1277 | if (pos > inode->i_size) { | 1576 | if (pos > inode->i_size) { |
1278 | i_size_write(inode, pos); | 1577 | i_size_write(inode, pos); |
1279 | mark_inode_dirty(inode); | 1578 | mark_inode_dirty(inode); |
@@ -1283,29 +1582,31 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | |||
1283 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1582 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
1284 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 1583 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
1285 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1584 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
1585 | ocfs2_journal_dirty(handle, wc->w_di_bh); | ||
1286 | 1586 | ||
1287 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1288 | if (ret) | ||
1289 | mlog_errno(ret); | ||
1290 | |||
1291 | out_commit: | ||
1292 | ocfs2_commit_trans(osb, handle); | 1587 | ocfs2_commit_trans(osb, handle); |
1293 | 1588 | ||
1294 | out_data: | 1589 | ocfs2_run_deallocs(osb, &wc->w_dealloc); |
1295 | ocfs2_data_unlock(inode, 1); | 1590 | |
1591 | ocfs2_free_write_ctxt(wc); | ||
1592 | |||
1593 | return copied; | ||
1594 | } | ||
1595 | |||
1596 | int ocfs2_write_end(struct file *file, struct address_space *mapping, | ||
1597 | loff_t pos, unsigned len, unsigned copied, | ||
1598 | struct page *page, void *fsdata) | ||
1599 | { | ||
1600 | int ret; | ||
1601 | struct inode *inode = mapping->host; | ||
1296 | 1602 | ||
1297 | out_meta: | 1603 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
1604 | |||
1605 | ocfs2_data_unlock(inode, 1); | ||
1298 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1606 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1299 | ocfs2_meta_unlock(inode, 1); | 1607 | ocfs2_meta_unlock(inode, 1); |
1300 | 1608 | ||
1301 | out: | 1609 | return ret; |
1302 | brelse(di_bh); | ||
1303 | if (data_ac) | ||
1304 | ocfs2_free_alloc_context(data_ac); | ||
1305 | if (meta_ac) | ||
1306 | ocfs2_free_alloc_context(meta_ac); | ||
1307 | |||
1308 | return written ? written : ret; | ||
1309 | } | 1610 | } |
1310 | 1611 | ||
1311 | const struct address_space_operations ocfs2_aops = { | 1612 | const struct address_space_operations ocfs2_aops = { |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 45821d479b5a..389579bd64e3 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -42,57 +42,22 @@ int walk_page_buffers( handle_t *handle, | |||
42 | int (*fn)( handle_t *handle, | 42 | int (*fn)( handle_t *handle, |
43 | struct buffer_head *bh)); | 43 | struct buffer_head *bh)); |
44 | 44 | ||
45 | struct ocfs2_write_ctxt; | 45 | int ocfs2_write_begin(struct file *file, struct address_space *mapping, |
46 | typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, | 46 | loff_t pos, unsigned len, unsigned flags, |
47 | u64 *, unsigned int *, unsigned int *); | 47 | struct page **pagep, void **fsdata); |
48 | 48 | ||
49 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | 49 | int ocfs2_write_end(struct file *file, struct address_space *mapping, |
50 | size_t count, ocfs2_page_writer *actor, | 50 | loff_t pos, unsigned len, unsigned copied, |
51 | void *priv); | 51 | struct page *page, void *fsdata); |
52 | 52 | ||
53 | struct ocfs2_write_ctxt { | 53 | int ocfs2_write_end_nolock(struct address_space *mapping, |
54 | size_t w_count; | 54 | loff_t pos, unsigned len, unsigned copied, |
55 | loff_t w_pos; | 55 | struct page *page, void *fsdata); |
56 | u32 w_cpos; | ||
57 | unsigned int w_finished_copy; | ||
58 | 56 | ||
59 | /* This is true if page_size > cluster_size */ | 57 | int ocfs2_write_begin_nolock(struct address_space *mapping, |
60 | unsigned int w_large_pages; | 58 | loff_t pos, unsigned len, unsigned flags, |
61 | 59 | struct page **pagep, void **fsdata, | |
62 | /* Filler callback and private data */ | 60 | struct buffer_head *di_bh, struct page *mmap_page); |
63 | ocfs2_page_writer *w_write_data_page; | ||
64 | void *w_private; | ||
65 | |||
66 | /* Only valid for the filler callback */ | ||
67 | struct page *w_this_page; | ||
68 | unsigned int w_this_page_new; | ||
69 | }; | ||
70 | |||
71 | struct ocfs2_buffered_write_priv { | ||
72 | char *b_src_buf; | ||
73 | const struct iovec *b_cur_iov; /* Current iovec */ | ||
74 | size_t b_cur_off; /* Offset in the | ||
75 | * current iovec */ | ||
76 | }; | ||
77 | int ocfs2_map_and_write_user_data(struct inode *inode, | ||
78 | struct ocfs2_write_ctxt *wc, | ||
79 | u64 *p_blkno, | ||
80 | unsigned int *ret_from, | ||
81 | unsigned int *ret_to); | ||
82 | |||
83 | struct ocfs2_splice_write_priv { | ||
84 | struct splice_desc *s_sd; | ||
85 | struct pipe_buffer *s_buf; | ||
86 | struct pipe_inode_info *s_pipe; | ||
87 | /* Neither offset value is ever larger than one page */ | ||
88 | unsigned int s_offset; | ||
89 | unsigned int s_buf_offset; | ||
90 | }; | ||
91 | int ocfs2_map_and_write_splice_data(struct inode *inode, | ||
92 | struct ocfs2_write_ctxt *wc, | ||
93 | u64 *p_blkno, | ||
94 | unsigned int *ret_from, | ||
95 | unsigned int *ret_to); | ||
96 | 61 | ||
97 | /* all ocfs2_dio_end_io()'s fault */ | 62 | /* all ocfs2_dio_end_io()'s fault */ |
98 | #define ocfs2_iocb_is_rw_locked(iocb) \ | 63 | #define ocfs2_iocb_is_rw_locked(iocb) \ |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 979113479c66..2bd7f788cf34 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1335 | ret = wait_event_interruptible(o2hb_steady_queue, | 1335 | ret = wait_event_interruptible(o2hb_steady_queue, |
1336 | atomic_read(®->hr_steady_iterations) == 0); | 1336 | atomic_read(®->hr_steady_iterations) == 0); |
1337 | if (ret) { | 1337 | if (ret) { |
1338 | /* We got interrupted (hello ptrace!). Clean up */ | ||
1338 | spin_lock(&o2hb_live_lock); | 1339 | spin_lock(&o2hb_live_lock); |
1339 | hb_task = reg->hr_task; | 1340 | hb_task = reg->hr_task; |
1340 | reg->hr_task = NULL; | 1341 | reg->hr_task = NULL; |
@@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1345 | goto out; | 1346 | goto out; |
1346 | } | 1347 | } |
1347 | 1348 | ||
1348 | ret = count; | 1349 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1350 | spin_lock(&o2hb_live_lock); | ||
1351 | hb_task = reg->hr_task; | ||
1352 | spin_unlock(&o2hb_live_lock); | ||
1353 | |||
1354 | if (hb_task) | ||
1355 | ret = count; | ||
1356 | else | ||
1357 | ret = -EIO; | ||
1358 | |||
1349 | out: | 1359 | out: |
1350 | if (filp) | 1360 | if (filp) |
1351 | fput(filp); | 1361 | fput(filp); |
@@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1523 | if (hb_task) | 1533 | if (hb_task) |
1524 | kthread_stop(hb_task); | 1534 | kthread_stop(hb_task); |
1525 | 1535 | ||
1536 | /* | ||
1537 | * If we're racing a dev_write(), we need to wake them. They will | ||
1538 | * check reg->hr_task | ||
1539 | */ | ||
1540 | if (atomic_read(®->hr_steady_iterations) != 0) { | ||
1541 | atomic_set(®->hr_steady_iterations, 0); | ||
1542 | wake_up(&o2hb_steady_queue); | ||
1543 | } | ||
1544 | |||
1526 | config_item_put(item); | 1545 | config_item_put(item); |
1527 | } | 1546 | } |
1528 | 1547 | ||
@@ -1665,7 +1684,67 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, | |||
1665 | } | 1684 | } |
1666 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); | 1685 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); |
1667 | 1686 | ||
1668 | int o2hb_register_callback(struct o2hb_callback_func *hc) | 1687 | static struct o2hb_region *o2hb_find_region(const char *region_uuid) |
1688 | { | ||
1689 | struct o2hb_region *p, *reg = NULL; | ||
1690 | |||
1691 | assert_spin_locked(&o2hb_live_lock); | ||
1692 | |||
1693 | list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { | ||
1694 | if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { | ||
1695 | reg = p; | ||
1696 | break; | ||
1697 | } | ||
1698 | } | ||
1699 | |||
1700 | return reg; | ||
1701 | } | ||
1702 | |||
1703 | static int o2hb_region_get(const char *region_uuid) | ||
1704 | { | ||
1705 | int ret = 0; | ||
1706 | struct o2hb_region *reg; | ||
1707 | |||
1708 | spin_lock(&o2hb_live_lock); | ||
1709 | |||
1710 | reg = o2hb_find_region(region_uuid); | ||
1711 | if (!reg) | ||
1712 | ret = -ENOENT; | ||
1713 | spin_unlock(&o2hb_live_lock); | ||
1714 | |||
1715 | if (ret) | ||
1716 | goto out; | ||
1717 | |||
1718 | ret = o2nm_depend_this_node(); | ||
1719 | if (ret) | ||
1720 | goto out; | ||
1721 | |||
1722 | ret = o2nm_depend_item(®->hr_item); | ||
1723 | if (ret) | ||
1724 | o2nm_undepend_this_node(); | ||
1725 | |||
1726 | out: | ||
1727 | return ret; | ||
1728 | } | ||
1729 | |||
1730 | static void o2hb_region_put(const char *region_uuid) | ||
1731 | { | ||
1732 | struct o2hb_region *reg; | ||
1733 | |||
1734 | spin_lock(&o2hb_live_lock); | ||
1735 | |||
1736 | reg = o2hb_find_region(region_uuid); | ||
1737 | |||
1738 | spin_unlock(&o2hb_live_lock); | ||
1739 | |||
1740 | if (reg) { | ||
1741 | o2nm_undepend_item(®->hr_item); | ||
1742 | o2nm_undepend_this_node(); | ||
1743 | } | ||
1744 | } | ||
1745 | |||
1746 | int o2hb_register_callback(const char *region_uuid, | ||
1747 | struct o2hb_callback_func *hc) | ||
1669 | { | 1748 | { |
1670 | struct o2hb_callback_func *tmp; | 1749 | struct o2hb_callback_func *tmp; |
1671 | struct list_head *iter; | 1750 | struct list_head *iter; |
@@ -1681,6 +1760,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc) | |||
1681 | goto out; | 1760 | goto out; |
1682 | } | 1761 | } |
1683 | 1762 | ||
1763 | if (region_uuid) { | ||
1764 | ret = o2hb_region_get(region_uuid); | ||
1765 | if (ret) | ||
1766 | goto out; | ||
1767 | } | ||
1768 | |||
1684 | down_write(&o2hb_callback_sem); | 1769 | down_write(&o2hb_callback_sem); |
1685 | 1770 | ||
1686 | list_for_each(iter, &hbcall->list) { | 1771 | list_for_each(iter, &hbcall->list) { |
@@ -1702,16 +1787,21 @@ out: | |||
1702 | } | 1787 | } |
1703 | EXPORT_SYMBOL_GPL(o2hb_register_callback); | 1788 | EXPORT_SYMBOL_GPL(o2hb_register_callback); |
1704 | 1789 | ||
1705 | void o2hb_unregister_callback(struct o2hb_callback_func *hc) | 1790 | void o2hb_unregister_callback(const char *region_uuid, |
1791 | struct o2hb_callback_func *hc) | ||
1706 | { | 1792 | { |
1707 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); | 1793 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); |
1708 | 1794 | ||
1709 | mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", | 1795 | mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", |
1710 | __builtin_return_address(0), hc); | 1796 | __builtin_return_address(0), hc); |
1711 | 1797 | ||
1798 | /* XXX Can this happen _with_ a region reference? */ | ||
1712 | if (list_empty(&hc->hc_item)) | 1799 | if (list_empty(&hc->hc_item)) |
1713 | return; | 1800 | return; |
1714 | 1801 | ||
1802 | if (region_uuid) | ||
1803 | o2hb_region_put(region_uuid); | ||
1804 | |||
1715 | down_write(&o2hb_callback_sem); | 1805 | down_write(&o2hb_callback_sem); |
1716 | 1806 | ||
1717 | list_del_init(&hc->hc_item); | 1807 | list_del_init(&hc->hc_item); |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index cc6d40b39771..35397dd5ecdb 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, | |||
69 | o2hb_cb_func *func, | 69 | o2hb_cb_func *func, |
70 | void *data, | 70 | void *data, |
71 | int priority); | 71 | int priority); |
72 | int o2hb_register_callback(struct o2hb_callback_func *hc); | 72 | int o2hb_register_callback(const char *region_uuid, |
73 | void o2hb_unregister_callback(struct o2hb_callback_func *hc); | 73 | struct o2hb_callback_func *hc); |
74 | void o2hb_unregister_callback(const char *region_uuid, | ||
75 | struct o2hb_callback_func *hc); | ||
74 | void o2hb_fill_node_map(unsigned long *map, | 76 | void o2hb_fill_node_map(unsigned long *map, |
75 | unsigned bytes); | 77 | unsigned bytes); |
76 | void o2hb_init(void); | 78 | void o2hb_init(void); |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 9f5ad0f01ce0..af2070da308b 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -900,6 +900,46 @@ static struct o2nm_cluster_group o2nm_cluster_group = { | |||
900 | }, | 900 | }, |
901 | }; | 901 | }; |
902 | 902 | ||
903 | int o2nm_depend_item(struct config_item *item) | ||
904 | { | ||
905 | return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); | ||
906 | } | ||
907 | |||
908 | void o2nm_undepend_item(struct config_item *item) | ||
909 | { | ||
910 | configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item); | ||
911 | } | ||
912 | |||
913 | int o2nm_depend_this_node(void) | ||
914 | { | ||
915 | int ret = 0; | ||
916 | struct o2nm_node *local_node; | ||
917 | |||
918 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | ||
919 | if (!local_node) { | ||
920 | ret = -EINVAL; | ||
921 | goto out; | ||
922 | } | ||
923 | |||
924 | ret = o2nm_depend_item(&local_node->nd_item); | ||
925 | o2nm_node_put(local_node); | ||
926 | |||
927 | out: | ||
928 | return ret; | ||
929 | } | ||
930 | |||
931 | void o2nm_undepend_this_node(void) | ||
932 | { | ||
933 | struct o2nm_node *local_node; | ||
934 | |||
935 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | ||
936 | BUG_ON(!local_node); | ||
937 | |||
938 | o2nm_undepend_item(&local_node->nd_item); | ||
939 | o2nm_node_put(local_node); | ||
940 | } | ||
941 | |||
942 | |||
903 | static void __exit exit_o2nm(void) | 943 | static void __exit exit_o2nm(void) |
904 | { | 944 | { |
905 | if (ocfs2_table_header) | 945 | if (ocfs2_table_header) |
@@ -934,7 +974,7 @@ static int __init init_o2nm(void) | |||
934 | goto out_sysctl; | 974 | goto out_sysctl; |
935 | 975 | ||
936 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); | 976 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); |
937 | init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem); | 977 | mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); |
938 | ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); | 978 | ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); |
939 | if (ret) { | 979 | if (ret) { |
940 | printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); | 980 | printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 070522138ae2..7c860361b8dd 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
@@ -77,4 +77,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr); | |||
77 | void o2nm_node_get(struct o2nm_node *node); | 77 | void o2nm_node_get(struct o2nm_node *node); |
78 | void o2nm_node_put(struct o2nm_node *node); | 78 | void o2nm_node_put(struct o2nm_node *node); |
79 | 79 | ||
80 | int o2nm_depend_item(struct config_item *item); | ||
81 | void o2nm_undepend_item(struct config_item *item); | ||
82 | int o2nm_depend_this_node(void); | ||
83 | void o2nm_undepend_this_node(void); | ||
84 | |||
80 | #endif /* O2CLUSTER_NODEMANAGER_H */ | 85 | #endif /* O2CLUSTER_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0b229a9c7952..f0bdfd944c44 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -261,14 +261,12 @@ out: | |||
261 | 261 | ||
262 | static void o2net_complete_nodes_nsw(struct o2net_node *nn) | 262 | static void o2net_complete_nodes_nsw(struct o2net_node *nn) |
263 | { | 263 | { |
264 | struct list_head *iter, *tmp; | 264 | struct o2net_status_wait *nsw, *tmp; |
265 | unsigned int num_kills = 0; | 265 | unsigned int num_kills = 0; |
266 | struct o2net_status_wait *nsw; | ||
267 | 266 | ||
268 | assert_spin_locked(&nn->nn_lock); | 267 | assert_spin_locked(&nn->nn_lock); |
269 | 268 | ||
270 | list_for_each_safe(iter, tmp, &nn->nn_status_list) { | 269 | list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) { |
271 | nsw = list_entry(iter, struct o2net_status_wait, ns_node_item); | ||
272 | o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); | 270 | o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); |
273 | num_kills++; | 271 | num_kills++; |
274 | } | 272 | } |
@@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler); | |||
764 | 762 | ||
765 | void o2net_unregister_handler_list(struct list_head *list) | 763 | void o2net_unregister_handler_list(struct list_head *list) |
766 | { | 764 | { |
767 | struct list_head *pos, *n; | 765 | struct o2net_msg_handler *nmh, *n; |
768 | struct o2net_msg_handler *nmh; | ||
769 | 766 | ||
770 | write_lock(&o2net_handler_lock); | 767 | write_lock(&o2net_handler_lock); |
771 | list_for_each_safe(pos, n, list) { | 768 | list_for_each_entry_safe(nmh, n, list, nh_unregister_item) { |
772 | nmh = list_entry(pos, struct o2net_msg_handler, | ||
773 | nh_unregister_item); | ||
774 | mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", | 769 | mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", |
775 | nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); | 770 | nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); |
776 | rb_erase(&nmh->nh_node, &o2net_handler_tree); | 771 | rb_erase(&nmh->nh_node, &o2net_handler_tree); |
@@ -1638,8 +1633,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1638 | 1633 | ||
1639 | void o2net_unregister_hb_callbacks(void) | 1634 | void o2net_unregister_hb_callbacks(void) |
1640 | { | 1635 | { |
1641 | o2hb_unregister_callback(&o2net_hb_up); | 1636 | o2hb_unregister_callback(NULL, &o2net_hb_up); |
1642 | o2hb_unregister_callback(&o2net_hb_down); | 1637 | o2hb_unregister_callback(NULL, &o2net_hb_down); |
1643 | } | 1638 | } |
1644 | 1639 | ||
1645 | int o2net_register_hb_callbacks(void) | 1640 | int o2net_register_hb_callbacks(void) |
@@ -1651,9 +1646,9 @@ int o2net_register_hb_callbacks(void) | |||
1651 | o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, | 1646 | o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, |
1652 | o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); | 1647 | o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); |
1653 | 1648 | ||
1654 | ret = o2hb_register_callback(&o2net_hb_up); | 1649 | ret = o2hb_register_callback(NULL, &o2net_hb_up); |
1655 | if (ret == 0) | 1650 | if (ret == 0) |
1656 | ret = o2hb_register_callback(&o2net_hb_down); | 1651 | ret = o2hb_register_callback(NULL, &o2net_hb_down); |
1657 | 1652 | ||
1658 | if (ret) | 1653 | if (ret) |
1659 | o2net_unregister_hb_callbacks(); | 1654 | o2net_unregister_hb_callbacks(); |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c441ef1f2bad..0d5fdde959c8 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -368,7 +368,7 @@ int ocfs2_do_extend_dir(struct super_block *sb, | |||
368 | u32 offset = OCFS2_I(dir)->ip_clusters; | 368 | u32 offset = OCFS2_I(dir)->ip_clusters; |
369 | 369 | ||
370 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, | 370 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, |
371 | 1, parent_fe_bh, handle, | 371 | 1, 0, parent_fe_bh, handle, |
372 | data_ac, meta_ac, NULL); | 372 | data_ac, meta_ac, NULL); |
373 | BUG_ON(status == -EAGAIN); | 373 | BUG_ON(status == -EAGAIN); |
374 | if (status < 0) { | 374 | if (status < 0) { |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index d836b98dd99a..6954565b8ccb 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -1128,8 +1128,8 @@ bail: | |||
1128 | 1128 | ||
1129 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) | 1129 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) |
1130 | { | 1130 | { |
1131 | o2hb_unregister_callback(&dlm->dlm_hb_up); | 1131 | o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); |
1132 | o2hb_unregister_callback(&dlm->dlm_hb_down); | 1132 | o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); |
1133 | o2net_unregister_handler_list(&dlm->dlm_domain_handlers); | 1133 | o2net_unregister_handler_list(&dlm->dlm_domain_handlers); |
1134 | } | 1134 | } |
1135 | 1135 | ||
@@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
1141 | 1141 | ||
1142 | o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, | 1142 | o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, |
1143 | dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); | 1143 | dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); |
1144 | status = o2hb_register_callback(&dlm->dlm_hb_down); | 1144 | status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); |
1145 | if (status) | 1145 | if (status) |
1146 | goto bail; | 1146 | goto bail; |
1147 | 1147 | ||
1148 | o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, | 1148 | o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, |
1149 | dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); | 1149 | dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); |
1150 | status = o2hb_register_callback(&dlm->dlm_hb_up); | 1150 | status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); |
1151 | if (status) | 1151 | if (status) |
1152 | goto bail; | 1152 | goto bail; |
1153 | 1153 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 6edffca99d98..65b2b9b92688 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle) | |||
192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) | 192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) |
193 | { | 193 | { |
194 | struct dlm_master_list_entry *mle; | 194 | struct dlm_master_list_entry *mle; |
195 | struct list_head *iter; | ||
196 | 195 | ||
197 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); | 196 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); |
198 | spin_lock(&dlm->master_lock); | 197 | spin_lock(&dlm->master_lock); |
199 | list_for_each(iter, &dlm->master_list) { | 198 | list_for_each_entry(mle, &dlm->master_list, list) |
200 | mle = list_entry(iter, struct dlm_master_list_entry, list); | ||
201 | dlm_print_one_mle(mle); | 199 | dlm_print_one_mle(mle); |
202 | } | ||
203 | spin_unlock(&dlm->master_lock); | 200 | spin_unlock(&dlm->master_lock); |
204 | } | 201 | } |
205 | 202 | ||
206 | int dlm_dump_all_mles(const char __user *data, unsigned int len) | 203 | int dlm_dump_all_mles(const char __user *data, unsigned int len) |
207 | { | 204 | { |
208 | struct list_head *iter; | ||
209 | struct dlm_ctxt *dlm; | 205 | struct dlm_ctxt *dlm; |
210 | 206 | ||
211 | spin_lock(&dlm_domain_lock); | 207 | spin_lock(&dlm_domain_lock); |
212 | list_for_each(iter, &dlm_domains) { | 208 | list_for_each_entry(dlm, &dlm_domains, list) { |
213 | dlm = list_entry (iter, struct dlm_ctxt, list); | ||
214 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); | 209 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); |
215 | dlm_dump_mles(dlm); | 210 | dlm_dump_mles(dlm); |
216 | } | 211 | } |
@@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
454 | char *name, unsigned int namelen) | 449 | char *name, unsigned int namelen) |
455 | { | 450 | { |
456 | struct dlm_master_list_entry *tmpmle; | 451 | struct dlm_master_list_entry *tmpmle; |
457 | struct list_head *iter; | ||
458 | 452 | ||
459 | assert_spin_locked(&dlm->master_lock); | 453 | assert_spin_locked(&dlm->master_lock); |
460 | 454 | ||
461 | list_for_each(iter, &dlm->master_list) { | 455 | list_for_each_entry(tmpmle, &dlm->master_list, list) { |
462 | tmpmle = list_entry(iter, struct dlm_master_list_entry, list); | ||
463 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) | 456 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) |
464 | continue; | 457 | continue; |
465 | dlm_get_mle(tmpmle); | 458 | dlm_get_mle(tmpmle); |
@@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
472 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) | 465 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) |
473 | { | 466 | { |
474 | struct dlm_master_list_entry *mle; | 467 | struct dlm_master_list_entry *mle; |
475 | struct list_head *iter; | ||
476 | 468 | ||
477 | assert_spin_locked(&dlm->spinlock); | 469 | assert_spin_locked(&dlm->spinlock); |
478 | 470 | ||
479 | list_for_each(iter, &dlm->mle_hb_events) { | 471 | list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { |
480 | mle = list_entry(iter, struct dlm_master_list_entry, | ||
481 | hb_events); | ||
482 | if (node_up) | 472 | if (node_up) |
483 | dlm_mle_node_up(dlm, mle, NULL, idx); | 473 | dlm_mle_node_up(dlm, mle, NULL, idx); |
484 | else | 474 | else |
@@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2434 | int ret; | 2424 | int ret; |
2435 | int i; | 2425 | int i; |
2436 | int count = 0; | 2426 | int count = 0; |
2437 | struct list_head *queue, *iter; | 2427 | struct list_head *queue; |
2438 | struct dlm_lock *lock; | 2428 | struct dlm_lock *lock; |
2439 | 2429 | ||
2440 | assert_spin_locked(&res->spinlock); | 2430 | assert_spin_locked(&res->spinlock); |
@@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2453 | ret = 0; | 2443 | ret = 0; |
2454 | queue = &res->granted; | 2444 | queue = &res->granted; |
2455 | for (i = 0; i < 3; i++) { | 2445 | for (i = 0; i < 3; i++) { |
2456 | list_for_each(iter, queue) { | 2446 | list_for_each_entry(lock, queue, list) { |
2457 | lock = list_entry(iter, struct dlm_lock, list); | ||
2458 | ++count; | 2447 | ++count; |
2459 | if (lock->ml.node == dlm->node_num) { | 2448 | if (lock->ml.node == dlm->node_num) { |
2460 | mlog(0, "found a lock owned by this node still " | 2449 | mlog(0, "found a lock owned by this node still " |
@@ -2923,18 +2912,16 @@ again: | |||
2923 | static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | 2912 | static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, |
2924 | struct dlm_lock_resource *res) | 2913 | struct dlm_lock_resource *res) |
2925 | { | 2914 | { |
2926 | struct list_head *iter, *iter2; | ||
2927 | struct list_head *queue = &res->granted; | 2915 | struct list_head *queue = &res->granted; |
2928 | int i, bit; | 2916 | int i, bit; |
2929 | struct dlm_lock *lock; | 2917 | struct dlm_lock *lock, *next; |
2930 | 2918 | ||
2931 | assert_spin_locked(&res->spinlock); | 2919 | assert_spin_locked(&res->spinlock); |
2932 | 2920 | ||
2933 | BUG_ON(res->owner == dlm->node_num); | 2921 | BUG_ON(res->owner == dlm->node_num); |
2934 | 2922 | ||
2935 | for (i=0; i<3; i++) { | 2923 | for (i=0; i<3; i++) { |
2936 | list_for_each_safe(iter, iter2, queue) { | 2924 | list_for_each_entry_safe(lock, next, queue, list) { |
2937 | lock = list_entry (iter, struct dlm_lock, list); | ||
2938 | if (lock->ml.node != dlm->node_num) { | 2925 | if (lock->ml.node != dlm->node_num) { |
2939 | mlog(0, "putting lock for node %u\n", | 2926 | mlog(0, "putting lock for node %u\n", |
2940 | lock->ml.node); | 2927 | lock->ml.node); |
@@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | |||
2976 | { | 2963 | { |
2977 | int i; | 2964 | int i; |
2978 | struct list_head *queue = &res->granted; | 2965 | struct list_head *queue = &res->granted; |
2979 | struct list_head *iter; | ||
2980 | struct dlm_lock *lock; | 2966 | struct dlm_lock *lock; |
2981 | int nodenum; | 2967 | int nodenum; |
2982 | 2968 | ||
@@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | |||
2984 | 2970 | ||
2985 | spin_lock(&res->spinlock); | 2971 | spin_lock(&res->spinlock); |
2986 | for (i=0; i<3; i++) { | 2972 | for (i=0; i<3; i++) { |
2987 | list_for_each(iter, queue) { | 2973 | list_for_each_entry(lock, queue, list) { |
2988 | /* up to the caller to make sure this node | 2974 | /* up to the caller to make sure this node |
2989 | * is alive */ | 2975 | * is alive */ |
2990 | lock = list_entry (iter, struct dlm_lock, list); | ||
2991 | if (lock->ml.node != dlm->node_num) { | 2976 | if (lock->ml.node != dlm->node_num) { |
2992 | spin_unlock(&res->spinlock); | 2977 | spin_unlock(&res->spinlock); |
2993 | return lock->ml.node; | 2978 | return lock->ml.node; |
@@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
3234 | 3219 | ||
3235 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) | 3220 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) |
3236 | { | 3221 | { |
3237 | struct list_head *iter, *iter2; | 3222 | struct dlm_master_list_entry *mle, *next; |
3238 | struct dlm_master_list_entry *mle; | ||
3239 | struct dlm_lock_resource *res; | 3223 | struct dlm_lock_resource *res; |
3240 | unsigned int hash; | 3224 | unsigned int hash; |
3241 | 3225 | ||
@@ -3245,9 +3229,7 @@ top: | |||
3245 | 3229 | ||
3246 | /* clean the master list */ | 3230 | /* clean the master list */ |
3247 | spin_lock(&dlm->master_lock); | 3231 | spin_lock(&dlm->master_lock); |
3248 | list_for_each_safe(iter, iter2, &dlm->master_list) { | 3232 | list_for_each_entry_safe(mle, next, &dlm->master_list, list) { |
3249 | mle = list_entry(iter, struct dlm_master_list_entry, list); | ||
3250 | |||
3251 | BUG_ON(mle->type != DLM_MLE_BLOCK && | 3233 | BUG_ON(mle->type != DLM_MLE_BLOCK && |
3252 | mle->type != DLM_MLE_MASTER && | 3234 | mle->type != DLM_MLE_MASTER && |
3253 | mle->type != DLM_MLE_MIGRATION); | 3235 | mle->type != DLM_MLE_MIGRATION); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 671c4ed58ee2..a2c33160bfd6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work) | |||
158 | struct dlm_ctxt *dlm = | 158 | struct dlm_ctxt *dlm = |
159 | container_of(work, struct dlm_ctxt, dispatched_work); | 159 | container_of(work, struct dlm_ctxt, dispatched_work); |
160 | LIST_HEAD(tmp_list); | 160 | LIST_HEAD(tmp_list); |
161 | struct list_head *iter, *iter2; | 161 | struct dlm_work_item *item, *next; |
162 | struct dlm_work_item *item; | ||
163 | dlm_workfunc_t *workfunc; | 162 | dlm_workfunc_t *workfunc; |
164 | int tot=0; | 163 | int tot=0; |
165 | 164 | ||
@@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work) | |||
167 | list_splice_init(&dlm->work_list, &tmp_list); | 166 | list_splice_init(&dlm->work_list, &tmp_list); |
168 | spin_unlock(&dlm->work_lock); | 167 | spin_unlock(&dlm->work_lock); |
169 | 168 | ||
170 | list_for_each_safe(iter, iter2, &tmp_list) { | 169 | list_for_each_entry(item, &tmp_list, list) { |
171 | tot++; | 170 | tot++; |
172 | } | 171 | } |
173 | mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); | 172 | mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); |
174 | 173 | ||
175 | list_for_each_safe(iter, iter2, &tmp_list) { | 174 | list_for_each_entry_safe(item, next, &tmp_list, list) { |
176 | item = list_entry(iter, struct dlm_work_item, list); | ||
177 | workfunc = item->func; | 175 | workfunc = item->func; |
178 | list_del_init(&item->list); | 176 | list_del_init(&item->list); |
179 | 177 | ||
@@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
549 | { | 547 | { |
550 | int status = 0; | 548 | int status = 0; |
551 | struct dlm_reco_node_data *ndata; | 549 | struct dlm_reco_node_data *ndata; |
552 | struct list_head *iter; | ||
553 | int all_nodes_done; | 550 | int all_nodes_done; |
554 | int destroy = 0; | 551 | int destroy = 0; |
555 | int pass = 0; | 552 | int pass = 0; |
@@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
567 | 564 | ||
568 | /* safe to access the node data list without a lock, since this | 565 | /* safe to access the node data list without a lock, since this |
569 | * process is the only one to change the list */ | 566 | * process is the only one to change the list */ |
570 | list_for_each(iter, &dlm->reco.node_data) { | 567 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
571 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
572 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); | 568 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); |
573 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; | 569 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; |
574 | 570 | ||
@@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
655 | * done, or if anyone died */ | 651 | * done, or if anyone died */ |
656 | all_nodes_done = 1; | 652 | all_nodes_done = 1; |
657 | spin_lock(&dlm_reco_state_lock); | 653 | spin_lock(&dlm_reco_state_lock); |
658 | list_for_each(iter, &dlm->reco.node_data) { | 654 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
659 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
660 | |||
661 | mlog(0, "checking recovery state of node %u\n", | 655 | mlog(0, "checking recovery state of node %u\n", |
662 | ndata->node_num); | 656 | ndata->node_num); |
663 | switch (ndata->state) { | 657 | switch (ndata->state) { |
@@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | |||
774 | 768 | ||
775 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | 769 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) |
776 | { | 770 | { |
777 | struct list_head *iter, *iter2; | 771 | struct dlm_reco_node_data *ndata, *next; |
778 | struct dlm_reco_node_data *ndata; | ||
779 | LIST_HEAD(tmplist); | 772 | LIST_HEAD(tmplist); |
780 | 773 | ||
781 | spin_lock(&dlm_reco_state_lock); | 774 | spin_lock(&dlm_reco_state_lock); |
782 | list_splice_init(&dlm->reco.node_data, &tmplist); | 775 | list_splice_init(&dlm->reco.node_data, &tmplist); |
783 | spin_unlock(&dlm_reco_state_lock); | 776 | spin_unlock(&dlm_reco_state_lock); |
784 | 777 | ||
785 | list_for_each_safe(iter, iter2, &tmplist) { | 778 | list_for_each_entry_safe(ndata, next, &tmplist, list) { |
786 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
787 | list_del_init(&ndata->list); | 779 | list_del_init(&ndata->list); |
788 | kfree(ndata); | 780 | kfree(ndata); |
789 | } | 781 | } |
@@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) | |||
876 | struct dlm_lock_resource *res; | 868 | struct dlm_lock_resource *res; |
877 | struct dlm_ctxt *dlm; | 869 | struct dlm_ctxt *dlm; |
878 | LIST_HEAD(resources); | 870 | LIST_HEAD(resources); |
879 | struct list_head *iter; | ||
880 | int ret; | 871 | int ret; |
881 | u8 dead_node, reco_master; | 872 | u8 dead_node, reco_master; |
882 | int skip_all_done = 0; | 873 | int skip_all_done = 0; |
@@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) | |||
920 | 911 | ||
921 | /* any errors returned will be due to the new_master dying, | 912 | /* any errors returned will be due to the new_master dying, |
922 | * the dlm_reco_thread should detect this */ | 913 | * the dlm_reco_thread should detect this */ |
923 | list_for_each(iter, &resources) { | 914 | list_for_each_entry(res, &resources, recovering) { |
924 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
925 | ret = dlm_send_one_lockres(dlm, res, mres, reco_master, | 915 | ret = dlm_send_one_lockres(dlm, res, mres, reco_master, |
926 | DLM_MRES_RECOVERY); | 916 | DLM_MRES_RECOVERY); |
927 | if (ret < 0) { | 917 | if (ret < 0) { |
@@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
983 | { | 973 | { |
984 | struct dlm_ctxt *dlm = data; | 974 | struct dlm_ctxt *dlm = data; |
985 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; | 975 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; |
986 | struct list_head *iter; | ||
987 | struct dlm_reco_node_data *ndata = NULL; | 976 | struct dlm_reco_node_data *ndata = NULL; |
988 | int ret = -EINVAL; | 977 | int ret = -EINVAL; |
989 | 978 | ||
@@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1000 | dlm->reco.dead_node, done->node_idx, dlm->node_num); | 989 | dlm->reco.dead_node, done->node_idx, dlm->node_num); |
1001 | 990 | ||
1002 | spin_lock(&dlm_reco_state_lock); | 991 | spin_lock(&dlm_reco_state_lock); |
1003 | list_for_each(iter, &dlm->reco.node_data) { | 992 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
1004 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
1005 | if (ndata->node_num != done->node_idx) | 993 | if (ndata->node_num != done->node_idx) |
1006 | continue; | 994 | continue; |
1007 | 995 | ||
@@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, | |||
1049 | struct list_head *list, | 1037 | struct list_head *list, |
1050 | u8 dead_node) | 1038 | u8 dead_node) |
1051 | { | 1039 | { |
1052 | struct dlm_lock_resource *res; | 1040 | struct dlm_lock_resource *res, *next; |
1053 | struct list_head *iter, *iter2; | ||
1054 | struct dlm_lock *lock; | 1041 | struct dlm_lock *lock; |
1055 | 1042 | ||
1056 | spin_lock(&dlm->spinlock); | 1043 | spin_lock(&dlm->spinlock); |
1057 | list_for_each_safe(iter, iter2, &dlm->reco.resources) { | 1044 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { |
1058 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
1059 | /* always prune any $RECOVERY entries for dead nodes, | 1045 | /* always prune any $RECOVERY entries for dead nodes, |
1060 | * otherwise hangs can occur during later recovery */ | 1046 | * otherwise hangs can occur during later recovery */ |
1061 | if (dlm_is_recovery_lock(res->lockname.name, | 1047 | if (dlm_is_recovery_lock(res->lockname.name, |
@@ -1169,7 +1155,7 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, | |||
1169 | u8 flags, u8 master) | 1155 | u8 flags, u8 master) |
1170 | { | 1156 | { |
1171 | /* mres here is one full page */ | 1157 | /* mres here is one full page */ |
1172 | memset(mres, 0, PAGE_SIZE); | 1158 | clear_page(mres); |
1173 | mres->lockname_len = namelen; | 1159 | mres->lockname_len = namelen; |
1174 | memcpy(mres->lockname, lockname, namelen); | 1160 | memcpy(mres->lockname, lockname, namelen); |
1175 | mres->num_locks = 0; | 1161 | mres->num_locks = 0; |
@@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1252 | struct dlm_migratable_lockres *mres, | 1238 | struct dlm_migratable_lockres *mres, |
1253 | u8 send_to, u8 flags) | 1239 | u8 send_to, u8 flags) |
1254 | { | 1240 | { |
1255 | struct list_head *queue, *iter; | 1241 | struct list_head *queue; |
1256 | int total_locks, i; | 1242 | int total_locks, i; |
1257 | u64 mig_cookie = 0; | 1243 | u64 mig_cookie = 0; |
1258 | struct dlm_lock *lock; | 1244 | struct dlm_lock *lock; |
@@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1278 | total_locks = 0; | 1264 | total_locks = 0; |
1279 | for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { | 1265 | for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { |
1280 | queue = dlm_list_idx_to_ptr(res, i); | 1266 | queue = dlm_list_idx_to_ptr(res, i); |
1281 | list_for_each(iter, queue) { | 1267 | list_for_each_entry(lock, queue, list) { |
1282 | lock = list_entry (iter, struct dlm_lock, list); | ||
1283 | |||
1284 | /* add another lock. */ | 1268 | /* add another lock. */ |
1285 | total_locks++; | 1269 | total_locks++; |
1286 | if (!dlm_add_lock_to_array(lock, mres, i)) | 1270 | if (!dlm_add_lock_to_array(lock, mres, i)) |
@@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1717 | struct dlm_lockstatus *lksb = NULL; | 1701 | struct dlm_lockstatus *lksb = NULL; |
1718 | int ret = 0; | 1702 | int ret = 0; |
1719 | int i, j, bad; | 1703 | int i, j, bad; |
1720 | struct list_head *iter; | ||
1721 | struct dlm_lock *lock = NULL; | 1704 | struct dlm_lock *lock = NULL; |
1722 | u8 from = O2NM_MAX_NODES; | 1705 | u8 from = O2NM_MAX_NODES; |
1723 | unsigned int added = 0; | 1706 | unsigned int added = 0; |
@@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1755 | spin_lock(&res->spinlock); | 1738 | spin_lock(&res->spinlock); |
1756 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { | 1739 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
1757 | tmpq = dlm_list_idx_to_ptr(res, j); | 1740 | tmpq = dlm_list_idx_to_ptr(res, j); |
1758 | list_for_each(iter, tmpq) { | 1741 | list_for_each_entry(lock, tmpq, list) { |
1759 | lock = list_entry (iter, struct dlm_lock, list); | ||
1760 | if (lock->ml.cookie != ml->cookie) | 1742 | if (lock->ml.cookie != ml->cookie) |
1761 | lock = NULL; | 1743 | lock = NULL; |
1762 | else | 1744 | else |
@@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
1930 | struct dlm_lock_resource *res) | 1912 | struct dlm_lock_resource *res) |
1931 | { | 1913 | { |
1932 | int i; | 1914 | int i; |
1933 | struct list_head *queue, *iter, *iter2; | 1915 | struct list_head *queue; |
1934 | struct dlm_lock *lock; | 1916 | struct dlm_lock *lock, *next; |
1935 | 1917 | ||
1936 | res->state |= DLM_LOCK_RES_RECOVERING; | 1918 | res->state |= DLM_LOCK_RES_RECOVERING; |
1937 | if (!list_empty(&res->recovering)) { | 1919 | if (!list_empty(&res->recovering)) { |
@@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
1947 | /* find any pending locks and put them back on proper list */ | 1929 | /* find any pending locks and put them back on proper list */ |
1948 | for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { | 1930 | for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { |
1949 | queue = dlm_list_idx_to_ptr(res, i); | 1931 | queue = dlm_list_idx_to_ptr(res, i); |
1950 | list_for_each_safe(iter, iter2, queue) { | 1932 | list_for_each_entry_safe(lock, next, queue, list) { |
1951 | lock = list_entry (iter, struct dlm_lock, list); | ||
1952 | dlm_lock_get(lock); | 1933 | dlm_lock_get(lock); |
1953 | if (lock->convert_pending) { | 1934 | if (lock->convert_pending) { |
1954 | /* move converting lock back to granted */ | 1935 | /* move converting lock back to granted */ |
@@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
2013 | u8 dead_node, u8 new_master) | 1994 | u8 dead_node, u8 new_master) |
2014 | { | 1995 | { |
2015 | int i; | 1996 | int i; |
2016 | struct list_head *iter, *iter2; | ||
2017 | struct hlist_node *hash_iter; | 1997 | struct hlist_node *hash_iter; |
2018 | struct hlist_head *bucket; | 1998 | struct hlist_head *bucket; |
2019 | 1999 | struct dlm_lock_resource *res, *next; | |
2020 | struct dlm_lock_resource *res; | ||
2021 | 2000 | ||
2022 | mlog_entry_void(); | 2001 | mlog_entry_void(); |
2023 | 2002 | ||
2024 | assert_spin_locked(&dlm->spinlock); | 2003 | assert_spin_locked(&dlm->spinlock); |
2025 | 2004 | ||
2026 | list_for_each_safe(iter, iter2, &dlm->reco.resources) { | 2005 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { |
2027 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
2028 | if (res->owner == dead_node) { | 2006 | if (res->owner == dead_node) { |
2029 | list_del_init(&res->recovering); | 2007 | list_del_init(&res->recovering); |
2030 | spin_lock(&res->spinlock); | 2008 | spin_lock(&res->spinlock); |
@@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local) | |||
2099 | static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | 2077 | static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, |
2100 | struct dlm_lock_resource *res, u8 dead_node) | 2078 | struct dlm_lock_resource *res, u8 dead_node) |
2101 | { | 2079 | { |
2102 | struct list_head *iter, *queue; | 2080 | struct list_head *queue; |
2103 | struct dlm_lock *lock; | 2081 | struct dlm_lock *lock; |
2104 | int blank_lvb = 0, local = 0; | 2082 | int blank_lvb = 0, local = 0; |
2105 | int i; | 2083 | int i; |
@@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | |||
2121 | 2099 | ||
2122 | for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { | 2100 | for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { |
2123 | queue = dlm_list_idx_to_ptr(res, i); | 2101 | queue = dlm_list_idx_to_ptr(res, i); |
2124 | list_for_each(iter, queue) { | 2102 | list_for_each_entry(lock, queue, list) { |
2125 | lock = list_entry (iter, struct dlm_lock, list); | ||
2126 | if (lock->ml.node == search_node) { | 2103 | if (lock->ml.node == search_node) { |
2127 | if (dlm_lvb_needs_invalidation(lock, local)) { | 2104 | if (dlm_lvb_needs_invalidation(lock, local)) { |
2128 | /* zero the lksb lvb and lockres lvb */ | 2105 | /* zero the lksb lvb and lockres lvb */ |
@@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | |||
2143 | static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | 2120 | static void dlm_free_dead_locks(struct dlm_ctxt *dlm, |
2144 | struct dlm_lock_resource *res, u8 dead_node) | 2121 | struct dlm_lock_resource *res, u8 dead_node) |
2145 | { | 2122 | { |
2146 | struct list_head *iter, *tmpiter; | 2123 | struct dlm_lock *lock, *next; |
2147 | struct dlm_lock *lock; | ||
2148 | unsigned int freed = 0; | 2124 | unsigned int freed = 0; |
2149 | 2125 | ||
2150 | /* this node is the lockres master: | 2126 | /* this node is the lockres master: |
@@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2155 | assert_spin_locked(&res->spinlock); | 2131 | assert_spin_locked(&res->spinlock); |
2156 | 2132 | ||
2157 | /* TODO: check pending_asts, pending_basts here */ | 2133 | /* TODO: check pending_asts, pending_basts here */ |
2158 | list_for_each_safe(iter, tmpiter, &res->granted) { | 2134 | list_for_each_entry_safe(lock, next, &res->granted, list) { |
2159 | lock = list_entry (iter, struct dlm_lock, list); | ||
2160 | if (lock->ml.node == dead_node) { | 2135 | if (lock->ml.node == dead_node) { |
2161 | list_del_init(&lock->list); | 2136 | list_del_init(&lock->list); |
2162 | dlm_lock_put(lock); | 2137 | dlm_lock_put(lock); |
2163 | freed++; | 2138 | freed++; |
2164 | } | 2139 | } |
2165 | } | 2140 | } |
2166 | list_for_each_safe(iter, tmpiter, &res->converting) { | 2141 | list_for_each_entry_safe(lock, next, &res->converting, list) { |
2167 | lock = list_entry (iter, struct dlm_lock, list); | ||
2168 | if (lock->ml.node == dead_node) { | 2142 | if (lock->ml.node == dead_node) { |
2169 | list_del_init(&lock->list); | 2143 | list_del_init(&lock->list); |
2170 | dlm_lock_put(lock); | 2144 | dlm_lock_put(lock); |
2171 | freed++; | 2145 | freed++; |
2172 | } | 2146 | } |
2173 | } | 2147 | } |
2174 | list_for_each_safe(iter, tmpiter, &res->blocked) { | 2148 | list_for_each_entry_safe(lock, next, &res->blocked, list) { |
2175 | lock = list_entry (iter, struct dlm_lock, list); | ||
2176 | if (lock->ml.node == dead_node) { | 2149 | if (lock->ml.node == dead_node) { |
2177 | list_del_init(&lock->list); | 2150 | list_del_init(&lock->list); |
2178 | dlm_lock_put(lock); | 2151 | dlm_lock_put(lock); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index d1bd305ef0d7..f71250ed166f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level) | |||
600 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 600 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, |
601 | unsigned long newflags) | 601 | unsigned long newflags) |
602 | { | 602 | { |
603 | struct list_head *pos, *tmp; | 603 | struct ocfs2_mask_waiter *mw, *tmp; |
604 | struct ocfs2_mask_waiter *mw; | ||
605 | 604 | ||
606 | assert_spin_locked(&lockres->l_lock); | 605 | assert_spin_locked(&lockres->l_lock); |
607 | 606 | ||
608 | lockres->l_flags = newflags; | 607 | lockres->l_flags = newflags; |
609 | 608 | ||
610 | list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) { | 609 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { |
611 | mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item); | ||
612 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 610 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) |
613 | continue; | 611 | continue; |
614 | 612 | ||
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h index f226b2207628..ff257628af16 100644 --- a/fs/ocfs2/endian.h +++ b/fs/ocfs2/endian.h | |||
@@ -32,6 +32,11 @@ static inline void le32_add_cpu(__le32 *var, u32 val) | |||
32 | *var = cpu_to_le32(le32_to_cpu(*var) + val); | 32 | *var = cpu_to_le32(le32_to_cpu(*var) + val); |
33 | } | 33 | } |
34 | 34 | ||
35 | static inline void le64_add_cpu(__le64 *var, u64 val) | ||
36 | { | ||
37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | ||
38 | } | ||
39 | |||
35 | static inline void le32_and_cpu(__le32 *var, u32 val) | 40 | static inline void le32_and_cpu(__le32 *var, u32 val) |
36 | { | 41 | { |
37 | *var = cpu_to_le32(le32_to_cpu(*var) & val); | 42 | *var = cpu_to_le32(le32_to_cpu(*var) & val); |
diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index 5b77ee7866ef..e08bed9e45a0 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h | |||
@@ -26,6 +26,8 @@ | |||
26 | #ifndef OCFS2_EXPORT_H | 26 | #ifndef OCFS2_EXPORT_H |
27 | #define OCFS2_EXPORT_H | 27 | #define OCFS2_EXPORT_H |
28 | 28 | ||
29 | #include <linux/exportfs.h> | ||
30 | |||
29 | extern struct export_operations ocfs2_export_ops; | 31 | extern struct export_operations ocfs2_export_ops; |
30 | 32 | ||
31 | #endif /* OCFS2_EXPORT_H */ | 33 | #endif /* OCFS2_EXPORT_H */ |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index ba2b2ab1c6e4..03c1d365c78b 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, | |||
109 | */ | 109 | */ |
110 | void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) | 110 | void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) |
111 | { | 111 | { |
112 | struct list_head *p, *n; | 112 | struct ocfs2_extent_map_item *emi, *n; |
113 | struct ocfs2_extent_map_item *emi; | ||
114 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 113 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
115 | struct ocfs2_extent_map *em = &oi->ip_extent_map; | 114 | struct ocfs2_extent_map *em = &oi->ip_extent_map; |
116 | LIST_HEAD(tmp_list); | 115 | LIST_HEAD(tmp_list); |
117 | unsigned int range; | 116 | unsigned int range; |
118 | 117 | ||
119 | spin_lock(&oi->ip_lock); | 118 | spin_lock(&oi->ip_lock); |
120 | list_for_each_safe(p, n, &em->em_list) { | 119 | list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { |
121 | emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); | ||
122 | |||
123 | if (emi->ei_cpos >= cpos) { | 120 | if (emi->ei_cpos >= cpos) { |
124 | /* Full truncate of this record. */ | 121 | /* Full truncate of this record. */ |
125 | list_move(&emi->ei_list, &tmp_list); | 122 | list_move(&emi->ei_list, &tmp_list); |
@@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) | |||
136 | } | 133 | } |
137 | spin_unlock(&oi->ip_lock); | 134 | spin_unlock(&oi->ip_lock); |
138 | 135 | ||
139 | list_for_each_safe(p, n, &tmp_list) { | 136 | list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { |
140 | emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); | ||
141 | list_del(&emi->ei_list); | 137 | list_del(&emi->ei_list); |
142 | kfree(emi); | 138 | kfree(emi); |
143 | } | 139 | } |
@@ -377,37 +373,6 @@ out: | |||
377 | return ret; | 373 | return ret; |
378 | } | 374 | } |
379 | 375 | ||
380 | /* | ||
381 | * Return the index of the extent record which contains cluster #v_cluster. | ||
382 | * -1 is returned if it was not found. | ||
383 | * | ||
384 | * Should work fine on interior and exterior nodes. | ||
385 | */ | ||
386 | static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, | ||
387 | u32 v_cluster) | ||
388 | { | ||
389 | int ret = -1; | ||
390 | int i; | ||
391 | struct ocfs2_extent_rec *rec; | ||
392 | u32 rec_end, rec_start, clusters; | ||
393 | |||
394 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
395 | rec = &el->l_recs[i]; | ||
396 | |||
397 | rec_start = le32_to_cpu(rec->e_cpos); | ||
398 | clusters = ocfs2_rec_clusters(el, rec); | ||
399 | |||
400 | rec_end = rec_start + clusters; | ||
401 | |||
402 | if (v_cluster >= rec_start && v_cluster < rec_end) { | ||
403 | ret = i; | ||
404 | break; | ||
405 | } | ||
406 | } | ||
407 | |||
408 | return ret; | ||
409 | } | ||
410 | |||
411 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, |
412 | u32 *p_cluster, u32 *num_clusters, | 377 | u32 *p_cluster, u32 *num_clusters, |
413 | unsigned int *extent_flags) | 378 | unsigned int *extent_flags) |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4979b6675717..f04c7aa834cb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -263,6 +263,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
263 | int status; | 263 | int status; |
264 | handle_t *handle; | 264 | handle_t *handle; |
265 | struct ocfs2_dinode *di; | 265 | struct ocfs2_dinode *di; |
266 | u64 cluster_bytes; | ||
266 | 267 | ||
267 | mlog_entry_void(); | 268 | mlog_entry_void(); |
268 | 269 | ||
@@ -286,7 +287,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
286 | /* | 287 | /* |
287 | * Do this before setting i_size. | 288 | * Do this before setting i_size. |
288 | */ | 289 | */ |
289 | status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size); | 290 | cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); |
291 | status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size, | ||
292 | cluster_bytes); | ||
290 | if (status) { | 293 | if (status) { |
291 | mlog_errno(status); | 294 | mlog_errno(status); |
292 | goto out_commit; | 295 | goto out_commit; |
@@ -326,9 +329,6 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
326 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 329 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
327 | (unsigned long long)new_i_size); | 330 | (unsigned long long)new_i_size); |
328 | 331 | ||
329 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
330 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
331 | |||
332 | fe = (struct ocfs2_dinode *) di_bh->b_data; | 332 | fe = (struct ocfs2_dinode *) di_bh->b_data; |
333 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 333 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
334 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 334 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); |
@@ -363,16 +363,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
363 | if (new_i_size == le64_to_cpu(fe->i_size)) | 363 | if (new_i_size == le64_to_cpu(fe->i_size)) |
364 | goto bail; | 364 | goto bail; |
365 | 365 | ||
366 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
367 | |||
366 | /* This forces other nodes to sync and drop their pages. Do | 368 | /* This forces other nodes to sync and drop their pages. Do |
367 | * this even if we have a truncate without allocation change - | 369 | * this even if we have a truncate without allocation change - |
368 | * ocfs2 cluster sizes can be much greater than page size, so | 370 | * ocfs2 cluster sizes can be much greater than page size, so |
369 | * we have to truncate them anyway. */ | 371 | * we have to truncate them anyway. */ |
370 | status = ocfs2_data_lock(inode, 1); | 372 | status = ocfs2_data_lock(inode, 1); |
371 | if (status < 0) { | 373 | if (status < 0) { |
374 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
375 | |||
372 | mlog_errno(status); | 376 | mlog_errno(status); |
373 | goto bail; | 377 | goto bail; |
374 | } | 378 | } |
375 | 379 | ||
380 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
381 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
382 | |||
376 | /* alright, we're going to need to do a full blown alloc size | 383 | /* alright, we're going to need to do a full blown alloc size |
377 | * change. Orphan the inode so that recovery can complete the | 384 | * change. Orphan the inode so that recovery can complete the |
378 | * truncate if necessary. This does the task of marking | 385 | * truncate if necessary. This does the task of marking |
@@ -399,6 +406,8 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
399 | bail_unlock_data: | 406 | bail_unlock_data: |
400 | ocfs2_data_unlock(inode, 1); | 407 | ocfs2_data_unlock(inode, 1); |
401 | 408 | ||
409 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
410 | |||
402 | bail: | 411 | bail: |
403 | 412 | ||
404 | mlog_exit(status); | 413 | mlog_exit(status); |
@@ -419,6 +428,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
419 | struct inode *inode, | 428 | struct inode *inode, |
420 | u32 *logical_offset, | 429 | u32 *logical_offset, |
421 | u32 clusters_to_add, | 430 | u32 clusters_to_add, |
431 | int mark_unwritten, | ||
422 | struct buffer_head *fe_bh, | 432 | struct buffer_head *fe_bh, |
423 | handle_t *handle, | 433 | handle_t *handle, |
424 | struct ocfs2_alloc_context *data_ac, | 434 | struct ocfs2_alloc_context *data_ac, |
@@ -431,9 +441,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
431 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | 441 | enum ocfs2_alloc_restarted reason = RESTART_NONE; |
432 | u32 bit_off, num_bits; | 442 | u32 bit_off, num_bits; |
433 | u64 block; | 443 | u64 block; |
444 | u8 flags = 0; | ||
434 | 445 | ||
435 | BUG_ON(!clusters_to_add); | 446 | BUG_ON(!clusters_to_add); |
436 | 447 | ||
448 | if (mark_unwritten) | ||
449 | flags = OCFS2_EXT_UNWRITTEN; | ||
450 | |||
437 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | 451 | free_extents = ocfs2_num_free_extents(osb, inode, fe); |
438 | if (free_extents < 0) { | 452 | if (free_extents < 0) { |
439 | status = free_extents; | 453 | status = free_extents; |
@@ -483,7 +497,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
483 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 497 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
484 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | 498 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, |
485 | *logical_offset, block, num_bits, | 499 | *logical_offset, block, num_bits, |
486 | meta_ac); | 500 | flags, meta_ac); |
487 | if (status < 0) { | 501 | if (status < 0) { |
488 | mlog_errno(status); | 502 | mlog_errno(status); |
489 | goto leave; | 503 | goto leave; |
@@ -516,25 +530,31 @@ leave: | |||
516 | * For a given allocation, determine which allocators will need to be | 530 | * For a given allocation, determine which allocators will need to be |
517 | * accessed, and lock them, reserving the appropriate number of bits. | 531 | * accessed, and lock them, reserving the appropriate number of bits. |
518 | * | 532 | * |
519 | * Called from ocfs2_extend_allocation() for file systems which don't | 533 | * Sparse file systems call this from ocfs2_write_begin_nolock() |
520 | * support holes, and from ocfs2_write() for file systems which | 534 | * and ocfs2_allocate_unwritten_extents(). |
521 | * understand sparse inodes. | 535 | * |
536 | * File systems which don't support holes call this from | ||
537 | * ocfs2_extend_allocation(). | ||
522 | */ | 538 | */ |
523 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | 539 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, |
524 | u32 clusters_to_add, | 540 | u32 clusters_to_add, u32 extents_to_split, |
525 | struct ocfs2_alloc_context **data_ac, | 541 | struct ocfs2_alloc_context **data_ac, |
526 | struct ocfs2_alloc_context **meta_ac) | 542 | struct ocfs2_alloc_context **meta_ac) |
527 | { | 543 | { |
528 | int ret, num_free_extents; | 544 | int ret = 0, num_free_extents; |
545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
529 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
530 | 547 | ||
531 | *meta_ac = NULL; | 548 | *meta_ac = NULL; |
532 | *data_ac = NULL; | 549 | if (data_ac) |
550 | *data_ac = NULL; | ||
551 | |||
552 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
533 | 553 | ||
534 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 554 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
535 | "clusters_to_add = %u\n", | 555 | "clusters_to_add = %u, extents_to_split = %u\n", |
536 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 556 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), |
537 | le32_to_cpu(di->i_clusters), clusters_to_add); | 557 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); |
538 | 558 | ||
539 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 559 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); |
540 | if (num_free_extents < 0) { | 560 | if (num_free_extents < 0) { |
@@ -552,9 +572,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
552 | * | 572 | * |
553 | * Most of the time we'll only be seeing this 1 cluster at a time | 573 | * Most of the time we'll only be seeing this 1 cluster at a time |
554 | * anyway. | 574 | * anyway. |
575 | * | ||
576 | * Always lock for any unwritten extents - we might want to | ||
577 | * add blocks during a split. | ||
555 | */ | 578 | */ |
556 | if (!num_free_extents || | 579 | if (!num_free_extents || |
557 | (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { | 580 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { |
558 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | 581 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); |
559 | if (ret < 0) { | 582 | if (ret < 0) { |
560 | if (ret != -ENOSPC) | 583 | if (ret != -ENOSPC) |
@@ -563,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
563 | } | 586 | } |
564 | } | 587 | } |
565 | 588 | ||
589 | if (clusters_to_add == 0) | ||
590 | goto out; | ||
591 | |||
566 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | 592 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); |
567 | if (ret < 0) { | 593 | if (ret < 0) { |
568 | if (ret != -ENOSPC) | 594 | if (ret != -ENOSPC) |
@@ -585,14 +611,13 @@ out: | |||
585 | return ret; | 611 | return ret; |
586 | } | 612 | } |
587 | 613 | ||
588 | static int ocfs2_extend_allocation(struct inode *inode, | 614 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, |
589 | u32 clusters_to_add) | 615 | u32 clusters_to_add, int mark_unwritten) |
590 | { | 616 | { |
591 | int status = 0; | 617 | int status = 0; |
592 | int restart_func = 0; | 618 | int restart_func = 0; |
593 | int drop_alloc_sem = 0; | ||
594 | int credits; | 619 | int credits; |
595 | u32 prev_clusters, logical_start; | 620 | u32 prev_clusters; |
596 | struct buffer_head *bh = NULL; | 621 | struct buffer_head *bh = NULL; |
597 | struct ocfs2_dinode *fe = NULL; | 622 | struct ocfs2_dinode *fe = NULL; |
598 | handle_t *handle = NULL; | 623 | handle_t *handle = NULL; |
@@ -607,7 +632,7 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
607 | * This function only exists for file systems which don't | 632 | * This function only exists for file systems which don't |
608 | * support holes. | 633 | * support holes. |
609 | */ | 634 | */ |
610 | BUG_ON(ocfs2_sparse_alloc(osb)); | 635 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
611 | 636 | ||
612 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 637 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, |
613 | OCFS2_BH_CACHED, inode); | 638 | OCFS2_BH_CACHED, inode); |
@@ -623,19 +648,10 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
623 | goto leave; | 648 | goto leave; |
624 | } | 649 | } |
625 | 650 | ||
626 | logical_start = OCFS2_I(inode)->ip_clusters; | ||
627 | |||
628 | restart_all: | 651 | restart_all: |
629 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 652 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
630 | 653 | ||
631 | /* blocks people in read/write from reading our allocation | 654 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
632 | * until we're done changing it. We depend on i_mutex to block | ||
633 | * other extend/truncate calls while we're here. Ordering wrt | ||
634 | * start_trans is important here -- always do it before! */ | ||
635 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
636 | drop_alloc_sem = 1; | ||
637 | |||
638 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, | ||
639 | &meta_ac); | 655 | &meta_ac); |
640 | if (status) { | 656 | if (status) { |
641 | mlog_errno(status); | 657 | mlog_errno(status); |
@@ -668,6 +684,7 @@ restarted_transaction: | |||
668 | inode, | 684 | inode, |
669 | &logical_start, | 685 | &logical_start, |
670 | clusters_to_add, | 686 | clusters_to_add, |
687 | mark_unwritten, | ||
671 | bh, | 688 | bh, |
672 | handle, | 689 | handle, |
673 | data_ac, | 690 | data_ac, |
@@ -720,10 +737,6 @@ restarted_transaction: | |||
720 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 737 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); |
721 | 738 | ||
722 | leave: | 739 | leave: |
723 | if (drop_alloc_sem) { | ||
724 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
725 | drop_alloc_sem = 0; | ||
726 | } | ||
727 | if (handle) { | 740 | if (handle) { |
728 | ocfs2_commit_trans(osb, handle); | 741 | ocfs2_commit_trans(osb, handle); |
729 | handle = NULL; | 742 | handle = NULL; |
@@ -749,6 +762,25 @@ leave: | |||
749 | return status; | 762 | return status; |
750 | } | 763 | } |
751 | 764 | ||
765 | static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
766 | u32 clusters_to_add, int mark_unwritten) | ||
767 | { | ||
768 | int ret; | ||
769 | |||
770 | /* | ||
771 | * The alloc sem blocks peope in read/write from reading our | ||
772 | * allocation until we're done changing it. We depend on | ||
773 | * i_mutex to block other extend/truncate calls while we're | ||
774 | * here. | ||
775 | */ | ||
776 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
777 | ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add, | ||
778 | mark_unwritten); | ||
779 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
780 | |||
781 | return ret; | ||
782 | } | ||
783 | |||
752 | /* Some parts of this taken from generic_cont_expand, which turned out | 784 | /* Some parts of this taken from generic_cont_expand, which turned out |
753 | * to be too fragile to do exactly what we need without us having to | 785 | * to be too fragile to do exactly what we need without us having to |
754 | * worry about recursive locking in ->prepare_write() and | 786 | * worry about recursive locking in ->prepare_write() and |
@@ -890,7 +922,9 @@ static int ocfs2_extend_file(struct inode *inode, | |||
890 | } | 922 | } |
891 | 923 | ||
892 | if (clusters_to_add) { | 924 | if (clusters_to_add) { |
893 | ret = ocfs2_extend_allocation(inode, clusters_to_add); | 925 | ret = ocfs2_extend_allocation(inode, |
926 | OCFS2_I(inode)->ip_clusters, | ||
927 | clusters_to_add, 0); | ||
894 | if (ret < 0) { | 928 | if (ret < 0) { |
895 | mlog_errno(ret); | 929 | mlog_errno(ret); |
896 | goto out_unlock; | 930 | goto out_unlock; |
@@ -995,6 +1029,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
995 | goto bail_unlock; | 1029 | goto bail_unlock; |
996 | } | 1030 | } |
997 | 1031 | ||
1032 | /* | ||
1033 | * This will intentionally not wind up calling vmtruncate(), | ||
1034 | * since all the work for a size change has been done above. | ||
1035 | * Otherwise, we could get into problems with truncate as | ||
1036 | * ip_alloc_sem is used there to protect against i_size | ||
1037 | * changes. | ||
1038 | */ | ||
998 | status = inode_setattr(inode, attr); | 1039 | status = inode_setattr(inode, attr); |
999 | if (status < 0) { | 1040 | if (status < 0) { |
1000 | mlog_errno(status); | 1041 | mlog_errno(status); |
@@ -1070,17 +1111,16 @@ out: | |||
1070 | return ret; | 1111 | return ret; |
1071 | } | 1112 | } |
1072 | 1113 | ||
1073 | static int ocfs2_write_remove_suid(struct inode *inode) | 1114 | static int __ocfs2_write_remove_suid(struct inode *inode, |
1115 | struct buffer_head *bh) | ||
1074 | { | 1116 | { |
1075 | int ret; | 1117 | int ret; |
1076 | struct buffer_head *bh = NULL; | ||
1077 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1078 | handle_t *handle; | 1118 | handle_t *handle; |
1079 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1119 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1080 | struct ocfs2_dinode *di; | 1120 | struct ocfs2_dinode *di; |
1081 | 1121 | ||
1082 | mlog_entry("(Inode %llu, mode 0%o)\n", | 1122 | mlog_entry("(Inode %llu, mode 0%o)\n", |
1083 | (unsigned long long)oi->ip_blkno, inode->i_mode); | 1123 | (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); |
1084 | 1124 | ||
1085 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 1125 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
1086 | if (handle == NULL) { | 1126 | if (handle == NULL) { |
@@ -1089,17 +1129,11 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1089 | goto out; | 1129 | goto out; |
1090 | } | 1130 | } |
1091 | 1131 | ||
1092 | ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1093 | if (ret < 0) { | ||
1094 | mlog_errno(ret); | ||
1095 | goto out_trans; | ||
1096 | } | ||
1097 | |||
1098 | ret = ocfs2_journal_access(handle, inode, bh, | 1132 | ret = ocfs2_journal_access(handle, inode, bh, |
1099 | OCFS2_JOURNAL_ACCESS_WRITE); | 1133 | OCFS2_JOURNAL_ACCESS_WRITE); |
1100 | if (ret < 0) { | 1134 | if (ret < 0) { |
1101 | mlog_errno(ret); | 1135 | mlog_errno(ret); |
1102 | goto out_bh; | 1136 | goto out_trans; |
1103 | } | 1137 | } |
1104 | 1138 | ||
1105 | inode->i_mode &= ~S_ISUID; | 1139 | inode->i_mode &= ~S_ISUID; |
@@ -1112,8 +1146,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1112 | ret = ocfs2_journal_dirty(handle, bh); | 1146 | ret = ocfs2_journal_dirty(handle, bh); |
1113 | if (ret < 0) | 1147 | if (ret < 0) |
1114 | mlog_errno(ret); | 1148 | mlog_errno(ret); |
1115 | out_bh: | 1149 | |
1116 | brelse(bh); | ||
1117 | out_trans: | 1150 | out_trans: |
1118 | ocfs2_commit_trans(osb, handle); | 1151 | ocfs2_commit_trans(osb, handle); |
1119 | out: | 1152 | out: |
@@ -1159,6 +1192,460 @@ out: | |||
1159 | return ret; | 1192 | return ret; |
1160 | } | 1193 | } |
1161 | 1194 | ||
1195 | static int ocfs2_write_remove_suid(struct inode *inode) | ||
1196 | { | ||
1197 | int ret; | ||
1198 | struct buffer_head *bh = NULL; | ||
1199 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1200 | |||
1201 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
1202 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1203 | if (ret < 0) { | ||
1204 | mlog_errno(ret); | ||
1205 | goto out; | ||
1206 | } | ||
1207 | |||
1208 | ret = __ocfs2_write_remove_suid(inode, bh); | ||
1209 | out: | ||
1210 | brelse(bh); | ||
1211 | return ret; | ||
1212 | } | ||
1213 | |||
1214 | /* | ||
1215 | * Allocate enough extents to cover the region starting at byte offset | ||
1216 | * start for len bytes. Existing extents are skipped, any extents | ||
1217 | * added are marked as "unwritten". | ||
1218 | */ | ||
1219 | static int ocfs2_allocate_unwritten_extents(struct inode *inode, | ||
1220 | u64 start, u64 len) | ||
1221 | { | ||
1222 | int ret; | ||
1223 | u32 cpos, phys_cpos, clusters, alloc_size; | ||
1224 | |||
1225 | /* | ||
1226 | * We consider both start and len to be inclusive. | ||
1227 | */ | ||
1228 | cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
1229 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len); | ||
1230 | clusters -= cpos; | ||
1231 | |||
1232 | while (clusters) { | ||
1233 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
1234 | &alloc_size, NULL); | ||
1235 | if (ret) { | ||
1236 | mlog_errno(ret); | ||
1237 | goto out; | ||
1238 | } | ||
1239 | |||
1240 | /* | ||
1241 | * Hole or existing extent len can be arbitrary, so | ||
1242 | * cap it to our own allocation request. | ||
1243 | */ | ||
1244 | if (alloc_size > clusters) | ||
1245 | alloc_size = clusters; | ||
1246 | |||
1247 | if (phys_cpos) { | ||
1248 | /* | ||
1249 | * We already have an allocation at this | ||
1250 | * region so we can safely skip it. | ||
1251 | */ | ||
1252 | goto next; | ||
1253 | } | ||
1254 | |||
1255 | ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); | ||
1256 | if (ret) { | ||
1257 | if (ret != -ENOSPC) | ||
1258 | mlog_errno(ret); | ||
1259 | goto out; | ||
1260 | } | ||
1261 | |||
1262 | next: | ||
1263 | cpos += alloc_size; | ||
1264 | clusters -= alloc_size; | ||
1265 | } | ||
1266 | |||
1267 | ret = 0; | ||
1268 | out: | ||
1269 | return ret; | ||
1270 | } | ||
1271 | |||
1272 | static int __ocfs2_remove_inode_range(struct inode *inode, | ||
1273 | struct buffer_head *di_bh, | ||
1274 | u32 cpos, u32 phys_cpos, u32 len, | ||
1275 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
1276 | { | ||
1277 | int ret; | ||
1278 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
1279 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1280 | struct inode *tl_inode = osb->osb_tl_inode; | ||
1281 | handle_t *handle; | ||
1282 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1283 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1284 | |||
1285 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||
1286 | if (ret) { | ||
1287 | mlog_errno(ret); | ||
1288 | return ret; | ||
1289 | } | ||
1290 | |||
1291 | mutex_lock(&tl_inode->i_mutex); | ||
1292 | |||
1293 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
1294 | ret = __ocfs2_flush_truncate_log(osb); | ||
1295 | if (ret < 0) { | ||
1296 | mlog_errno(ret); | ||
1297 | goto out; | ||
1298 | } | ||
1299 | } | ||
1300 | |||
1301 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
1302 | if (handle == NULL) { | ||
1303 | ret = -ENOMEM; | ||
1304 | mlog_errno(ret); | ||
1305 | goto out; | ||
1306 | } | ||
1307 | |||
1308 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
1309 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1310 | if (ret) { | ||
1311 | mlog_errno(ret); | ||
1312 | goto out; | ||
1313 | } | ||
1314 | |||
1315 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||
1316 | dealloc); | ||
1317 | if (ret) { | ||
1318 | mlog_errno(ret); | ||
1319 | goto out_commit; | ||
1320 | } | ||
1321 | |||
1322 | OCFS2_I(inode)->ip_clusters -= len; | ||
1323 | di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||
1324 | |||
1325 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1326 | if (ret) { | ||
1327 | mlog_errno(ret); | ||
1328 | goto out_commit; | ||
1329 | } | ||
1330 | |||
1331 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
1332 | if (ret) | ||
1333 | mlog_errno(ret); | ||
1334 | |||
1335 | out_commit: | ||
1336 | ocfs2_commit_trans(osb, handle); | ||
1337 | out: | ||
1338 | mutex_unlock(&tl_inode->i_mutex); | ||
1339 | |||
1340 | if (meta_ac) | ||
1341 | ocfs2_free_alloc_context(meta_ac); | ||
1342 | |||
1343 | return ret; | ||
1344 | } | ||
1345 | |||
1346 | /* | ||
1347 | * Truncate a byte range, avoiding pages within partial clusters. This | ||
1348 | * preserves those pages for the zeroing code to write to. | ||
1349 | */ | ||
1350 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||
1351 | u64 byte_len) | ||
1352 | { | ||
1353 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1354 | loff_t start, end; | ||
1355 | struct address_space *mapping = inode->i_mapping; | ||
1356 | |||
1357 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||
1358 | end = byte_start + byte_len; | ||
1359 | end = end & ~(osb->s_clustersize - 1); | ||
1360 | |||
1361 | if (start < end) { | ||
1362 | unmap_mapping_range(mapping, start, end - start, 0); | ||
1363 | truncate_inode_pages_range(mapping, start, end - 1); | ||
1364 | } | ||
1365 | } | ||
1366 | |||
1367 | static int ocfs2_zero_partial_clusters(struct inode *inode, | ||
1368 | u64 start, u64 len) | ||
1369 | { | ||
1370 | int ret = 0; | ||
1371 | u64 tmpend, end = start + len; | ||
1372 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1373 | unsigned int csize = osb->s_clustersize; | ||
1374 | handle_t *handle; | ||
1375 | |||
1376 | /* | ||
1377 | * The "start" and "end" values are NOT necessarily part of | ||
1378 | * the range whose allocation is being deleted. Rather, this | ||
1379 | * is what the user passed in with the request. We must zero | ||
1380 | * partial clusters here. There's no need to worry about | ||
1381 | * physical allocation - the zeroing code knows to skip holes. | ||
1382 | */ | ||
1383 | mlog(0, "byte start: %llu, end: %llu\n", | ||
1384 | (unsigned long long)start, (unsigned long long)end); | ||
1385 | |||
1386 | /* | ||
1387 | * If both edges are on a cluster boundary then there's no | ||
1388 | * zeroing required as the region is part of the allocation to | ||
1389 | * be truncated. | ||
1390 | */ | ||
1391 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||
1392 | goto out; | ||
1393 | |||
1394 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1395 | if (handle == NULL) { | ||
1396 | ret = -ENOMEM; | ||
1397 | mlog_errno(ret); | ||
1398 | goto out; | ||
1399 | } | ||
1400 | |||
1401 | /* | ||
1402 | * We want to get the byte offset of the end of the 1st cluster. | ||
1403 | */ | ||
1404 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||
1405 | if (tmpend > end) | ||
1406 | tmpend = end; | ||
1407 | |||
1408 | mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||
1409 | (unsigned long long)start, (unsigned long long)tmpend); | ||
1410 | |||
1411 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||
1412 | if (ret) | ||
1413 | mlog_errno(ret); | ||
1414 | |||
1415 | if (tmpend < end) { | ||
1416 | /* | ||
1417 | * This may make start and end equal, but the zeroing | ||
1418 | * code will skip any work in that case so there's no | ||
1419 | * need to catch it up here. | ||
1420 | */ | ||
1421 | start = end & ~(osb->s_clustersize - 1); | ||
1422 | |||
1423 | mlog(0, "2nd range: start: %llu, end: %llu\n", | ||
1424 | (unsigned long long)start, (unsigned long long)end); | ||
1425 | |||
1426 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||
1427 | if (ret) | ||
1428 | mlog_errno(ret); | ||
1429 | } | ||
1430 | |||
1431 | ocfs2_commit_trans(osb, handle); | ||
1432 | out: | ||
1433 | return ret; | ||
1434 | } | ||
1435 | |||
1436 | static int ocfs2_remove_inode_range(struct inode *inode, | ||
1437 | struct buffer_head *di_bh, u64 byte_start, | ||
1438 | u64 byte_len) | ||
1439 | { | ||
1440 | int ret = 0; | ||
1441 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||
1442 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1443 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
1444 | |||
1445 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
1446 | |||
1447 | if (byte_len == 0) | ||
1448 | return 0; | ||
1449 | |||
1450 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||
1451 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||
1452 | if (trunc_len >= trunc_start) | ||
1453 | trunc_len -= trunc_start; | ||
1454 | else | ||
1455 | trunc_len = 0; | ||
1456 | |||
1457 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||
1458 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1459 | (unsigned long long)byte_start, | ||
1460 | (unsigned long long)byte_len, trunc_start, trunc_len); | ||
1461 | |||
1462 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||
1463 | if (ret) { | ||
1464 | mlog_errno(ret); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | |||
1468 | cpos = trunc_start; | ||
1469 | while (trunc_len) { | ||
1470 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
1471 | &alloc_size, NULL); | ||
1472 | if (ret) { | ||
1473 | mlog_errno(ret); | ||
1474 | goto out; | ||
1475 | } | ||
1476 | |||
1477 | if (alloc_size > trunc_len) | ||
1478 | alloc_size = trunc_len; | ||
1479 | |||
1480 | /* Only do work for non-holes */ | ||
1481 | if (phys_cpos != 0) { | ||
1482 | ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||
1483 | phys_cpos, alloc_size, | ||
1484 | &dealloc); | ||
1485 | if (ret) { | ||
1486 | mlog_errno(ret); | ||
1487 | goto out; | ||
1488 | } | ||
1489 | } | ||
1490 | |||
1491 | cpos += alloc_size; | ||
1492 | trunc_len -= alloc_size; | ||
1493 | } | ||
1494 | |||
1495 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||
1496 | |||
1497 | out: | ||
1498 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
1499 | ocfs2_run_deallocs(osb, &dealloc); | ||
1500 | |||
1501 | return ret; | ||
1502 | } | ||
1503 | |||
1504 | /* | ||
1505 | * Parts of this function taken from xfs_change_file_space() | ||
1506 | */ | ||
1507 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | ||
1508 | struct ocfs2_space_resv *sr) | ||
1509 | { | ||
1510 | int ret; | ||
1511 | s64 llen; | ||
1512 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1513 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1514 | struct buffer_head *di_bh = NULL; | ||
1515 | handle_t *handle; | ||
1516 | unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits); | ||
1517 | |||
1518 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && | ||
1519 | !ocfs2_writes_unwritten_extents(osb)) | ||
1520 | return -ENOTTY; | ||
1521 | else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) && | ||
1522 | !ocfs2_sparse_alloc(osb)) | ||
1523 | return -ENOTTY; | ||
1524 | |||
1525 | if (!S_ISREG(inode->i_mode)) | ||
1526 | return -EINVAL; | ||
1527 | |||
1528 | if (!(file->f_mode & FMODE_WRITE)) | ||
1529 | return -EBADF; | ||
1530 | |||
1531 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
1532 | return -EROFS; | ||
1533 | |||
1534 | mutex_lock(&inode->i_mutex); | ||
1535 | |||
1536 | /* | ||
1537 | * This prevents concurrent writes on other nodes | ||
1538 | */ | ||
1539 | ret = ocfs2_rw_lock(inode, 1); | ||
1540 | if (ret) { | ||
1541 | mlog_errno(ret); | ||
1542 | goto out; | ||
1543 | } | ||
1544 | |||
1545 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
1546 | if (ret) { | ||
1547 | mlog_errno(ret); | ||
1548 | goto out_rw_unlock; | ||
1549 | } | ||
1550 | |||
1551 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
1552 | ret = -EPERM; | ||
1553 | goto out_meta_unlock; | ||
1554 | } | ||
1555 | |||
1556 | switch (sr->l_whence) { | ||
1557 | case 0: /*SEEK_SET*/ | ||
1558 | break; | ||
1559 | case 1: /*SEEK_CUR*/ | ||
1560 | sr->l_start += file->f_pos; | ||
1561 | break; | ||
1562 | case 2: /*SEEK_END*/ | ||
1563 | sr->l_start += i_size_read(inode); | ||
1564 | break; | ||
1565 | default: | ||
1566 | ret = -EINVAL; | ||
1567 | goto out_meta_unlock; | ||
1568 | } | ||
1569 | sr->l_whence = 0; | ||
1570 | |||
1571 | llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len; | ||
1572 | |||
1573 | if (sr->l_start < 0 | ||
1574 | || sr->l_start > max_off | ||
1575 | || (sr->l_start + llen) < 0 | ||
1576 | || (sr->l_start + llen) > max_off) { | ||
1577 | ret = -EINVAL; | ||
1578 | goto out_meta_unlock; | ||
1579 | } | ||
1580 | |||
1581 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | ||
1582 | if (sr->l_len <= 0) { | ||
1583 | ret = -EINVAL; | ||
1584 | goto out_meta_unlock; | ||
1585 | } | ||
1586 | } | ||
1587 | |||
1588 | if (should_remove_suid(file->f_path.dentry)) { | ||
1589 | ret = __ocfs2_write_remove_suid(inode, di_bh); | ||
1590 | if (ret) { | ||
1591 | mlog_errno(ret); | ||
1592 | goto out_meta_unlock; | ||
1593 | } | ||
1594 | } | ||
1595 | |||
1596 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1597 | switch (cmd) { | ||
1598 | case OCFS2_IOC_RESVSP: | ||
1599 | case OCFS2_IOC_RESVSP64: | ||
1600 | /* | ||
1601 | * This takes unsigned offsets, but the signed ones we | ||
1602 | * pass have been checked against overflow above. | ||
1603 | */ | ||
1604 | ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, | ||
1605 | sr->l_len); | ||
1606 | break; | ||
1607 | case OCFS2_IOC_UNRESVSP: | ||
1608 | case OCFS2_IOC_UNRESVSP64: | ||
1609 | ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start, | ||
1610 | sr->l_len); | ||
1611 | break; | ||
1612 | default: | ||
1613 | ret = -EINVAL; | ||
1614 | } | ||
1615 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1616 | if (ret) { | ||
1617 | mlog_errno(ret); | ||
1618 | goto out_meta_unlock; | ||
1619 | } | ||
1620 | |||
1621 | /* | ||
1622 | * We update c/mtime for these changes | ||
1623 | */ | ||
1624 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1625 | if (IS_ERR(handle)) { | ||
1626 | ret = PTR_ERR(handle); | ||
1627 | mlog_errno(ret); | ||
1628 | goto out_meta_unlock; | ||
1629 | } | ||
1630 | |||
1631 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | ||
1632 | ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); | ||
1633 | if (ret < 0) | ||
1634 | mlog_errno(ret); | ||
1635 | |||
1636 | ocfs2_commit_trans(osb, handle); | ||
1637 | |||
1638 | out_meta_unlock: | ||
1639 | brelse(di_bh); | ||
1640 | ocfs2_meta_unlock(inode, 1); | ||
1641 | out_rw_unlock: | ||
1642 | ocfs2_rw_unlock(inode, 1); | ||
1643 | |||
1644 | mutex_unlock(&inode->i_mutex); | ||
1645 | out: | ||
1646 | return ret; | ||
1647 | } | ||
1648 | |||
1162 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1649 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
1163 | loff_t *ppos, | 1650 | loff_t *ppos, |
1164 | size_t count, | 1651 | size_t count, |
@@ -1329,15 +1816,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) | |||
1329 | *basep = base; | 1816 | *basep = base; |
1330 | } | 1817 | } |
1331 | 1818 | ||
1332 | static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, | 1819 | static struct page * ocfs2_get_write_source(char **ret_src_buf, |
1333 | const struct iovec *cur_iov, | 1820 | const struct iovec *cur_iov, |
1334 | size_t iov_offset) | 1821 | size_t iov_offset) |
1335 | { | 1822 | { |
1336 | int ret; | 1823 | int ret; |
1337 | char *buf; | 1824 | char *buf = cur_iov->iov_base + iov_offset; |
1338 | struct page *src_page = NULL; | 1825 | struct page *src_page = NULL; |
1826 | unsigned long off; | ||
1339 | 1827 | ||
1340 | buf = cur_iov->iov_base + iov_offset; | 1828 | off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; |
1341 | 1829 | ||
1342 | if (!segment_eq(get_fs(), KERNEL_DS)) { | 1830 | if (!segment_eq(get_fs(), KERNEL_DS)) { |
1343 | /* | 1831 | /* |
@@ -1349,18 +1837,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp | |||
1349 | (unsigned long)buf & PAGE_CACHE_MASK, 1, | 1837 | (unsigned long)buf & PAGE_CACHE_MASK, 1, |
1350 | 0, 0, &src_page, NULL); | 1838 | 0, 0, &src_page, NULL); |
1351 | if (ret == 1) | 1839 | if (ret == 1) |
1352 | bp->b_src_buf = kmap(src_page); | 1840 | *ret_src_buf = kmap(src_page) + off; |
1353 | else | 1841 | else |
1354 | src_page = ERR_PTR(-EFAULT); | 1842 | src_page = ERR_PTR(-EFAULT); |
1355 | } else { | 1843 | } else { |
1356 | bp->b_src_buf = buf; | 1844 | *ret_src_buf = buf; |
1357 | } | 1845 | } |
1358 | 1846 | ||
1359 | return src_page; | 1847 | return src_page; |
1360 | } | 1848 | } |
1361 | 1849 | ||
1362 | static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp, | 1850 | static void ocfs2_put_write_source(struct page *page) |
1363 | struct page *page) | ||
1364 | { | 1851 | { |
1365 | if (page) { | 1852 | if (page) { |
1366 | kunmap(page); | 1853 | kunmap(page); |
@@ -1376,10 +1863,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1376 | { | 1863 | { |
1377 | int ret = 0; | 1864 | int ret = 0; |
1378 | ssize_t copied, total = 0; | 1865 | ssize_t copied, total = 0; |
1379 | size_t iov_offset = 0; | 1866 | size_t iov_offset = 0, bytes; |
1867 | loff_t pos; | ||
1380 | const struct iovec *cur_iov = iov; | 1868 | const struct iovec *cur_iov = iov; |
1381 | struct ocfs2_buffered_write_priv bp; | 1869 | struct page *user_page, *page; |
1382 | struct page *page; | 1870 | char *buf, *dst; |
1871 | void *fsdata; | ||
1383 | 1872 | ||
1384 | /* | 1873 | /* |
1385 | * handle partial DIO write. Adjust cur_iov if needed. | 1874 | * handle partial DIO write. Adjust cur_iov if needed. |
@@ -1387,21 +1876,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1387 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); | 1876 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); |
1388 | 1877 | ||
1389 | do { | 1878 | do { |
1390 | bp.b_cur_off = iov_offset; | 1879 | pos = *ppos; |
1391 | bp.b_cur_iov = cur_iov; | ||
1392 | 1880 | ||
1393 | page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); | 1881 | user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); |
1394 | if (IS_ERR(page)) { | 1882 | if (IS_ERR(user_page)) { |
1395 | ret = PTR_ERR(page); | 1883 | ret = PTR_ERR(user_page); |
1396 | goto out; | 1884 | goto out; |
1397 | } | 1885 | } |
1398 | 1886 | ||
1399 | copied = ocfs2_buffered_write_cluster(file, *ppos, count, | 1887 | /* Stay within our page boundaries */ |
1400 | ocfs2_map_and_write_user_data, | 1888 | bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), |
1401 | &bp); | 1889 | (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); |
1890 | /* Stay within the vector boundary */ | ||
1891 | bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); | ||
1892 | /* Stay within count */ | ||
1893 | bytes = min(bytes, count); | ||
1894 | |||
1895 | page = NULL; | ||
1896 | ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, | ||
1897 | &page, &fsdata); | ||
1898 | if (ret) { | ||
1899 | mlog_errno(ret); | ||
1900 | goto out; | ||
1901 | } | ||
1402 | 1902 | ||
1403 | ocfs2_put_write_source(&bp, page); | 1903 | dst = kmap_atomic(page, KM_USER0); |
1904 | memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes); | ||
1905 | kunmap_atomic(dst, KM_USER0); | ||
1906 | flush_dcache_page(page); | ||
1907 | ocfs2_put_write_source(user_page); | ||
1404 | 1908 | ||
1909 | copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, | ||
1910 | bytes, page, fsdata); | ||
1405 | if (copied < 0) { | 1911 | if (copied < 0) { |
1406 | mlog_errno(copied); | 1912 | mlog_errno(copied); |
1407 | ret = copied; | 1913 | ret = copied; |
@@ -1409,7 +1915,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1409 | } | 1915 | } |
1410 | 1916 | ||
1411 | total += copied; | 1917 | total += copied; |
1412 | *ppos = *ppos + copied; | 1918 | *ppos = pos + copied; |
1413 | count -= copied; | 1919 | count -= copied; |
1414 | 1920 | ||
1415 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); | 1921 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); |
@@ -1579,52 +2085,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, | |||
1579 | struct pipe_buffer *buf, | 2085 | struct pipe_buffer *buf, |
1580 | struct splice_desc *sd) | 2086 | struct splice_desc *sd) |
1581 | { | 2087 | { |
1582 | int ret, count, total = 0; | 2088 | int ret, count; |
1583 | ssize_t copied = 0; | 2089 | ssize_t copied = 0; |
1584 | struct ocfs2_splice_write_priv sp; | 2090 | struct file *file = sd->u.file; |
2091 | unsigned int offset; | ||
2092 | struct page *page = NULL; | ||
2093 | void *fsdata; | ||
2094 | char *src, *dst; | ||
1585 | 2095 | ||
1586 | ret = buf->ops->confirm(pipe, buf); | 2096 | ret = buf->ops->confirm(pipe, buf); |
1587 | if (ret) | 2097 | if (ret) |
1588 | goto out; | 2098 | goto out; |
1589 | 2099 | ||
1590 | sp.s_sd = sd; | 2100 | offset = sd->pos & ~PAGE_CACHE_MASK; |
1591 | sp.s_buf = buf; | ||
1592 | sp.s_pipe = pipe; | ||
1593 | sp.s_offset = sd->pos & ~PAGE_CACHE_MASK; | ||
1594 | sp.s_buf_offset = buf->offset; | ||
1595 | |||
1596 | count = sd->len; | 2101 | count = sd->len; |
1597 | if (count + sp.s_offset > PAGE_CACHE_SIZE) | 2102 | if (count + offset > PAGE_CACHE_SIZE) |
1598 | count = PAGE_CACHE_SIZE - sp.s_offset; | 2103 | count = PAGE_CACHE_SIZE - offset; |
1599 | 2104 | ||
1600 | do { | 2105 | ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, |
1601 | /* | 2106 | &page, &fsdata); |
1602 | * splice wants us to copy up to one page at a | 2107 | if (ret) { |
1603 | * time. For pagesize > cluster size, this means we | 2108 | mlog_errno(ret); |
1604 | * might enter ocfs2_buffered_write_cluster() more | 2109 | goto out; |
1605 | * than once, so keep track of our progress here. | 2110 | } |
1606 | */ | ||
1607 | copied = ocfs2_buffered_write_cluster(sd->u.file, | ||
1608 | (loff_t)sd->pos + total, | ||
1609 | count, | ||
1610 | ocfs2_map_and_write_splice_data, | ||
1611 | &sp); | ||
1612 | if (copied < 0) { | ||
1613 | mlog_errno(copied); | ||
1614 | ret = copied; | ||
1615 | goto out; | ||
1616 | } | ||
1617 | 2111 | ||
1618 | count -= copied; | 2112 | src = buf->ops->map(pipe, buf, 1); |
1619 | sp.s_offset += copied; | 2113 | dst = kmap_atomic(page, KM_USER1); |
1620 | sp.s_buf_offset += copied; | 2114 | memcpy(dst + offset, src + buf->offset, count); |
1621 | total += copied; | 2115 | kunmap_atomic(page, KM_USER1); |
1622 | } while (count); | 2116 | buf->ops->unmap(pipe, buf, src); |
1623 | 2117 | ||
1624 | ret = 0; | 2118 | copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, |
2119 | page, fsdata); | ||
2120 | if (copied < 0) { | ||
2121 | mlog_errno(copied); | ||
2122 | ret = copied; | ||
2123 | goto out; | ||
2124 | } | ||
1625 | out: | 2125 | out: |
1626 | 2126 | ||
1627 | return total ? total : ret; | 2127 | return copied ? copied : ret; |
1628 | } | 2128 | } |
1629 | 2129 | ||
1630 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 2130 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index a4dd1fa1822b..36fe27f268ee 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -39,15 +39,16 @@ enum ocfs2_alloc_restarted { | |||
39 | }; | 39 | }; |
40 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 40 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, |
41 | struct inode *inode, | 41 | struct inode *inode, |
42 | u32 *cluster_start, | 42 | u32 *logical_offset, |
43 | u32 clusters_to_add, | 43 | u32 clusters_to_add, |
44 | int mark_unwritten, | ||
44 | struct buffer_head *fe_bh, | 45 | struct buffer_head *fe_bh, |
45 | handle_t *handle, | 46 | handle_t *handle, |
46 | struct ocfs2_alloc_context *data_ac, | 47 | struct ocfs2_alloc_context *data_ac, |
47 | struct ocfs2_alloc_context *meta_ac, | 48 | struct ocfs2_alloc_context *meta_ac, |
48 | enum ocfs2_alloc_restarted *reason); | 49 | enum ocfs2_alloc_restarted *reason_ret); |
49 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | 50 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, |
50 | u32 clusters_to_add, | 51 | u32 clusters_to_add, u32 extents_to_split, |
51 | struct ocfs2_alloc_context **data_ac, | 52 | struct ocfs2_alloc_context **data_ac, |
52 | struct ocfs2_alloc_context **meta_ac); | 53 | struct ocfs2_alloc_context **meta_ac); |
53 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 54 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
@@ -61,4 +62,7 @@ int ocfs2_should_update_atime(struct inode *inode, | |||
61 | int ocfs2_update_inode_atime(struct inode *inode, | 62 | int ocfs2_update_inode_atime(struct inode *inode, |
62 | struct buffer_head *bh); | 63 | struct buffer_head *bh); |
63 | 64 | ||
65 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | ||
66 | struct ocfs2_space_resv *sr); | ||
67 | |||
64 | #endif /* OCFS2_FILE_H */ | 68 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index b25ef63781ba..352eb4a13f98 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) | |||
157 | if (ocfs2_mount_local(osb)) | 157 | if (ocfs2_mount_local(osb)) |
158 | return 0; | 158 | return 0; |
159 | 159 | ||
160 | status = o2hb_register_callback(&osb->osb_hb_down); | 160 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); |
161 | if (status < 0) { | 161 | if (status < 0) { |
162 | mlog_errno(status); | 162 | mlog_errno(status); |
163 | goto bail; | 163 | goto bail; |
164 | } | 164 | } |
165 | 165 | ||
166 | status = o2hb_register_callback(&osb->osb_hb_up); | 166 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); |
167 | if (status < 0) { | 167 | if (status < 0) { |
168 | mlog_errno(status); | 168 | mlog_errno(status); |
169 | o2hb_unregister_callback(&osb->osb_hb_down); | 169 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); |
170 | } | 170 | } |
171 | 171 | ||
172 | bail: | 172 | bail: |
@@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) | |||
178 | if (ocfs2_mount_local(osb)) | 178 | if (ocfs2_mount_local(osb)) |
179 | return; | 179 | return; |
180 | 180 | ||
181 | o2hb_unregister_callback(&osb->osb_hb_down); | 181 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); |
182 | o2hb_unregister_callback(&osb->osb_hb_up); | 182 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); |
183 | } | 183 | } |
184 | 184 | ||
185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index f3ad21ad9aed..bd68c3f2afbe 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include "ocfs2.h" | 14 | #include "ocfs2.h" |
15 | #include "alloc.h" | 15 | #include "alloc.h" |
16 | #include "dlmglue.h" | 16 | #include "dlmglue.h" |
17 | #include "file.h" | ||
17 | #include "inode.h" | 18 | #include "inode.h" |
18 | #include "journal.h" | 19 | #include "journal.h" |
19 | 20 | ||
@@ -115,6 +116,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
115 | { | 116 | { |
116 | unsigned int flags; | 117 | unsigned int flags; |
117 | int status; | 118 | int status; |
119 | struct ocfs2_space_resv sr; | ||
118 | 120 | ||
119 | switch (cmd) { | 121 | switch (cmd) { |
120 | case OCFS2_IOC_GETFLAGS: | 122 | case OCFS2_IOC_GETFLAGS: |
@@ -130,6 +132,14 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
130 | 132 | ||
131 | return ocfs2_set_inode_attr(inode, flags, | 133 | return ocfs2_set_inode_attr(inode, flags, |
132 | OCFS2_FL_MODIFIABLE); | 134 | OCFS2_FL_MODIFIABLE); |
135 | case OCFS2_IOC_RESVSP: | ||
136 | case OCFS2_IOC_RESVSP64: | ||
137 | case OCFS2_IOC_UNRESVSP: | ||
138 | case OCFS2_IOC_UNRESVSP64: | ||
139 | if (copy_from_user(&sr, (int __user *) arg, sizeof(sr))) | ||
140 | return -EFAULT; | ||
141 | |||
142 | return ocfs2_change_file_space(filp, cmd, &sr); | ||
133 | default: | 143 | default: |
134 | return -ENOTTY; | 144 | return -ENOTTY; |
135 | } | 145 | } |
@@ -148,6 +158,11 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
148 | case OCFS2_IOC32_SETFLAGS: | 158 | case OCFS2_IOC32_SETFLAGS: |
149 | cmd = OCFS2_IOC_SETFLAGS; | 159 | cmd = OCFS2_IOC_SETFLAGS; |
150 | break; | 160 | break; |
161 | case OCFS2_IOC_RESVSP: | ||
162 | case OCFS2_IOC_RESVSP64: | ||
163 | case OCFS2_IOC_UNRESVSP: | ||
164 | case OCFS2_IOC_UNRESVSP64: | ||
165 | break; | ||
151 | default: | 166 | default: |
152 | return -ENOIOCTLCMD; | 167 | return -ENOIOCTLCMD; |
153 | } | 168 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index dc1188081720..dbfb20bb27ea 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
722 | container_of(work, struct ocfs2_journal, j_recovery_work); | 722 | container_of(work, struct ocfs2_journal, j_recovery_work); |
723 | struct ocfs2_super *osb = journal->j_osb; | 723 | struct ocfs2_super *osb = journal->j_osb; |
724 | struct ocfs2_dinode *la_dinode, *tl_dinode; | 724 | struct ocfs2_dinode *la_dinode, *tl_dinode; |
725 | struct ocfs2_la_recovery_item *item; | 725 | struct ocfs2_la_recovery_item *item, *n; |
726 | struct list_head *p, *n; | ||
727 | LIST_HEAD(tmp_la_list); | 726 | LIST_HEAD(tmp_la_list); |
728 | 727 | ||
729 | mlog_entry_void(); | 728 | mlog_entry_void(); |
@@ -734,8 +733,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
734 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); | 733 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); |
735 | spin_unlock(&journal->j_lock); | 734 | spin_unlock(&journal->j_lock); |
736 | 735 | ||
737 | list_for_each_safe(p, n, &tmp_la_list) { | 736 | list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { |
738 | item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); | ||
739 | list_del_init(&item->lri_list); | 737 | list_del_init(&item->lri_list); |
740 | 738 | ||
741 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); | 739 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3db5de4506da..ce60aab013aa 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ | 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ |
290 | + OCFS2_TRUNCATE_LOG_UPDATE) | 290 | + OCFS2_TRUNCATE_LOG_UPDATE) |
291 | 291 | ||
292 | #define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) | ||
293 | |||
292 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 294 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
293 | * bitmap block for the new bit) */ | 295 | * bitmap block for the new bit) */ |
294 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 296 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af01158b39f5..d79aa12137d2 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -37,11 +37,29 @@ | |||
37 | 37 | ||
38 | #include "ocfs2.h" | 38 | #include "ocfs2.h" |
39 | 39 | ||
40 | #include "aops.h" | ||
40 | #include "dlmglue.h" | 41 | #include "dlmglue.h" |
41 | #include "file.h" | 42 | #include "file.h" |
42 | #include "inode.h" | 43 | #include "inode.h" |
43 | #include "mmap.h" | 44 | #include "mmap.h" |
44 | 45 | ||
46 | static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) | ||
47 | { | ||
48 | /* The best way to deal with signals in the vm path is | ||
49 | * to block them upfront, rather than allowing the | ||
50 | * locking paths to return -ERESTARTSYS. */ | ||
51 | sigfillset(blocked); | ||
52 | |||
53 | /* We should technically never get a bad return value | ||
54 | * from sigprocmask */ | ||
55 | return sigprocmask(SIG_BLOCK, blocked, oldset); | ||
56 | } | ||
57 | |||
58 | static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) | ||
59 | { | ||
60 | return sigprocmask(SIG_SETMASK, oldset, NULL); | ||
61 | } | ||
62 | |||
45 | static struct page *ocfs2_nopage(struct vm_area_struct * area, | 63 | static struct page *ocfs2_nopage(struct vm_area_struct * area, |
46 | unsigned long address, | 64 | unsigned long address, |
47 | int *type) | 65 | int *type) |
@@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
53 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, | 71 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, |
54 | type); | 72 | type); |
55 | 73 | ||
56 | /* The best way to deal with signals in this path is | 74 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); |
57 | * to block them upfront, rather than allowing the | ||
58 | * locking paths to return -ERESTARTSYS. */ | ||
59 | sigfillset(&blocked); | ||
60 | |||
61 | /* We should technically never get a bad ret return | ||
62 | * from sigprocmask */ | ||
63 | ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); | ||
64 | if (ret < 0) { | 75 | if (ret < 0) { |
65 | mlog_errno(ret); | 76 | mlog_errno(ret); |
66 | goto out; | 77 | goto out; |
@@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
68 | 79 | ||
69 | page = filemap_nopage(area, address, type); | 80 | page = filemap_nopage(area, address, type); |
70 | 81 | ||
71 | ret = sigprocmask(SIG_SETMASK, &oldset, NULL); | 82 | ret = ocfs2_vm_op_unblock_sigs(&oldset); |
72 | if (ret < 0) | 83 | if (ret < 0) |
73 | mlog_errno(ret); | 84 | mlog_errno(ret); |
74 | out: | 85 | out: |
@@ -76,28 +87,136 @@ out: | |||
76 | return page; | 87 | return page; |
77 | } | 88 | } |
78 | 89 | ||
79 | static struct vm_operations_struct ocfs2_file_vm_ops = { | 90 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, |
80 | .nopage = ocfs2_nopage, | 91 | struct page *page) |
81 | }; | 92 | { |
93 | int ret; | ||
94 | struct address_space *mapping = inode->i_mapping; | ||
95 | loff_t pos = page->index << PAGE_CACHE_SHIFT; | ||
96 | unsigned int len = PAGE_CACHE_SIZE; | ||
97 | pgoff_t last_index; | ||
98 | struct page *locked_page = NULL; | ||
99 | void *fsdata; | ||
100 | loff_t size = i_size_read(inode); | ||
82 | 101 | ||
83 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | 102 | /* |
103 | * Another node might have truncated while we were waiting on | ||
104 | * cluster locks. | ||
105 | */ | ||
106 | last_index = size >> PAGE_CACHE_SHIFT; | ||
107 | if (page->index > last_index) { | ||
108 | ret = -EINVAL; | ||
109 | goto out; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * The i_size check above doesn't catch the case where nodes | ||
114 | * truncated and then re-extended the file. We'll re-check the | ||
115 | * page mapping after taking the page lock inside of | ||
116 | * ocfs2_write_begin_nolock(). | ||
117 | */ | ||
118 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | ||
119 | ret = -EINVAL; | ||
120 | goto out; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * Call ocfs2_write_begin() and ocfs2_write_end() to take | ||
125 | * advantage of the allocation code there. We pass a write | ||
126 | * length of the whole page (chopped to i_size) to make sure | ||
127 | * the whole thing is allocated. | ||
128 | * | ||
129 | * Since we know the page is up to date, we don't have to | ||
130 | * worry about ocfs2_write_begin() skipping some buffer reads | ||
131 | * because the "write" would invalidate their data. | ||
132 | */ | ||
133 | if (page->index == last_index) | ||
134 | len = size & ~PAGE_CACHE_MASK; | ||
135 | |||
136 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | ||
137 | &fsdata, di_bh, page); | ||
138 | if (ret) { | ||
139 | if (ret != -ENOSPC) | ||
140 | mlog_errno(ret); | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | ||
145 | fsdata); | ||
146 | if (ret < 0) { | ||
147 | mlog_errno(ret); | ||
148 | goto out; | ||
149 | } | ||
150 | BUG_ON(ret != len); | ||
151 | ret = 0; | ||
152 | out: | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
84 | { | 157 | { |
85 | int ret = 0, lock_level = 0; | 158 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
86 | struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); | 159 | struct buffer_head *di_bh = NULL; |
160 | sigset_t blocked, oldset; | ||
161 | int ret, ret2; | ||
162 | |||
163 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); | ||
164 | if (ret < 0) { | ||
165 | mlog_errno(ret); | ||
166 | return ret; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * The cluster locks taken will block a truncate from another | ||
171 | * node. Taking the data lock will also ensure that we don't | ||
172 | * attempt page truncation as part of a downconvert. | ||
173 | */ | ||
174 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
175 | if (ret < 0) { | ||
176 | mlog_errno(ret); | ||
177 | goto out; | ||
178 | } | ||
87 | 179 | ||
88 | /* | 180 | /* |
89 | * Only support shared writeable mmap for local mounts which | 181 | * The alloc sem should be enough to serialize with |
90 | * don't know about holes. | 182 | * ocfs2_truncate_file() changing i_size as well as any thread |
183 | * modifying the inode btree. | ||
91 | */ | 184 | */ |
92 | if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && | 185 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
93 | ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && | 186 | |
94 | ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { | 187 | ret = ocfs2_data_lock(inode, 1); |
95 | mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); | 188 | if (ret < 0) { |
96 | /* This is -EINVAL because generic_file_readonly_mmap | 189 | mlog_errno(ret); |
97 | * returns it in a similar situation. */ | 190 | goto out_meta_unlock; |
98 | return -EINVAL; | ||
99 | } | 191 | } |
100 | 192 | ||
193 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | ||
194 | |||
195 | ocfs2_data_unlock(inode, 1); | ||
196 | |||
197 | out_meta_unlock: | ||
198 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
199 | |||
200 | brelse(di_bh); | ||
201 | ocfs2_meta_unlock(inode, 1); | ||
202 | |||
203 | out: | ||
204 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | ||
205 | if (ret2 < 0) | ||
206 | mlog_errno(ret2); | ||
207 | |||
208 | return ret; | ||
209 | } | ||
210 | |||
211 | static struct vm_operations_struct ocfs2_file_vm_ops = { | ||
212 | .nopage = ocfs2_nopage, | ||
213 | .page_mkwrite = ocfs2_page_mkwrite, | ||
214 | }; | ||
215 | |||
216 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | ||
217 | { | ||
218 | int ret = 0, lock_level = 0; | ||
219 | |||
101 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 220 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, |
102 | file->f_vfsmnt, &lock_level); | 221 | file->f_vfsmnt, &lock_level); |
103 | if (ret < 0) { | 222 | if (ret < 0) { |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 36289e6295ce..d430fdab16e9 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -1674,7 +1674,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1674 | u32 offset = 0; | 1674 | u32 offset = 0; |
1675 | 1675 | ||
1676 | inode->i_op = &ocfs2_symlink_inode_operations; | 1676 | inode->i_op = &ocfs2_symlink_inode_operations; |
1677 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, | 1677 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, |
1678 | new_fe_bh, | 1678 | new_fe_bh, |
1679 | handle, data_ac, NULL, | 1679 | handle, data_ac, NULL, |
1680 | NULL); | 1680 | NULL); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a860633e833f..5cc90a40b3c5 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -219,6 +219,7 @@ struct ocfs2_super | |||
219 | u16 max_slots; | 219 | u16 max_slots; |
220 | s16 node_num; | 220 | s16 node_num; |
221 | s16 slot_num; | 221 | s16 slot_num; |
222 | s16 preferred_slot; | ||
222 | int s_sectsize_bits; | 223 | int s_sectsize_bits; |
223 | int s_clustersize; | 224 | int s_clustersize; |
224 | int s_clustersize_bits; | 225 | int s_clustersize_bits; |
@@ -305,6 +306,19 @@ static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) | |||
305 | return 0; | 306 | return 0; |
306 | } | 307 | } |
307 | 308 | ||
309 | static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) | ||
310 | { | ||
311 | /* | ||
312 | * Support for sparse files is a pre-requisite | ||
313 | */ | ||
314 | if (!ocfs2_sparse_alloc(osb)) | ||
315 | return 0; | ||
316 | |||
317 | if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) | ||
318 | return 1; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
308 | /* set / clear functions because cluster events can make these happen | 322 | /* set / clear functions because cluster events can make these happen |
309 | * in parallel so we want the transitions to be atomic. this also | 323 | * in parallel so we want the transitions to be atomic. this also |
310 | * means that any future flags osb_flags must be protected by spinlock | 324 | * means that any future flags osb_flags must be protected by spinlock |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f0d9eb08547a..82f8a75b207e 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -88,7 +88,7 @@ | |||
88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB | 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB |
89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ |
90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) | 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) |
91 | #define OCFS2_FEATURE_RO_COMPAT_SUPP 0 | 91 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Heartbeat-only devices are missing journals and other files. The | 94 | * Heartbeat-only devices are missing journals and other files. The |
@@ -116,6 +116,11 @@ | |||
116 | */ | 116 | */ |
117 | #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 | 117 | #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 |
118 | 118 | ||
119 | /* | ||
120 | * Unwritten extents support. | ||
121 | */ | ||
122 | #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 | ||
123 | |||
119 | /* The byte offset of the first backup block will be 1G. | 124 | /* The byte offset of the first backup block will be 1G. |
120 | * The following will be 4G, 16G, 64G, 256G and 1T. | 125 | * The following will be 4G, 16G, 64G, 256G and 1T. |
121 | */ | 126 | */ |
@@ -170,6 +175,32 @@ | |||
170 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 175 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) |
171 | 176 | ||
172 | /* | 177 | /* |
178 | * Space reservation / allocation / free ioctls and argument structure | ||
179 | * are designed to be compatible with XFS. | ||
180 | * | ||
181 | * ALLOCSP* and FREESP* are not and will never be supported, but are | ||
182 | * included here for completeness. | ||
183 | */ | ||
184 | struct ocfs2_space_resv { | ||
185 | __s16 l_type; | ||
186 | __s16 l_whence; | ||
187 | __s64 l_start; | ||
188 | __s64 l_len; /* len == 0 means until end of file */ | ||
189 | __s32 l_sysid; | ||
190 | __u32 l_pid; | ||
191 | __s32 l_pad[4]; /* reserve area */ | ||
192 | }; | ||
193 | |||
194 | #define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) | ||
195 | #define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) | ||
196 | #define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) | ||
197 | #define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) | ||
198 | #define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) | ||
199 | #define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) | ||
200 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | ||
201 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | ||
202 | |||
203 | /* | ||
173 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 204 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
174 | */ | 205 | */ |
175 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ | 206 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d8b79067dc14..af4882b62cfa 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -121,17 +121,25 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
121 | return ret; | 121 | return ret; |
122 | } | 122 | } |
123 | 123 | ||
124 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si) | 124 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) |
125 | { | 125 | { |
126 | int i; | 126 | int i; |
127 | s16 ret = OCFS2_INVALID_SLOT; | 127 | s16 ret = OCFS2_INVALID_SLOT; |
128 | 128 | ||
129 | if (preferred >= 0 && preferred < si->si_num_slots) { | ||
130 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { | ||
131 | ret = preferred; | ||
132 | goto out; | ||
133 | } | ||
134 | } | ||
135 | |||
129 | for(i = 0; i < si->si_num_slots; i++) { | 136 | for(i = 0; i < si->si_num_slots; i++) { |
130 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { | 137 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { |
131 | ret = (s16) i; | 138 | ret = (s16) i; |
132 | break; | 139 | break; |
133 | } | 140 | } |
134 | } | 141 | } |
142 | out: | ||
135 | return ret; | 143 | return ret; |
136 | } | 144 | } |
137 | 145 | ||
@@ -248,7 +256,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
248 | if (slot == OCFS2_INVALID_SLOT) { | 256 | if (slot == OCFS2_INVALID_SLOT) { |
249 | /* if no slot yet, then just take 1st available | 257 | /* if no slot yet, then just take 1st available |
250 | * one. */ | 258 | * one. */ |
251 | slot = __ocfs2_find_empty_slot(si); | 259 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); |
252 | if (slot == OCFS2_INVALID_SLOT) { | 260 | if (slot == OCFS2_INVALID_SLOT) { |
253 | spin_unlock(&si->si_lock); | 261 | spin_unlock(&si->si_lock); |
254 | mlog(ML_ERROR, "no free slots available!\n"); | 262 | mlog(ML_ERROR, "no free slots available!\n"); |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e3437626d183..d9c5c9fcb30f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
98 | u16 chain); | 98 | u16 chain); |
99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
100 | u32 wanted); | 100 | u32 wanted); |
101 | static int ocfs2_free_suballoc_bits(handle_t *handle, | ||
102 | struct inode *alloc_inode, | ||
103 | struct buffer_head *alloc_bh, | ||
104 | unsigned int start_bit, | ||
105 | u64 bg_blkno, | ||
106 | unsigned int count); | ||
107 | static inline u64 ocfs2_which_suballoc_group(u64 block, | ||
108 | unsigned int bit); | ||
109 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
110 | u64 bg_blkno, | 102 | u64 bg_blkno, |
111 | u16 bg_bit_off); | 103 | u16 bg_bit_off); |
@@ -496,13 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
496 | 488 | ||
497 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | 489 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); |
498 | (*ac)->ac_which = OCFS2_AC_USE_META; | 490 | (*ac)->ac_which = OCFS2_AC_USE_META; |
499 | |||
500 | #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS | ||
501 | slot = 0; | ||
502 | #else | ||
503 | slot = osb->slot_num; | 491 | slot = osb->slot_num; |
504 | #endif | ||
505 | |||
506 | (*ac)->ac_group_search = ocfs2_block_group_search; | 492 | (*ac)->ac_group_search = ocfs2_block_group_search; |
507 | 493 | ||
508 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 494 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
@@ -1626,12 +1612,12 @@ bail: | |||
1626 | /* | 1612 | /* |
1627 | * expects the suballoc inode to already be locked. | 1613 | * expects the suballoc inode to already be locked. |
1628 | */ | 1614 | */ |
1629 | static int ocfs2_free_suballoc_bits(handle_t *handle, | 1615 | int ocfs2_free_suballoc_bits(handle_t *handle, |
1630 | struct inode *alloc_inode, | 1616 | struct inode *alloc_inode, |
1631 | struct buffer_head *alloc_bh, | 1617 | struct buffer_head *alloc_bh, |
1632 | unsigned int start_bit, | 1618 | unsigned int start_bit, |
1633 | u64 bg_blkno, | 1619 | u64 bg_blkno, |
1634 | unsigned int count) | 1620 | unsigned int count) |
1635 | { | 1621 | { |
1636 | int status = 0; | 1622 | int status = 0; |
1637 | u32 tmp_used; | 1623 | u32 tmp_used; |
@@ -1703,13 +1689,6 @@ bail: | |||
1703 | return status; | 1689 | return status; |
1704 | } | 1690 | } |
1705 | 1691 | ||
1706 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
1707 | { | ||
1708 | u64 group = block - (u64) bit; | ||
1709 | |||
1710 | return group; | ||
1711 | } | ||
1712 | |||
1713 | int ocfs2_free_dinode(handle_t *handle, | 1692 | int ocfs2_free_dinode(handle_t *handle, |
1714 | struct inode *inode_alloc_inode, | 1693 | struct inode *inode_alloc_inode, |
1715 | struct buffer_head *inode_alloc_bh, | 1694 | struct buffer_head *inode_alloc_bh, |
@@ -1723,19 +1702,6 @@ int ocfs2_free_dinode(handle_t *handle, | |||
1723 | inode_alloc_bh, bit, bg_blkno, 1); | 1702 | inode_alloc_bh, bit, bg_blkno, 1); |
1724 | } | 1703 | } |
1725 | 1704 | ||
1726 | int ocfs2_free_extent_block(handle_t *handle, | ||
1727 | struct inode *eb_alloc_inode, | ||
1728 | struct buffer_head *eb_alloc_bh, | ||
1729 | struct ocfs2_extent_block *eb) | ||
1730 | { | ||
1731 | u64 blk = le64_to_cpu(eb->h_blkno); | ||
1732 | u16 bit = le16_to_cpu(eb->h_suballoc_bit); | ||
1733 | u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
1734 | |||
1735 | return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, | ||
1736 | bit, bg_blkno, 1); | ||
1737 | } | ||
1738 | |||
1739 | int ocfs2_free_clusters(handle_t *handle, | 1705 | int ocfs2_free_clusters(handle_t *handle, |
1740 | struct inode *bitmap_inode, | 1706 | struct inode *bitmap_inode, |
1741 | struct buffer_head *bitmap_bh, | 1707 | struct buffer_head *bitmap_bh, |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 1a3c94cb9250..f212dc01a84b 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -86,20 +86,29 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
86 | u32 *cluster_start, | 86 | u32 *cluster_start, |
87 | u32 *num_clusters); | 87 | u32 *num_clusters); |
88 | 88 | ||
89 | int ocfs2_free_suballoc_bits(handle_t *handle, | ||
90 | struct inode *alloc_inode, | ||
91 | struct buffer_head *alloc_bh, | ||
92 | unsigned int start_bit, | ||
93 | u64 bg_blkno, | ||
94 | unsigned int count); | ||
89 | int ocfs2_free_dinode(handle_t *handle, | 95 | int ocfs2_free_dinode(handle_t *handle, |
90 | struct inode *inode_alloc_inode, | 96 | struct inode *inode_alloc_inode, |
91 | struct buffer_head *inode_alloc_bh, | 97 | struct buffer_head *inode_alloc_bh, |
92 | struct ocfs2_dinode *di); | 98 | struct ocfs2_dinode *di); |
93 | int ocfs2_free_extent_block(handle_t *handle, | ||
94 | struct inode *eb_alloc_inode, | ||
95 | struct buffer_head *eb_alloc_bh, | ||
96 | struct ocfs2_extent_block *eb); | ||
97 | int ocfs2_free_clusters(handle_t *handle, | 99 | int ocfs2_free_clusters(handle_t *handle, |
98 | struct inode *bitmap_inode, | 100 | struct inode *bitmap_inode, |
99 | struct buffer_head *bitmap_bh, | 101 | struct buffer_head *bitmap_bh, |
100 | u64 start_blk, | 102 | u64 start_blk, |
101 | unsigned int num_clusters); | 103 | unsigned int num_clusters); |
102 | 104 | ||
105 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
106 | { | ||
107 | u64 group = block - (u64) bit; | ||
108 | |||
109 | return group; | ||
110 | } | ||
111 | |||
103 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, | 112 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, |
104 | u64 bg_blkno) | 113 | u64 bg_blkno) |
105 | { | 114 | { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 86b559c7dce9..3a5a1ed09ac9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -82,7 +82,8 @@ MODULE_AUTHOR("Oracle"); | |||
82 | MODULE_LICENSE("GPL"); | 82 | MODULE_LICENSE("GPL"); |
83 | 83 | ||
84 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 84 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
85 | unsigned long *mount_opt, int is_remount); | 85 | unsigned long *mount_opt, s16 *slot, |
86 | int is_remount); | ||
86 | static void ocfs2_put_super(struct super_block *sb); | 87 | static void ocfs2_put_super(struct super_block *sb); |
87 | static int ocfs2_mount_volume(struct super_block *sb); | 88 | static int ocfs2_mount_volume(struct super_block *sb); |
88 | static int ocfs2_remount(struct super_block *sb, int *flags, char *data); | 89 | static int ocfs2_remount(struct super_block *sb, int *flags, char *data); |
@@ -114,8 +115,6 @@ static void ocfs2_write_super(struct super_block *sb); | |||
114 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); | 115 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); |
115 | static void ocfs2_destroy_inode(struct inode *inode); | 116 | static void ocfs2_destroy_inode(struct inode *inode); |
116 | 117 | ||
117 | static unsigned long long ocfs2_max_file_offset(unsigned int blockshift); | ||
118 | |||
119 | static const struct super_operations ocfs2_sops = { | 118 | static const struct super_operations ocfs2_sops = { |
120 | .statfs = ocfs2_statfs, | 119 | .statfs = ocfs2_statfs, |
121 | .alloc_inode = ocfs2_alloc_inode, | 120 | .alloc_inode = ocfs2_alloc_inode, |
@@ -140,6 +139,7 @@ enum { | |||
140 | Opt_data_ordered, | 139 | Opt_data_ordered, |
141 | Opt_data_writeback, | 140 | Opt_data_writeback, |
142 | Opt_atime_quantum, | 141 | Opt_atime_quantum, |
142 | Opt_slot, | ||
143 | Opt_err, | 143 | Opt_err, |
144 | }; | 144 | }; |
145 | 145 | ||
@@ -154,6 +154,7 @@ static match_table_t tokens = { | |||
154 | {Opt_data_ordered, "data=ordered"}, | 154 | {Opt_data_ordered, "data=ordered"}, |
155 | {Opt_data_writeback, "data=writeback"}, | 155 | {Opt_data_writeback, "data=writeback"}, |
156 | {Opt_atime_quantum, "atime_quantum=%u"}, | 156 | {Opt_atime_quantum, "atime_quantum=%u"}, |
157 | {Opt_slot, "preferred_slot=%u"}, | ||
157 | {Opt_err, NULL} | 158 | {Opt_err, NULL} |
158 | }; | 159 | }; |
159 | 160 | ||
@@ -318,7 +319,7 @@ static void ocfs2_destroy_inode(struct inode *inode) | |||
318 | /* From xfs_super.c:xfs_max_file_offset | 319 | /* From xfs_super.c:xfs_max_file_offset |
319 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. | 320 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. |
320 | */ | 321 | */ |
321 | static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) | 322 | unsigned long long ocfs2_max_file_offset(unsigned int blockshift) |
322 | { | 323 | { |
323 | unsigned int pagefactor = 1; | 324 | unsigned int pagefactor = 1; |
324 | unsigned int bitshift = BITS_PER_LONG - 1; | 325 | unsigned int bitshift = BITS_PER_LONG - 1; |
@@ -355,9 +356,10 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
355 | int incompat_features; | 356 | int incompat_features; |
356 | int ret = 0; | 357 | int ret = 0; |
357 | unsigned long parsed_options; | 358 | unsigned long parsed_options; |
359 | s16 slot; | ||
358 | struct ocfs2_super *osb = OCFS2_SB(sb); | 360 | struct ocfs2_super *osb = OCFS2_SB(sb); |
359 | 361 | ||
360 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { | 362 | if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) { |
361 | ret = -EINVAL; | 363 | ret = -EINVAL; |
362 | goto out; | 364 | goto out; |
363 | } | 365 | } |
@@ -534,6 +536,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
534 | struct dentry *root; | 536 | struct dentry *root; |
535 | int status, sector_size; | 537 | int status, sector_size; |
536 | unsigned long parsed_opt; | 538 | unsigned long parsed_opt; |
539 | s16 slot; | ||
537 | struct inode *inode = NULL; | 540 | struct inode *inode = NULL; |
538 | struct ocfs2_super *osb = NULL; | 541 | struct ocfs2_super *osb = NULL; |
539 | struct buffer_head *bh = NULL; | 542 | struct buffer_head *bh = NULL; |
@@ -541,7 +544,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
541 | 544 | ||
542 | mlog_entry("%p, %p, %i", sb, data, silent); | 545 | mlog_entry("%p, %p, %i", sb, data, silent); |
543 | 546 | ||
544 | if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { | 547 | if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) { |
545 | status = -EINVAL; | 548 | status = -EINVAL; |
546 | goto read_super_error; | 549 | goto read_super_error; |
547 | } | 550 | } |
@@ -571,6 +574,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
571 | brelse(bh); | 574 | brelse(bh); |
572 | bh = NULL; | 575 | bh = NULL; |
573 | osb->s_mount_opt = parsed_opt; | 576 | osb->s_mount_opt = parsed_opt; |
577 | osb->preferred_slot = slot; | ||
574 | 578 | ||
575 | sb->s_magic = OCFS2_SUPER_MAGIC; | 579 | sb->s_magic = OCFS2_SUPER_MAGIC; |
576 | 580 | ||
@@ -713,6 +717,7 @@ static struct file_system_type ocfs2_fs_type = { | |||
713 | static int ocfs2_parse_options(struct super_block *sb, | 717 | static int ocfs2_parse_options(struct super_block *sb, |
714 | char *options, | 718 | char *options, |
715 | unsigned long *mount_opt, | 719 | unsigned long *mount_opt, |
720 | s16 *slot, | ||
716 | int is_remount) | 721 | int is_remount) |
717 | { | 722 | { |
718 | int status; | 723 | int status; |
@@ -722,6 +727,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
722 | options ? options : "(none)"); | 727 | options ? options : "(none)"); |
723 | 728 | ||
724 | *mount_opt = 0; | 729 | *mount_opt = 0; |
730 | *slot = OCFS2_INVALID_SLOT; | ||
725 | 731 | ||
726 | if (!options) { | 732 | if (!options) { |
727 | status = 1; | 733 | status = 1; |
@@ -782,6 +788,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
782 | else | 788 | else |
783 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 789 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
784 | break; | 790 | break; |
791 | case Opt_slot: | ||
792 | option = 0; | ||
793 | if (match_int(&args[0], &option)) { | ||
794 | status = 0; | ||
795 | goto bail; | ||
796 | } | ||
797 | if (option) | ||
798 | *slot = (s16)option; | ||
799 | break; | ||
785 | default: | 800 | default: |
786 | mlog(ML_ERROR, | 801 | mlog(ML_ERROR, |
787 | "Unrecognized mount option \"%s\" " | 802 | "Unrecognized mount option \"%s\" " |
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index 783f5270f2a1..3b9cb3d0b008 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h | |||
@@ -45,4 +45,6 @@ void __ocfs2_abort(struct super_block *sb, | |||
45 | 45 | ||
46 | #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) | 46 | #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) |
47 | 47 | ||
48 | unsigned long long ocfs2_max_file_offset(unsigned int blockshift); | ||
49 | |||
48 | #endif /* OCFS2_SUPER_H */ | 50 | #endif /* OCFS2_SUPER_H */ |
@@ -855,7 +855,7 @@ EXPORT_SYMBOL(dentry_open); | |||
855 | /* | 855 | /* |
856 | * Find an empty file descriptor entry, and mark it busy. | 856 | * Find an empty file descriptor entry, and mark it busy. |
857 | */ | 857 | */ |
858 | int get_unused_fd(void) | 858 | int get_unused_fd_flags(int flags) |
859 | { | 859 | { |
860 | struct files_struct * files = current->files; | 860 | struct files_struct * files = current->files; |
861 | int fd, error; | 861 | int fd, error; |
@@ -891,7 +891,10 @@ repeat: | |||
891 | } | 891 | } |
892 | 892 | ||
893 | FD_SET(fd, fdt->open_fds); | 893 | FD_SET(fd, fdt->open_fds); |
894 | FD_CLR(fd, fdt->close_on_exec); | 894 | if (flags & O_CLOEXEC) |
895 | FD_SET(fd, fdt->close_on_exec); | ||
896 | else | ||
897 | FD_CLR(fd, fdt->close_on_exec); | ||
895 | files->next_fd = fd + 1; | 898 | files->next_fd = fd + 1; |
896 | #if 1 | 899 | #if 1 |
897 | /* Sanity check */ | 900 | /* Sanity check */ |
@@ -907,6 +910,11 @@ out: | |||
907 | return error; | 910 | return error; |
908 | } | 911 | } |
909 | 912 | ||
913 | int get_unused_fd(void) | ||
914 | { | ||
915 | return get_unused_fd_flags(0); | ||
916 | } | ||
917 | |||
910 | EXPORT_SYMBOL(get_unused_fd); | 918 | EXPORT_SYMBOL(get_unused_fd); |
911 | 919 | ||
912 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | 920 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) |
@@ -959,7 +967,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) | |||
959 | int fd = PTR_ERR(tmp); | 967 | int fd = PTR_ERR(tmp); |
960 | 968 | ||
961 | if (!IS_ERR(tmp)) { | 969 | if (!IS_ERR(tmp)) { |
962 | fd = get_unused_fd(); | 970 | fd = get_unused_fd_flags(flags); |
963 | if (fd >= 0) { | 971 | if (fd >= 0) { |
964 | struct file *f = do_filp_open(dfd, tmp, flags, mode); | 972 | struct file *f = do_filp_open(dfd, tmp, flags, mode); |
965 | if (IS_ERR(f)) { | 973 | if (IS_ERR(f)) { |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index e3491328596b..3d3e16631472 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #define PARTITION_RISCIX_SCSI 2 | 25 | #define PARTITION_RISCIX_SCSI 2 |
26 | #define PARTITION_LINUX 9 | 26 | #define PARTITION_LINUX 9 |
27 | 27 | ||
28 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
29 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
28 | static struct adfs_discrecord * | 30 | static struct adfs_discrecord * |
29 | adfs_partition(struct parsed_partitions *state, char *name, char *data, | 31 | adfs_partition(struct parsed_partitions *state, char *name, char *data, |
30 | unsigned long first_sector, int slot) | 32 | unsigned long first_sector, int slot) |
@@ -48,6 +50,7 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data, | |||
48 | put_partition(state, slot, first_sector, nr_sects); | 50 | put_partition(state, slot, first_sector, nr_sects); |
49 | return dr; | 51 | return dr; |
50 | } | 52 | } |
53 | #endif | ||
51 | 54 | ||
52 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 55 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
53 | 56 | ||
@@ -65,6 +68,8 @@ struct riscix_record { | |||
65 | struct riscix_part part[8]; | 68 | struct riscix_part part[8]; |
66 | }; | 69 | }; |
67 | 70 | ||
71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
72 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
68 | static int | 73 | static int |
69 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | 74 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, |
70 | unsigned long first_sect, int slot, unsigned long nr_sects) | 75 | unsigned long first_sect, int slot, unsigned long nr_sects) |
@@ -105,6 +110,7 @@ riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
105 | return slot; | 110 | return slot; |
106 | } | 111 | } |
107 | #endif | 112 | #endif |
113 | #endif | ||
108 | 114 | ||
109 | #define LINUX_NATIVE_MAGIC 0xdeafa1de | 115 | #define LINUX_NATIVE_MAGIC 0xdeafa1de |
110 | #define LINUX_SWAP_MAGIC 0xdeafab1e | 116 | #define LINUX_SWAP_MAGIC 0xdeafab1e |
@@ -115,6 +121,8 @@ struct linux_part { | |||
115 | __le32 nr_sects; | 121 | __le32 nr_sects; |
116 | }; | 122 | }; |
117 | 123 | ||
124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
125 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
118 | static int | 126 | static int |
119 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, | 127 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, |
120 | unsigned long first_sect, int slot, unsigned long nr_sects) | 128 | unsigned long first_sect, int slot, unsigned long nr_sects) |
@@ -146,6 +154,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
146 | put_dev_sector(sect); | 154 | put_dev_sector(sect); |
147 | return slot; | 155 | return slot; |
148 | } | 156 | } |
157 | #endif | ||
149 | 158 | ||
150 | #ifdef CONFIG_ACORN_PARTITION_CUMANA | 159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA |
151 | int | 160 | int |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 99873a2b4cbc..e7dd1d4e3473 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -677,15 +677,24 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
677 | * Return: -1 Error, the calculated offset exceeded the size of the buffer | 677 | * Return: -1 Error, the calculated offset exceeded the size of the buffer |
678 | * n OK, a range-checked offset into buffer | 678 | * n OK, a range-checked offset into buffer |
679 | */ | 679 | */ |
680 | static int ldm_relative (const u8 *buffer, int buflen, int base, int offset) | 680 | static int ldm_relative(const u8 *buffer, int buflen, int base, int offset) |
681 | { | 681 | { |
682 | 682 | ||
683 | base += offset; | 683 | base += offset; |
684 | if ((!buffer) || (offset < 0) || (base > buflen)) | 684 | if (!buffer || offset < 0 || base > buflen) { |
685 | if (!buffer) | ||
686 | ldm_error("!buffer"); | ||
687 | if (offset < 0) | ||
688 | ldm_error("offset (%d) < 0", offset); | ||
689 | if (base > buflen) | ||
690 | ldm_error("base (%d) > buflen (%d)", base, buflen); | ||
685 | return -1; | 691 | return -1; |
686 | if ((base + buffer[base]) >= buflen) | 692 | } |
693 | if (base + buffer[base] >= buflen) { | ||
694 | ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base, | ||
695 | buffer[base], buflen); | ||
687 | return -1; | 696 | return -1; |
688 | 697 | } | |
689 | return buffer[base] + offset + 1; | 698 | return buffer[base] + offset + 1; |
690 | } | 699 | } |
691 | 700 | ||
@@ -1054,60 +1063,98 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) | |||
1054 | * Return: 'true' @vb contains a Volume VBLK | 1063 | * Return: 'true' @vb contains a Volume VBLK |
1055 | * 'false' @vb contents are not defined | 1064 | * 'false' @vb contents are not defined |
1056 | */ | 1065 | */ |
1057 | static bool ldm_parse_vol5 (const u8 *buffer, int buflen, struct vblk *vb) | 1066 | static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) |
1058 | { | 1067 | { |
1059 | int r_objid, r_name, r_vtype, r_child, r_size, r_id1, r_id2, r_size2; | 1068 | int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size; |
1060 | int r_drive, len; | 1069 | int r_id1, r_id2, r_size2, r_drive, len; |
1061 | struct vblk_volu *volu; | 1070 | struct vblk_volu *volu; |
1062 | 1071 | ||
1063 | BUG_ON (!buffer || !vb); | 1072 | BUG_ON(!buffer || !vb); |
1064 | 1073 | r_objid = ldm_relative(buffer, buflen, 0x18, 0); | |
1065 | r_objid = ldm_relative (buffer, buflen, 0x18, 0); | 1074 | if (r_objid < 0) { |
1066 | r_name = ldm_relative (buffer, buflen, 0x18, r_objid); | 1075 | ldm_error("r_objid %d < 0", r_objid); |
1067 | r_vtype = ldm_relative (buffer, buflen, 0x18, r_name); | 1076 | return false; |
1068 | r_child = ldm_relative (buffer, buflen, 0x2E, r_vtype); | 1077 | } |
1069 | r_size = ldm_relative (buffer, buflen, 0x3E, r_child); | 1078 | r_name = ldm_relative(buffer, buflen, 0x18, r_objid); |
1070 | 1079 | if (r_name < 0) { | |
1071 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) | 1080 | ldm_error("r_name %d < 0", r_name); |
1072 | r_id1 = ldm_relative (buffer, buflen, 0x53, r_size); | 1081 | return false; |
1073 | else | 1082 | } |
1083 | r_vtype = ldm_relative(buffer, buflen, 0x18, r_name); | ||
1084 | if (r_vtype < 0) { | ||
1085 | ldm_error("r_vtype %d < 0", r_vtype); | ||
1086 | return false; | ||
1087 | } | ||
1088 | r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype); | ||
1089 | if (r_disable_drive_letter < 0) { | ||
1090 | ldm_error("r_disable_drive_letter %d < 0", | ||
1091 | r_disable_drive_letter); | ||
1092 | return false; | ||
1093 | } | ||
1094 | r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter); | ||
1095 | if (r_child < 0) { | ||
1096 | ldm_error("r_child %d < 0", r_child); | ||
1097 | return false; | ||
1098 | } | ||
1099 | r_size = ldm_relative(buffer, buflen, 0x3D, r_child); | ||
1100 | if (r_size < 0) { | ||
1101 | ldm_error("r_size %d < 0", r_size); | ||
1102 | return false; | ||
1103 | } | ||
1104 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) { | ||
1105 | r_id1 = ldm_relative(buffer, buflen, 0x52, r_size); | ||
1106 | if (r_id1 < 0) { | ||
1107 | ldm_error("r_id1 %d < 0", r_id1); | ||
1108 | return false; | ||
1109 | } | ||
1110 | } else | ||
1074 | r_id1 = r_size; | 1111 | r_id1 = r_size; |
1075 | 1112 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) { | |
1076 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) | 1113 | r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1); |
1077 | r_id2 = ldm_relative (buffer, buflen, 0x53, r_id1); | 1114 | if (r_id2 < 0) { |
1078 | else | 1115 | ldm_error("r_id2 %d < 0", r_id2); |
1116 | return false; | ||
1117 | } | ||
1118 | } else | ||
1079 | r_id2 = r_id1; | 1119 | r_id2 = r_id1; |
1080 | 1120 | if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) { | |
1081 | if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) | 1121 | r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2); |
1082 | r_size2 = ldm_relative (buffer, buflen, 0x53, r_id2); | 1122 | if (r_size2 < 0) { |
1083 | else | 1123 | ldm_error("r_size2 %d < 0", r_size2); |
1124 | return false; | ||
1125 | } | ||
1126 | } else | ||
1084 | r_size2 = r_id2; | 1127 | r_size2 = r_id2; |
1085 | 1128 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { | |
1086 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) | 1129 | r_drive = ldm_relative(buffer, buflen, 0x52, r_size2); |
1087 | r_drive = ldm_relative (buffer, buflen, 0x53, r_size2); | 1130 | if (r_drive < 0) { |
1088 | else | 1131 | ldm_error("r_drive %d < 0", r_drive); |
1132 | return false; | ||
1133 | } | ||
1134 | } else | ||
1089 | r_drive = r_size2; | 1135 | r_drive = r_size2; |
1090 | |||
1091 | len = r_drive; | 1136 | len = r_drive; |
1092 | if (len < 0) | 1137 | if (len < 0) { |
1138 | ldm_error("len %d < 0", len); | ||
1093 | return false; | 1139 | return false; |
1094 | 1140 | } | |
1095 | len += VBLK_SIZE_VOL5; | 1141 | len += VBLK_SIZE_VOL5; |
1096 | if (len != BE32 (buffer + 0x14)) | 1142 | if (len > BE32(buffer + 0x14)) { |
1143 | ldm_error("len %d > BE32(buffer + 0x14) %d", len, | ||
1144 | BE32(buffer + 0x14)); | ||
1097 | return false; | 1145 | return false; |
1098 | 1146 | } | |
1099 | volu = &vb->vblk.volu; | 1147 | volu = &vb->vblk.volu; |
1100 | 1148 | ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type, | |
1101 | ldm_get_vstr (buffer + 0x18 + r_name, volu->volume_type, | 1149 | sizeof(volu->volume_type)); |
1102 | sizeof (volu->volume_type)); | 1150 | memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter, |
1103 | memcpy (volu->volume_state, buffer + 0x19 + r_vtype, | 1151 | sizeof(volu->volume_state)); |
1104 | sizeof (volu->volume_state)); | 1152 | volu->size = ldm_get_vnum(buffer + 0x3D + r_child); |
1105 | volu->size = ldm_get_vnum (buffer + 0x3E + r_child); | 1153 | volu->partition_type = buffer[0x41 + r_size]; |
1106 | volu->partition_type = buffer[0x42 + r_size]; | 1154 | memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid)); |
1107 | memcpy (volu->guid, buffer + 0x43 + r_size, sizeof (volu->guid)); | ||
1108 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { | 1155 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { |
1109 | ldm_get_vstr (buffer + 0x53 + r_size, volu->drive_hint, | 1156 | ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint, |
1110 | sizeof (volu->drive_hint)); | 1157 | sizeof(volu->drive_hint)); |
1111 | } | 1158 | } |
1112 | return true; | 1159 | return true; |
1113 | } | 1160 | } |
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index d2e6a3046939..80f63b5fdd9f 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h | |||
@@ -68,7 +68,7 @@ struct parsed_partitions; | |||
68 | #define VBLK_SIZE_DSK3 12 | 68 | #define VBLK_SIZE_DSK3 12 |
69 | #define VBLK_SIZE_DSK4 45 | 69 | #define VBLK_SIZE_DSK4 45 |
70 | #define VBLK_SIZE_PRT3 28 | 70 | #define VBLK_SIZE_PRT3 28 |
71 | #define VBLK_SIZE_VOL5 59 | 71 | #define VBLK_SIZE_VOL5 58 |
72 | 72 | ||
73 | /* component types */ | 73 | /* component types */ |
74 | #define COMP_STRIPE 0x01 /* Stripe-set */ | 74 | #define COMP_STRIPE 0x01 /* Stripe-set */ |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 98e78e2f18d6..965625a0977d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -62,6 +62,8 @@ | |||
62 | #include <linux/mman.h> | 62 | #include <linux/mman.h> |
63 | #include <linux/proc_fs.h> | 63 | #include <linux/proc_fs.h> |
64 | #include <linux/ioport.h> | 64 | #include <linux/ioport.h> |
65 | #include <linux/uaccess.h> | ||
66 | #include <linux/io.h> | ||
65 | #include <linux/mm.h> | 67 | #include <linux/mm.h> |
66 | #include <linux/hugetlb.h> | 68 | #include <linux/hugetlb.h> |
67 | #include <linux/pagemap.h> | 69 | #include <linux/pagemap.h> |
@@ -76,9 +78,7 @@ | |||
76 | #include <linux/rcupdate.h> | 78 | #include <linux/rcupdate.h> |
77 | #include <linux/delayacct.h> | 79 | #include <linux/delayacct.h> |
78 | 80 | ||
79 | #include <asm/uaccess.h> | ||
80 | #include <asm/pgtable.h> | 81 | #include <asm/pgtable.h> |
81 | #include <asm/io.h> | ||
82 | #include <asm/processor.h> | 82 | #include <asm/processor.h> |
83 | #include "internal.h" | 83 | #include "internal.h" |
84 | 84 | ||
@@ -87,10 +87,10 @@ | |||
87 | do { memcpy(buffer, string, strlen(string)); \ | 87 | do { memcpy(buffer, string, strlen(string)); \ |
88 | buffer += strlen(string); } while (0) | 88 | buffer += strlen(string); } while (0) |
89 | 89 | ||
90 | static inline char * task_name(struct task_struct *p, char * buf) | 90 | static inline char *task_name(struct task_struct *p, char *buf) |
91 | { | 91 | { |
92 | int i; | 92 | int i; |
93 | char * name; | 93 | char *name; |
94 | char tcomm[sizeof(p->comm)]; | 94 | char tcomm[sizeof(p->comm)]; |
95 | 95 | ||
96 | get_task_comm(tcomm, p); | 96 | get_task_comm(tcomm, p); |
@@ -138,7 +138,7 @@ static const char *task_state_array[] = { | |||
138 | "X (dead)" /* 32 */ | 138 | "X (dead)" /* 32 */ |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static inline const char * get_task_state(struct task_struct *tsk) | 141 | static inline const char *get_task_state(struct task_struct *tsk) |
142 | { | 142 | { |
143 | unsigned int state = (tsk->state & (TASK_RUNNING | | 143 | unsigned int state = (tsk->state & (TASK_RUNNING | |
144 | TASK_INTERRUPTIBLE | | 144 | TASK_INTERRUPTIBLE | |
@@ -156,7 +156,7 @@ static inline const char * get_task_state(struct task_struct *tsk) | |||
156 | return *p; | 156 | return *p; |
157 | } | 157 | } |
158 | 158 | ||
159 | static inline char * task_state(struct task_struct *p, char *buffer) | 159 | static inline char *task_state(struct task_struct *p, char *buffer) |
160 | { | 160 | { |
161 | struct group_info *group_info; | 161 | struct group_info *group_info; |
162 | int g; | 162 | int g; |
@@ -172,8 +172,8 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
172 | "Uid:\t%d\t%d\t%d\t%d\n" | 172 | "Uid:\t%d\t%d\t%d\t%d\n" |
173 | "Gid:\t%d\t%d\t%d\t%d\n", | 173 | "Gid:\t%d\t%d\t%d\t%d\n", |
174 | get_task_state(p), | 174 | get_task_state(p), |
175 | p->tgid, p->pid, | 175 | p->tgid, p->pid, |
176 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, | 176 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, |
177 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, | 177 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, |
178 | p->uid, p->euid, p->suid, p->fsuid, | 178 | p->uid, p->euid, p->suid, p->fsuid, |
179 | p->gid, p->egid, p->sgid, p->fsgid); | 179 | p->gid, p->egid, p->sgid, p->fsgid); |
@@ -191,15 +191,15 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
191 | get_group_info(group_info); | 191 | get_group_info(group_info); |
192 | task_unlock(p); | 192 | task_unlock(p); |
193 | 193 | ||
194 | for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) | 194 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) |
195 | buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); | 195 | buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g)); |
196 | put_group_info(group_info); | 196 | put_group_info(group_info); |
197 | 197 | ||
198 | buffer += sprintf(buffer, "\n"); | 198 | buffer += sprintf(buffer, "\n"); |
199 | return buffer; | 199 | return buffer; |
200 | } | 200 | } |
201 | 201 | ||
202 | static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) | 202 | static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) |
203 | { | 203 | { |
204 | int i, len; | 204 | int i, len; |
205 | 205 | ||
@@ -239,7 +239,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, | |||
239 | } | 239 | } |
240 | } | 240 | } |
241 | 241 | ||
242 | static inline char * task_sig(struct task_struct *p, char *buffer) | 242 | static inline char *task_sig(struct task_struct *p, char *buffer) |
243 | { | 243 | { |
244 | unsigned long flags; | 244 | unsigned long flags; |
245 | sigset_t pending, shpending, blocked, ignored, caught; | 245 | sigset_t pending, shpending, blocked, ignored, caught; |
@@ -289,14 +289,23 @@ static inline char *task_cap(struct task_struct *p, char *buffer) | |||
289 | cap_t(p->cap_effective)); | 289 | cap_t(p->cap_effective)); |
290 | } | 290 | } |
291 | 291 | ||
292 | int proc_pid_status(struct task_struct *task, char * buffer) | 292 | static inline char *task_context_switch_counts(struct task_struct *p, |
293 | char *buffer) | ||
293 | { | 294 | { |
294 | char * orig = buffer; | 295 | return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n" |
296 | "nonvoluntary_ctxt_switches:\t%lu\n", | ||
297 | p->nvcsw, | ||
298 | p->nivcsw); | ||
299 | } | ||
300 | |||
301 | int proc_pid_status(struct task_struct *task, char *buffer) | ||
302 | { | ||
303 | char *orig = buffer; | ||
295 | struct mm_struct *mm = get_task_mm(task); | 304 | struct mm_struct *mm = get_task_mm(task); |
296 | 305 | ||
297 | buffer = task_name(task, buffer); | 306 | buffer = task_name(task, buffer); |
298 | buffer = task_state(task, buffer); | 307 | buffer = task_state(task, buffer); |
299 | 308 | ||
300 | if (mm) { | 309 | if (mm) { |
301 | buffer = task_mem(mm, buffer); | 310 | buffer = task_mem(mm, buffer); |
302 | mmput(mm); | 311 | mmput(mm); |
@@ -307,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) | |||
307 | #if defined(CONFIG_S390) | 316 | #if defined(CONFIG_S390) |
308 | buffer = task_show_regs(task, buffer); | 317 | buffer = task_show_regs(task, buffer); |
309 | #endif | 318 | #endif |
319 | buffer = task_context_switch_counts(task, buffer); | ||
310 | return buffer - orig; | 320 | return buffer - orig; |
311 | } | 321 | } |
312 | 322 | ||
@@ -332,7 +342,7 @@ static clock_t task_utime(struct task_struct *p) | |||
332 | 342 | ||
333 | static clock_t task_stime(struct task_struct *p) | 343 | static clock_t task_stime(struct task_struct *p) |
334 | { | 344 | { |
335 | clock_t stime = cputime_to_clock_t(p->stime); | 345 | clock_t stime; |
336 | 346 | ||
337 | /* | 347 | /* |
338 | * Use CFS's precise accounting. (we subtract utime from | 348 | * Use CFS's precise accounting. (we subtract utime from |
@@ -344,8 +354,7 @@ static clock_t task_stime(struct task_struct *p) | |||
344 | return stime; | 354 | return stime; |
345 | } | 355 | } |
346 | 356 | ||
347 | 357 | static int do_task_stat(struct task_struct *task, char *buffer, int whole) | |
348 | static int do_task_stat(struct task_struct *task, char * buffer, int whole) | ||
349 | { | 358 | { |
350 | unsigned long vsize, eip, esp, wchan = ~0UL; | 359 | unsigned long vsize, eip, esp, wchan = ~0UL; |
351 | long priority, nice; | 360 | long priority, nice; |
@@ -353,7 +362,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
353 | sigset_t sigign, sigcatch; | 362 | sigset_t sigign, sigcatch; |
354 | char state; | 363 | char state; |
355 | int res; | 364 | int res; |
356 | pid_t ppid = 0, pgid = -1, sid = -1; | 365 | pid_t ppid = 0, pgid = -1, sid = -1; |
357 | int num_threads = 0; | 366 | int num_threads = 0; |
358 | struct mm_struct *mm; | 367 | struct mm_struct *mm; |
359 | unsigned long long start_time; | 368 | unsigned long long start_time; |
@@ -424,7 +433,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
424 | } | 433 | } |
425 | rcu_read_unlock(); | 434 | rcu_read_unlock(); |
426 | 435 | ||
427 | if (!whole || num_threads<2) | 436 | if (!whole || num_threads < 2) |
428 | wchan = get_wchan(task); | 437 | wchan = get_wchan(task); |
429 | if (!whole) { | 438 | if (!whole) { |
430 | min_flt = task->min_flt; | 439 | min_flt = task->min_flt; |
@@ -440,12 +449,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
440 | 449 | ||
441 | /* Temporary variable needed for gcc-2.96 */ | 450 | /* Temporary variable needed for gcc-2.96 */ |
442 | /* convert timespec -> nsec*/ | 451 | /* convert timespec -> nsec*/ |
443 | start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC | 452 | start_time = |
444 | + task->start_time.tv_nsec; | 453 | (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC |
454 | + task->real_start_time.tv_nsec; | ||
445 | /* convert nsec -> ticks */ | 455 | /* convert nsec -> ticks */ |
446 | start_time = nsec_to_clock_t(start_time); | 456 | start_time = nsec_to_clock_t(start_time); |
447 | 457 | ||
448 | res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \ | 458 | res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ |
449 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 459 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ |
450 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", | 460 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", |
451 | task->pid, | 461 | task->pid, |
@@ -471,7 +481,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
471 | start_time, | 481 | start_time, |
472 | vsize, | 482 | vsize, |
473 | mm ? get_mm_rss(mm) : 0, | 483 | mm ? get_mm_rss(mm) : 0, |
474 | rsslim, | 484 | rsslim, |
475 | mm ? mm->start_code : 0, | 485 | mm ? mm->start_code : 0, |
476 | mm ? mm->end_code : 0, | 486 | mm ? mm->end_code : 0, |
477 | mm ? mm->start_stack : 0, | 487 | mm ? mm->start_stack : 0, |
@@ -493,17 +503,17 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
493 | task->rt_priority, | 503 | task->rt_priority, |
494 | task->policy, | 504 | task->policy, |
495 | (unsigned long long)delayacct_blkio_ticks(task)); | 505 | (unsigned long long)delayacct_blkio_ticks(task)); |
496 | if(mm) | 506 | if (mm) |
497 | mmput(mm); | 507 | mmput(mm); |
498 | return res; | 508 | return res; |
499 | } | 509 | } |
500 | 510 | ||
501 | int proc_tid_stat(struct task_struct *task, char * buffer) | 511 | int proc_tid_stat(struct task_struct *task, char *buffer) |
502 | { | 512 | { |
503 | return do_task_stat(task, buffer, 0); | 513 | return do_task_stat(task, buffer, 0); |
504 | } | 514 | } |
505 | 515 | ||
506 | int proc_tgid_stat(struct task_struct *task, char * buffer) | 516 | int proc_tgid_stat(struct task_struct *task, char *buffer) |
507 | { | 517 | { |
508 | return do_task_stat(task, buffer, 1); | 518 | return do_task_stat(task, buffer, 1); |
509 | } | 519 | } |
@@ -512,12 +522,12 @@ int proc_pid_statm(struct task_struct *task, char *buffer) | |||
512 | { | 522 | { |
513 | int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; | 523 | int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; |
514 | struct mm_struct *mm = get_task_mm(task); | 524 | struct mm_struct *mm = get_task_mm(task); |
515 | 525 | ||
516 | if (mm) { | 526 | if (mm) { |
517 | size = task_statm(mm, &shared, &text, &data, &resident); | 527 | size = task_statm(mm, &shared, &text, &data, &resident); |
518 | mmput(mm); | 528 | mmput(mm); |
519 | } | 529 | } |
520 | 530 | ||
521 | return sprintf(buffer,"%d %d %d %d %d %d %d\n", | 531 | return sprintf(buffer, "%d %d %d %d %d %d %d\n", |
522 | size, resident, shared, text, lib, data, 0); | 532 | size, resident, shared, text, lib, data, 0); |
523 | } | 533 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 46ea5d56e1bb..42cb4f5613b6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -67,7 +67,6 @@ | |||
67 | #include <linux/mount.h> | 67 | #include <linux/mount.h> |
68 | #include <linux/security.h> | 68 | #include <linux/security.h> |
69 | #include <linux/ptrace.h> | 69 | #include <linux/ptrace.h> |
70 | #include <linux/seccomp.h> | ||
71 | #include <linux/cpuset.h> | 70 | #include <linux/cpuset.h> |
72 | #include <linux/audit.h> | 71 | #include <linux/audit.h> |
73 | #include <linux/poll.h> | 72 | #include <linux/poll.h> |
@@ -204,12 +203,17 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) | |||
204 | int res = 0; | 203 | int res = 0; |
205 | struct mm_struct *mm = get_task_mm(task); | 204 | struct mm_struct *mm = get_task_mm(task); |
206 | if (mm) { | 205 | if (mm) { |
207 | unsigned int len = mm->env_end - mm->env_start; | 206 | unsigned int len; |
207 | |||
208 | res = -ESRCH; | ||
209 | if (!ptrace_may_attach(task)) | ||
210 | goto out; | ||
211 | |||
212 | len = mm->env_end - mm->env_start; | ||
208 | if (len > PAGE_SIZE) | 213 | if (len > PAGE_SIZE) |
209 | len = PAGE_SIZE; | 214 | len = PAGE_SIZE; |
210 | res = access_process_vm(task, mm->env_start, buffer, len, 0); | 215 | res = access_process_vm(task, mm->env_start, buffer, len, 0); |
211 | if (!ptrace_may_attach(task)) | 216 | out: |
212 | res = -ESRCH; | ||
213 | mmput(mm); | 217 | mmput(mm); |
214 | } | 218 | } |
215 | return res; | 219 | return res; |
@@ -279,7 +283,7 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) | |||
279 | static int proc_pid_wchan(struct task_struct *task, char *buffer) | 283 | static int proc_pid_wchan(struct task_struct *task, char *buffer) |
280 | { | 284 | { |
281 | unsigned long wchan; | 285 | unsigned long wchan; |
282 | char symname[KSYM_NAME_LEN+1]; | 286 | char symname[KSYM_NAME_LEN]; |
283 | 287 | ||
284 | wchan = get_wchan(task); | 288 | wchan = get_wchan(task); |
285 | 289 | ||
@@ -812,71 +816,6 @@ static const struct file_operations proc_loginuid_operations = { | |||
812 | }; | 816 | }; |
813 | #endif | 817 | #endif |
814 | 818 | ||
815 | #ifdef CONFIG_SECCOMP | ||
816 | static ssize_t seccomp_read(struct file *file, char __user *buf, | ||
817 | size_t count, loff_t *ppos) | ||
818 | { | ||
819 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); | ||
820 | char __buf[20]; | ||
821 | size_t len; | ||
822 | |||
823 | if (!tsk) | ||
824 | return -ESRCH; | ||
825 | /* no need to print the trailing zero, so use only len */ | ||
826 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | ||
827 | put_task_struct(tsk); | ||
828 | |||
829 | return simple_read_from_buffer(buf, count, ppos, __buf, len); | ||
830 | } | ||
831 | |||
832 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | ||
833 | size_t count, loff_t *ppos) | ||
834 | { | ||
835 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); | ||
836 | char __buf[20], *end; | ||
837 | unsigned int seccomp_mode; | ||
838 | ssize_t result; | ||
839 | |||
840 | result = -ESRCH; | ||
841 | if (!tsk) | ||
842 | goto out_no_task; | ||
843 | |||
844 | /* can set it only once to be even more secure */ | ||
845 | result = -EPERM; | ||
846 | if (unlikely(tsk->seccomp.mode)) | ||
847 | goto out; | ||
848 | |||
849 | result = -EFAULT; | ||
850 | memset(__buf, 0, sizeof(__buf)); | ||
851 | count = min(count, sizeof(__buf) - 1); | ||
852 | if (copy_from_user(__buf, buf, count)) | ||
853 | goto out; | ||
854 | |||
855 | seccomp_mode = simple_strtoul(__buf, &end, 0); | ||
856 | if (*end == '\n') | ||
857 | end++; | ||
858 | result = -EINVAL; | ||
859 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
860 | tsk->seccomp.mode = seccomp_mode; | ||
861 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | ||
862 | } else | ||
863 | goto out; | ||
864 | result = -EIO; | ||
865 | if (unlikely(!(end - __buf))) | ||
866 | goto out; | ||
867 | result = end - __buf; | ||
868 | out: | ||
869 | put_task_struct(tsk); | ||
870 | out_no_task: | ||
871 | return result; | ||
872 | } | ||
873 | |||
874 | static const struct file_operations proc_seccomp_operations = { | ||
875 | .read = seccomp_read, | ||
876 | .write = seccomp_write, | ||
877 | }; | ||
878 | #endif /* CONFIG_SECCOMP */ | ||
879 | |||
880 | #ifdef CONFIG_FAULT_INJECTION | 819 | #ifdef CONFIG_FAULT_INJECTION |
881 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, | 820 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, |
882 | size_t count, loff_t *ppos) | 821 | size_t count, loff_t *ppos) |
@@ -2037,9 +1976,6 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2037 | REG("numa_maps", S_IRUGO, numa_maps), | 1976 | REG("numa_maps", S_IRUGO, numa_maps), |
2038 | #endif | 1977 | #endif |
2039 | REG("mem", S_IRUSR|S_IWUSR, mem), | 1978 | REG("mem", S_IRUSR|S_IWUSR, mem), |
2040 | #ifdef CONFIG_SECCOMP | ||
2041 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), | ||
2042 | #endif | ||
2043 | LNK("cwd", cwd), | 1979 | LNK("cwd", cwd), |
2044 | LNK("root", root), | 1980 | LNK("root", root), |
2045 | LNK("exe", exe), | 1981 | LNK("exe", exe), |
@@ -2324,9 +2260,6 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2324 | REG("numa_maps", S_IRUGO, numa_maps), | 2260 | REG("numa_maps", S_IRUGO, numa_maps), |
2325 | #endif | 2261 | #endif |
2326 | REG("mem", S_IRUSR|S_IWUSR, mem), | 2262 | REG("mem", S_IRUSR|S_IWUSR, mem), |
2327 | #ifdef CONFIG_SECCOMP | ||
2328 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), | ||
2329 | #endif | ||
2330 | LNK("cwd", cwd), | 2263 | LNK("cwd", cwd), |
2331 | LNK("root", root), | 2264 | LNK("root", root), |
2332 | LNK("exe", exe), | 2265 | LNK("exe", exe), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8a40e15f5ecb..b5e7155d30d8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/namei.h> | 20 | #include <linux/namei.h> |
21 | #include <linux/bitops.h> | 21 | #include <linux/bitops.h> |
22 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
23 | #include <linux/completion.h> | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | 25 | ||
25 | #include "internal.h" | 26 | #include "internal.h" |
@@ -529,12 +530,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
529 | return -EAGAIN; | 530 | return -EAGAIN; |
530 | dp->low_ino = i; | 531 | dp->low_ino = i; |
531 | 532 | ||
532 | spin_lock(&proc_subdir_lock); | ||
533 | dp->next = dir->subdir; | ||
534 | dp->parent = dir; | ||
535 | dir->subdir = dp; | ||
536 | spin_unlock(&proc_subdir_lock); | ||
537 | |||
538 | if (S_ISDIR(dp->mode)) { | 533 | if (S_ISDIR(dp->mode)) { |
539 | if (dp->proc_iops == NULL) { | 534 | if (dp->proc_iops == NULL) { |
540 | dp->proc_fops = &proc_dir_operations; | 535 | dp->proc_fops = &proc_dir_operations; |
@@ -550,6 +545,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
550 | if (dp->proc_iops == NULL) | 545 | if (dp->proc_iops == NULL) |
551 | dp->proc_iops = &proc_file_inode_operations; | 546 | dp->proc_iops = &proc_file_inode_operations; |
552 | } | 547 | } |
548 | |||
549 | spin_lock(&proc_subdir_lock); | ||
550 | dp->next = dir->subdir; | ||
551 | dp->parent = dir; | ||
552 | dir->subdir = dp; | ||
553 | spin_unlock(&proc_subdir_lock); | ||
554 | |||
553 | return 0; | 555 | return 0; |
554 | } | 556 | } |
555 | 557 | ||
@@ -613,6 +615,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, | |||
613 | ent->namelen = len; | 615 | ent->namelen = len; |
614 | ent->mode = mode; | 616 | ent->mode = mode; |
615 | ent->nlink = nlink; | 617 | ent->nlink = nlink; |
618 | ent->pde_users = 0; | ||
619 | spin_lock_init(&ent->pde_unload_lock); | ||
620 | ent->pde_unload_completion = NULL; | ||
616 | out: | 621 | out: |
617 | return ent; | 622 | return ent; |
618 | } | 623 | } |
@@ -649,9 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, | |||
649 | 654 | ||
650 | ent = proc_create(&parent, name, S_IFDIR | mode, 2); | 655 | ent = proc_create(&parent, name, S_IFDIR | mode, 2); |
651 | if (ent) { | 656 | if (ent) { |
652 | ent->proc_fops = &proc_dir_operations; | ||
653 | ent->proc_iops = &proc_dir_inode_operations; | ||
654 | |||
655 | if (proc_register(parent, ent) < 0) { | 657 | if (proc_register(parent, ent) < 0) { |
656 | kfree(ent); | 658 | kfree(ent); |
657 | ent = NULL; | 659 | ent = NULL; |
@@ -686,10 +688,6 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, | |||
686 | 688 | ||
687 | ent = proc_create(&parent,name,mode,nlink); | 689 | ent = proc_create(&parent,name,mode,nlink); |
688 | if (ent) { | 690 | if (ent) { |
689 | if (S_ISDIR(mode)) { | ||
690 | ent->proc_fops = &proc_dir_operations; | ||
691 | ent->proc_iops = &proc_dir_inode_operations; | ||
692 | } | ||
693 | if (proc_register(parent, ent) < 0) { | 691 | if (proc_register(parent, ent) < 0) { |
694 | kfree(ent); | 692 | kfree(ent); |
695 | ent = NULL; | 693 | ent = NULL; |
@@ -734,9 +732,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
734 | de = *p; | 732 | de = *p; |
735 | *p = de->next; | 733 | *p = de->next; |
736 | de->next = NULL; | 734 | de->next = NULL; |
735 | |||
736 | spin_lock(&de->pde_unload_lock); | ||
737 | /* | ||
738 | * Stop accepting new callers into module. If you're | ||
739 | * dynamically allocating ->proc_fops, save a pointer somewhere. | ||
740 | */ | ||
741 | de->proc_fops = NULL; | ||
742 | /* Wait until all existing callers into module are done. */ | ||
743 | if (de->pde_users > 0) { | ||
744 | DECLARE_COMPLETION_ONSTACK(c); | ||
745 | |||
746 | if (!de->pde_unload_completion) | ||
747 | de->pde_unload_completion = &c; | ||
748 | |||
749 | spin_unlock(&de->pde_unload_lock); | ||
750 | spin_unlock(&proc_subdir_lock); | ||
751 | |||
752 | wait_for_completion(de->pde_unload_completion); | ||
753 | |||
754 | spin_lock(&proc_subdir_lock); | ||
755 | goto continue_removing; | ||
756 | } | ||
757 | spin_unlock(&de->pde_unload_lock); | ||
758 | |||
759 | continue_removing: | ||
737 | if (S_ISDIR(de->mode)) | 760 | if (S_ISDIR(de->mode)) |
738 | parent->nlink--; | 761 | parent->nlink--; |
739 | proc_kill_inodes(de); | 762 | if (!S_ISREG(de->mode)) |
763 | proc_kill_inodes(de); | ||
740 | de->nlink = 0; | 764 | de->nlink = 0; |
741 | WARN_ON(de->subdir); | 765 | WARN_ON(de->subdir); |
742 | if (!atomic_read(&de->count)) | 766 | if (!atomic_read(&de->count)) |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d5ce65c68d7b..dd28e86ab422 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/stat.h> | 12 | #include <linux/stat.h> |
13 | #include <linux/completion.h> | ||
13 | #include <linux/file.h> | 14 | #include <linux/file.h> |
14 | #include <linux/limits.h> | 15 | #include <linux/limits.h> |
15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
@@ -140,6 +141,251 @@ static const struct super_operations proc_sops = { | |||
140 | .remount_fs = proc_remount, | 141 | .remount_fs = proc_remount, |
141 | }; | 142 | }; |
142 | 143 | ||
144 | static void pde_users_dec(struct proc_dir_entry *pde) | ||
145 | { | ||
146 | spin_lock(&pde->pde_unload_lock); | ||
147 | pde->pde_users--; | ||
148 | if (pde->pde_unload_completion && pde->pde_users == 0) | ||
149 | complete(pde->pde_unload_completion); | ||
150 | spin_unlock(&pde->pde_unload_lock); | ||
151 | } | ||
152 | |||
153 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) | ||
154 | { | ||
155 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
156 | loff_t rv = -EINVAL; | ||
157 | loff_t (*llseek)(struct file *, loff_t, int); | ||
158 | |||
159 | spin_lock(&pde->pde_unload_lock); | ||
160 | /* | ||
161 | * remove_proc_entry() is going to delete PDE (as part of module | ||
162 | * cleanup sequence). No new callers into module allowed. | ||
163 | */ | ||
164 | if (!pde->proc_fops) { | ||
165 | spin_unlock(&pde->pde_unload_lock); | ||
166 | return rv; | ||
167 | } | ||
168 | /* | ||
169 | * Bump refcount so that remove_proc_entry will wail for ->llseek to | ||
170 | * complete. | ||
171 | */ | ||
172 | pde->pde_users++; | ||
173 | /* | ||
174 | * Save function pointer under lock, to protect against ->proc_fops | ||
175 | * NULL'ifying right after ->pde_unload_lock is dropped. | ||
176 | */ | ||
177 | llseek = pde->proc_fops->llseek; | ||
178 | spin_unlock(&pde->pde_unload_lock); | ||
179 | |||
180 | if (!llseek) | ||
181 | llseek = default_llseek; | ||
182 | rv = llseek(file, offset, whence); | ||
183 | |||
184 | pde_users_dec(pde); | ||
185 | return rv; | ||
186 | } | ||
187 | |||
188 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
189 | { | ||
190 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
191 | ssize_t rv = -EIO; | ||
192 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | ||
193 | |||
194 | spin_lock(&pde->pde_unload_lock); | ||
195 | if (!pde->proc_fops) { | ||
196 | spin_unlock(&pde->pde_unload_lock); | ||
197 | return rv; | ||
198 | } | ||
199 | pde->pde_users++; | ||
200 | read = pde->proc_fops->read; | ||
201 | spin_unlock(&pde->pde_unload_lock); | ||
202 | |||
203 | if (read) | ||
204 | rv = read(file, buf, count, ppos); | ||
205 | |||
206 | pde_users_dec(pde); | ||
207 | return rv; | ||
208 | } | ||
209 | |||
210 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) | ||
211 | { | ||
212 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
213 | ssize_t rv = -EIO; | ||
214 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | ||
215 | |||
216 | spin_lock(&pde->pde_unload_lock); | ||
217 | if (!pde->proc_fops) { | ||
218 | spin_unlock(&pde->pde_unload_lock); | ||
219 | return rv; | ||
220 | } | ||
221 | pde->pde_users++; | ||
222 | write = pde->proc_fops->write; | ||
223 | spin_unlock(&pde->pde_unload_lock); | ||
224 | |||
225 | if (write) | ||
226 | rv = write(file, buf, count, ppos); | ||
227 | |||
228 | pde_users_dec(pde); | ||
229 | return rv; | ||
230 | } | ||
231 | |||
232 | static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) | ||
233 | { | ||
234 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
235 | unsigned int rv = 0; | ||
236 | unsigned int (*poll)(struct file *, struct poll_table_struct *); | ||
237 | |||
238 | spin_lock(&pde->pde_unload_lock); | ||
239 | if (!pde->proc_fops) { | ||
240 | spin_unlock(&pde->pde_unload_lock); | ||
241 | return rv; | ||
242 | } | ||
243 | pde->pde_users++; | ||
244 | poll = pde->proc_fops->poll; | ||
245 | spin_unlock(&pde->pde_unload_lock); | ||
246 | |||
247 | if (poll) | ||
248 | rv = poll(file, pts); | ||
249 | |||
250 | pde_users_dec(pde); | ||
251 | return rv; | ||
252 | } | ||
253 | |||
254 | static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
255 | { | ||
256 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
257 | long rv = -ENOTTY; | ||
258 | long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); | ||
259 | int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); | ||
260 | |||
261 | spin_lock(&pde->pde_unload_lock); | ||
262 | if (!pde->proc_fops) { | ||
263 | spin_unlock(&pde->pde_unload_lock); | ||
264 | return rv; | ||
265 | } | ||
266 | pde->pde_users++; | ||
267 | unlocked_ioctl = pde->proc_fops->unlocked_ioctl; | ||
268 | ioctl = pde->proc_fops->ioctl; | ||
269 | spin_unlock(&pde->pde_unload_lock); | ||
270 | |||
271 | if (unlocked_ioctl) { | ||
272 | rv = unlocked_ioctl(file, cmd, arg); | ||
273 | if (rv == -ENOIOCTLCMD) | ||
274 | rv = -EINVAL; | ||
275 | } else if (ioctl) { | ||
276 | lock_kernel(); | ||
277 | rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); | ||
278 | unlock_kernel(); | ||
279 | } | ||
280 | |||
281 | pde_users_dec(pde); | ||
282 | return rv; | ||
283 | } | ||
284 | |||
285 | #ifdef CONFIG_COMPAT | ||
286 | static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
287 | { | ||
288 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
289 | long rv = -ENOTTY; | ||
290 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); | ||
291 | |||
292 | spin_lock(&pde->pde_unload_lock); | ||
293 | if (!pde->proc_fops) { | ||
294 | spin_unlock(&pde->pde_unload_lock); | ||
295 | return rv; | ||
296 | } | ||
297 | pde->pde_users++; | ||
298 | compat_ioctl = pde->proc_fops->compat_ioctl; | ||
299 | spin_unlock(&pde->pde_unload_lock); | ||
300 | |||
301 | if (compat_ioctl) | ||
302 | rv = compat_ioctl(file, cmd, arg); | ||
303 | |||
304 | pde_users_dec(pde); | ||
305 | return rv; | ||
306 | } | ||
307 | #endif | ||
308 | |||
309 | static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) | ||
310 | { | ||
311 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
312 | int rv = -EIO; | ||
313 | int (*mmap)(struct file *, struct vm_area_struct *); | ||
314 | |||
315 | spin_lock(&pde->pde_unload_lock); | ||
316 | if (!pde->proc_fops) { | ||
317 | spin_unlock(&pde->pde_unload_lock); | ||
318 | return rv; | ||
319 | } | ||
320 | pde->pde_users++; | ||
321 | mmap = pde->proc_fops->mmap; | ||
322 | spin_unlock(&pde->pde_unload_lock); | ||
323 | |||
324 | if (mmap) | ||
325 | rv = mmap(file, vma); | ||
326 | |||
327 | pde_users_dec(pde); | ||
328 | return rv; | ||
329 | } | ||
330 | |||
331 | static int proc_reg_open(struct inode *inode, struct file *file) | ||
332 | { | ||
333 | struct proc_dir_entry *pde = PDE(inode); | ||
334 | int rv = 0; | ||
335 | int (*open)(struct inode *, struct file *); | ||
336 | |||
337 | spin_lock(&pde->pde_unload_lock); | ||
338 | if (!pde->proc_fops) { | ||
339 | spin_unlock(&pde->pde_unload_lock); | ||
340 | return rv; | ||
341 | } | ||
342 | pde->pde_users++; | ||
343 | open = pde->proc_fops->open; | ||
344 | spin_unlock(&pde->pde_unload_lock); | ||
345 | |||
346 | if (open) | ||
347 | rv = open(inode, file); | ||
348 | |||
349 | pde_users_dec(pde); | ||
350 | return rv; | ||
351 | } | ||
352 | |||
353 | static int proc_reg_release(struct inode *inode, struct file *file) | ||
354 | { | ||
355 | struct proc_dir_entry *pde = PDE(inode); | ||
356 | int rv = 0; | ||
357 | int (*release)(struct inode *, struct file *); | ||
358 | |||
359 | spin_lock(&pde->pde_unload_lock); | ||
360 | if (!pde->proc_fops) { | ||
361 | spin_unlock(&pde->pde_unload_lock); | ||
362 | return rv; | ||
363 | } | ||
364 | pde->pde_users++; | ||
365 | release = pde->proc_fops->release; | ||
366 | spin_unlock(&pde->pde_unload_lock); | ||
367 | |||
368 | if (release) | ||
369 | rv = release(inode, file); | ||
370 | |||
371 | pde_users_dec(pde); | ||
372 | return rv; | ||
373 | } | ||
374 | |||
375 | static const struct file_operations proc_reg_file_ops = { | ||
376 | .llseek = proc_reg_llseek, | ||
377 | .read = proc_reg_read, | ||
378 | .write = proc_reg_write, | ||
379 | .poll = proc_reg_poll, | ||
380 | .unlocked_ioctl = proc_reg_unlocked_ioctl, | ||
381 | #ifdef CONFIG_COMPAT | ||
382 | .compat_ioctl = proc_reg_compat_ioctl, | ||
383 | #endif | ||
384 | .mmap = proc_reg_mmap, | ||
385 | .open = proc_reg_open, | ||
386 | .release = proc_reg_release, | ||
387 | }; | ||
388 | |||
143 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | 389 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, |
144 | struct proc_dir_entry *de) | 390 | struct proc_dir_entry *de) |
145 | { | 391 | { |
@@ -166,8 +412,12 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | |||
166 | inode->i_nlink = de->nlink; | 412 | inode->i_nlink = de->nlink; |
167 | if (de->proc_iops) | 413 | if (de->proc_iops) |
168 | inode->i_op = de->proc_iops; | 414 | inode->i_op = de->proc_iops; |
169 | if (de->proc_fops) | 415 | if (de->proc_fops) { |
170 | inode->i_fop = de->proc_fops; | 416 | if (S_ISREG(inode->i_mode)) |
417 | inode->i_fop = &proc_reg_file_ops; | ||
418 | else | ||
419 | inode->i_fop = de->proc_fops; | ||
420 | } | ||
171 | } | 421 | } |
172 | 422 | ||
173 | return inode; | 423 | return inode; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5fd49e47f83a..d24b8d46059a 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -105,6 +105,7 @@ static int uptime_read_proc(char *page, char **start, off_t off, | |||
105 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); | 105 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); |
106 | 106 | ||
107 | do_posix_clock_monotonic_gettime(&uptime); | 107 | do_posix_clock_monotonic_gettime(&uptime); |
108 | monotonic_to_bootbased(&uptime); | ||
108 | cputime_to_timespec(idletime, &idle); | 109 | cputime_to_timespec(idletime, &idle); |
109 | len = sprintf(page,"%lu.%02lu %lu.%02lu\n", | 110 | len = sprintf(page,"%lu.%02lu %lu.%02lu\n", |
110 | (unsigned long) uptime.tv_sec, | 111 | (unsigned long) uptime.tv_sec, |
@@ -443,12 +444,12 @@ static int show_stat(struct seq_file *p, void *v) | |||
443 | unsigned long jif; | 444 | unsigned long jif; |
444 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | 445 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; |
445 | u64 sum = 0; | 446 | u64 sum = 0; |
447 | struct timespec boottime; | ||
446 | 448 | ||
447 | user = nice = system = idle = iowait = | 449 | user = nice = system = idle = iowait = |
448 | irq = softirq = steal = cputime64_zero; | 450 | irq = softirq = steal = cputime64_zero; |
449 | jif = - wall_to_monotonic.tv_sec; | 451 | getboottime(&boottime); |
450 | if (wall_to_monotonic.tv_nsec) | 452 | jif = boottime.tv_sec; |
451 | --jif; | ||
452 | 453 | ||
453 | for_each_possible_cpu(i) { | 454 | for_each_possible_cpu(i) { |
454 | int j; | 455 | int j; |
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index b3a473b0a191..22846225acfa 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c | |||
@@ -69,7 +69,7 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p, | |||
69 | 69 | ||
70 | static int show_tty_driver(struct seq_file *m, void *v) | 70 | static int show_tty_driver(struct seq_file *m, void *v) |
71 | { | 71 | { |
72 | struct tty_driver *p = v; | 72 | struct tty_driver *p = list_entry(v, struct tty_driver, tty_drivers); |
73 | dev_t from = MKDEV(p->major, p->minor_start); | 73 | dev_t from = MKDEV(p->major, p->minor_start); |
74 | dev_t to = from + p->num; | 74 | dev_t to = from + p->num; |
75 | 75 | ||
@@ -106,22 +106,13 @@ static int show_tty_driver(struct seq_file *m, void *v) | |||
106 | /* iterator */ | 106 | /* iterator */ |
107 | static void *t_start(struct seq_file *m, loff_t *pos) | 107 | static void *t_start(struct seq_file *m, loff_t *pos) |
108 | { | 108 | { |
109 | struct list_head *p; | ||
110 | loff_t l = *pos; | ||
111 | |||
112 | mutex_lock(&tty_mutex); | 109 | mutex_lock(&tty_mutex); |
113 | list_for_each(p, &tty_drivers) | 110 | return seq_list_start(&tty_drivers, *pos); |
114 | if (!l--) | ||
115 | return list_entry(p, struct tty_driver, tty_drivers); | ||
116 | return NULL; | ||
117 | } | 111 | } |
118 | 112 | ||
119 | static void *t_next(struct seq_file *m, void *v, loff_t *pos) | 113 | static void *t_next(struct seq_file *m, void *v, loff_t *pos) |
120 | { | 114 | { |
121 | struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; | 115 | return seq_list_next(v, &tty_drivers, pos); |
122 | (*pos)++; | ||
123 | return p==&tty_drivers ? NULL : | ||
124 | list_entry(p, struct tty_driver, tty_drivers); | ||
125 | } | 116 | } |
126 | 117 | ||
127 | static void t_stop(struct seq_file *m, void *v) | 118 | static void t_stop(struct seq_file *m, void *v) |
diff --git a/fs/quota.c b/fs/quota.c index 9f237d6182c9..e6577ac15a6c 100644 --- a/fs/quota.c +++ b/fs/quota.c | |||
@@ -10,12 +10,14 @@ | |||
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <asm/current.h> | 11 | #include <asm/current.h> |
12 | #include <asm/uaccess.h> | 12 | #include <asm/uaccess.h> |
13 | #include <linux/compat.h> | ||
13 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
14 | #include <linux/security.h> | 15 | #include <linux/security.h> |
15 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
16 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
17 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
18 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
20 | #include <linux/types.h> | ||
19 | 21 | ||
20 | /* Check validity of generic quotactl commands */ | 22 | /* Check validity of generic quotactl commands */ |
21 | static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) | 23 | static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) |
@@ -384,3 +386,119 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t | |||
384 | 386 | ||
385 | return ret; | 387 | return ret; |
386 | } | 388 | } |
389 | |||
390 | #if defined(CONFIG_X86_64) || defined(CONFIG_IA64) | ||
391 | /* | ||
392 | * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64) | ||
393 | * and is necessary due to alignment problems. | ||
394 | */ | ||
395 | struct compat_if_dqblk { | ||
396 | compat_u64 dqb_bhardlimit; | ||
397 | compat_u64 dqb_bsoftlimit; | ||
398 | compat_u64 dqb_curspace; | ||
399 | compat_u64 dqb_ihardlimit; | ||
400 | compat_u64 dqb_isoftlimit; | ||
401 | compat_u64 dqb_curinodes; | ||
402 | compat_u64 dqb_btime; | ||
403 | compat_u64 dqb_itime; | ||
404 | compat_uint_t dqb_valid; | ||
405 | }; | ||
406 | |||
407 | /* XFS structures */ | ||
408 | struct compat_fs_qfilestat { | ||
409 | compat_u64 dqb_bhardlimit; | ||
410 | compat_u64 qfs_nblks; | ||
411 | compat_uint_t qfs_nextents; | ||
412 | }; | ||
413 | |||
414 | struct compat_fs_quota_stat { | ||
415 | __s8 qs_version; | ||
416 | __u16 qs_flags; | ||
417 | __s8 qs_pad; | ||
418 | struct compat_fs_qfilestat qs_uquota; | ||
419 | struct compat_fs_qfilestat qs_gquota; | ||
420 | compat_uint_t qs_incoredqs; | ||
421 | compat_int_t qs_btimelimit; | ||
422 | compat_int_t qs_itimelimit; | ||
423 | compat_int_t qs_rtbtimelimit; | ||
424 | __u16 qs_bwarnlimit; | ||
425 | __u16 qs_iwarnlimit; | ||
426 | }; | ||
427 | |||
428 | asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, | ||
429 | qid_t id, void __user *addr) | ||
430 | { | ||
431 | unsigned int cmds; | ||
432 | struct if_dqblk __user *dqblk; | ||
433 | struct compat_if_dqblk __user *compat_dqblk; | ||
434 | struct fs_quota_stat __user *fsqstat; | ||
435 | struct compat_fs_quota_stat __user *compat_fsqstat; | ||
436 | compat_uint_t data; | ||
437 | u16 xdata; | ||
438 | long ret; | ||
439 | |||
440 | cmds = cmd >> SUBCMDSHIFT; | ||
441 | |||
442 | switch (cmds) { | ||
443 | case Q_GETQUOTA: | ||
444 | dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); | ||
445 | compat_dqblk = addr; | ||
446 | ret = sys_quotactl(cmd, special, id, dqblk); | ||
447 | if (ret) | ||
448 | break; | ||
449 | if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) || | ||
450 | get_user(data, &dqblk->dqb_valid) || | ||
451 | put_user(data, &compat_dqblk->dqb_valid)) | ||
452 | ret = -EFAULT; | ||
453 | break; | ||
454 | case Q_SETQUOTA: | ||
455 | dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); | ||
456 | compat_dqblk = addr; | ||
457 | ret = -EFAULT; | ||
458 | if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) || | ||
459 | get_user(data, &compat_dqblk->dqb_valid) || | ||
460 | put_user(data, &dqblk->dqb_valid)) | ||
461 | break; | ||
462 | ret = sys_quotactl(cmd, special, id, dqblk); | ||
463 | break; | ||
464 | case Q_XGETQSTAT: | ||
465 | fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat)); | ||
466 | compat_fsqstat = addr; | ||
467 | ret = sys_quotactl(cmd, special, id, fsqstat); | ||
468 | if (ret) | ||
469 | break; | ||
470 | ret = -EFAULT; | ||
471 | /* Copying qs_version, qs_flags, qs_pad */ | ||
472 | if (copy_in_user(compat_fsqstat, fsqstat, | ||
473 | offsetof(struct compat_fs_quota_stat, qs_uquota))) | ||
474 | break; | ||
475 | /* Copying qs_uquota */ | ||
476 | if (copy_in_user(&compat_fsqstat->qs_uquota, | ||
477 | &fsqstat->qs_uquota, | ||
478 | sizeof(compat_fsqstat->qs_uquota)) || | ||
479 | get_user(data, &fsqstat->qs_uquota.qfs_nextents) || | ||
480 | put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents)) | ||
481 | break; | ||
482 | /* Copying qs_gquota */ | ||
483 | if (copy_in_user(&compat_fsqstat->qs_gquota, | ||
484 | &fsqstat->qs_gquota, | ||
485 | sizeof(compat_fsqstat->qs_gquota)) || | ||
486 | get_user(data, &fsqstat->qs_gquota.qfs_nextents) || | ||
487 | put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents)) | ||
488 | break; | ||
489 | /* Copying the rest */ | ||
490 | if (copy_in_user(&compat_fsqstat->qs_incoredqs, | ||
491 | &fsqstat->qs_incoredqs, | ||
492 | sizeof(struct compat_fs_quota_stat) - | ||
493 | offsetof(struct compat_fs_quota_stat, qs_incoredqs)) || | ||
494 | get_user(xdata, &fsqstat->qs_iwarnlimit) || | ||
495 | put_user(xdata, &compat_fsqstat->qs_iwarnlimit)) | ||
496 | break; | ||
497 | ret = 0; | ||
498 | break; | ||
499 | default: | ||
500 | ret = sys_quotactl(cmd, special, id, addr); | ||
501 | } | ||
502 | return ret; | ||
503 | } | ||
504 | #endif | ||
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index d40d22b347b7..ef2b46d099ff 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -60,6 +60,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
60 | inode->i_blocks = 0; | 60 | inode->i_blocks = 0; |
61 | inode->i_mapping->a_ops = &ramfs_aops; | 61 | inode->i_mapping->a_ops = &ramfs_aops; |
62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | ||
63 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 64 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
64 | switch (mode & S_IFMT) { | 65 | switch (mode & S_IFMT) { |
65 | default: | 66 | default: |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 30eebfb1b2d8..2070aeee2a52 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -1305,7 +1305,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t | |||
1305 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | 1305 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && |
1306 | *ppos + count > MAX_NON_LFS) { | 1306 | *ppos + count > MAX_NON_LFS) { |
1307 | if (*ppos >= MAX_NON_LFS) { | 1307 | if (*ppos >= MAX_NON_LFS) { |
1308 | send_sig(SIGXFSZ, current, 0); | ||
1309 | return -EFBIG; | 1308 | return -EFBIG; |
1310 | } | 1309 | } |
1311 | if (count > MAX_NON_LFS - (unsigned long)*ppos) | 1310 | if (count > MAX_NON_LFS - (unsigned long)*ppos) |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 1272d11399fb..ddde489f1cb2 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/reiserfs_fs.h> | 7 | #include <linux/reiserfs_fs.h> |
8 | #include <linux/reiserfs_acl.h> | 8 | #include <linux/reiserfs_acl.h> |
9 | #include <linux/reiserfs_xattr.h> | 9 | #include <linux/reiserfs_xattr.h> |
10 | #include <linux/exportfs.h> | ||
10 | #include <linux/smp_lock.h> | 11 | #include <linux/smp_lock.h> |
11 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
12 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b4ac9119200e..5a93cfe1a032 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
24 | #include <linux/exportfs.h> | ||
24 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
25 | #include <linux/mnt_namespace.h> | 26 | #include <linux/mnt_namespace.h> |
26 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 49194a4e6b91..bbb19be260ce 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read); | |||
177 | 177 | ||
178 | static int traverse(struct seq_file *m, loff_t offset) | 178 | static int traverse(struct seq_file *m, loff_t offset) |
179 | { | 179 | { |
180 | loff_t pos = 0; | 180 | loff_t pos = 0, index; |
181 | int error = 0; | 181 | int error = 0; |
182 | void *p; | 182 | void *p; |
183 | 183 | ||
184 | m->version = 0; | 184 | m->version = 0; |
185 | m->index = 0; | 185 | index = 0; |
186 | m->count = m->from = 0; | 186 | m->count = m->from = 0; |
187 | if (!offset) | 187 | if (!offset) { |
188 | m->index = index; | ||
188 | return 0; | 189 | return 0; |
190 | } | ||
189 | if (!m->buf) { | 191 | if (!m->buf) { |
190 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); | 192 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); |
191 | if (!m->buf) | 193 | if (!m->buf) |
192 | return -ENOMEM; | 194 | return -ENOMEM; |
193 | } | 195 | } |
194 | p = m->op->start(m, &m->index); | 196 | p = m->op->start(m, &index); |
195 | while (p) { | 197 | while (p) { |
196 | error = PTR_ERR(p); | 198 | error = PTR_ERR(p); |
197 | if (IS_ERR(p)) | 199 | if (IS_ERR(p)) |
@@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
204 | if (pos + m->count > offset) { | 206 | if (pos + m->count > offset) { |
205 | m->from = offset - pos; | 207 | m->from = offset - pos; |
206 | m->count -= m->from; | 208 | m->count -= m->from; |
209 | m->index = index; | ||
207 | break; | 210 | break; |
208 | } | 211 | } |
209 | pos += m->count; | 212 | pos += m->count; |
210 | m->count = 0; | 213 | m->count = 0; |
211 | if (pos == offset) { | 214 | if (pos == offset) { |
212 | m->index++; | 215 | index++; |
216 | m->index = index; | ||
213 | break; | 217 | break; |
214 | } | 218 | } |
215 | p = m->op->next(m, p, &m->index); | 219 | p = m->op->next(m, p, &index); |
216 | } | 220 | } |
217 | m->op->stop(m, p); | 221 | m->op->stop(m, p); |
218 | return error; | 222 | return error; |
@@ -260,8 +264,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) | |||
260 | } | 264 | } |
261 | } | 265 | } |
262 | } | 266 | } |
263 | mutex_unlock(&m->lock); | ||
264 | file->f_version = m->version; | 267 | file->f_version = m->version; |
268 | mutex_unlock(&m->lock); | ||
265 | return retval; | 269 | return retval; |
266 | } | 270 | } |
267 | EXPORT_SYMBOL(seq_lseek); | 271 | EXPORT_SYMBOL(seq_lseek); |
diff --git a/fs/splice.c b/fs/splice.c index 6c9828651e6f..53fc2082a468 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1061,8 +1061,9 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
1061 | 1061 | ||
1062 | while (len) { | 1062 | while (len) { |
1063 | size_t read_len; | 1063 | size_t read_len; |
1064 | loff_t pos = sd->pos; | ||
1064 | 1065 | ||
1065 | ret = do_splice_to(in, &sd->pos, pipe, len, flags); | 1066 | ret = do_splice_to(in, &pos, pipe, len, flags); |
1066 | if (unlikely(ret <= 0)) | 1067 | if (unlikely(ret <= 0)) |
1067 | goto out_release; | 1068 | goto out_release; |
1068 | 1069 | ||
@@ -1080,6 +1081,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
1080 | 1081 | ||
1081 | bytes += ret; | 1082 | bytes += ret; |
1082 | len -= ret; | 1083 | len -= ret; |
1084 | sd->pos = pos; | ||
1083 | 1085 | ||
1084 | if (ret < read_len) | 1086 | if (ret < read_len) |
1085 | goto out_release; | 1087 | goto out_release; |
diff --git a/fs/super.c b/fs/super.c index 5260d620c555..fc8ebedc6bed 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -884,6 +884,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
884 | error = type->get_sb(type, flags, name, data, mnt); | 884 | error = type->get_sb(type, flags, name, data, mnt); |
885 | if (error < 0) | 885 | if (error < 0) |
886 | goto out_free_secdata; | 886 | goto out_free_secdata; |
887 | BUG_ON(!mnt->mnt_sb); | ||
887 | 888 | ||
888 | error = security_sb_kern_mount(mnt->mnt_sb, secdata); | 889 | error = security_sb_kern_mount(mnt->mnt_sb, secdata); |
889 | if (error) | 890 | if (error) |
diff --git a/fs/udf/crc.c b/fs/udf/crc.c index 1b82a4adc2f7..ef2bfaa19d75 100644 --- a/fs/udf/crc.c +++ b/fs/udf/crc.c | |||
@@ -106,8 +106,8 @@ int main(void) | |||
106 | { | 106 | { |
107 | unsigned short x; | 107 | unsigned short x; |
108 | 108 | ||
109 | x = udf_crc16(bytes, sizeof bytes); | 109 | x = udf_crc(bytes, sizeof bytes); |
110 | printf("udf_crc16: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); | 110 | printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); |
111 | 111 | ||
112 | return 0; | 112 | return 0; |
113 | } | 113 | } |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 8206983f2ebf..10f3188738af 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -50,7 +50,7 @@ void udf_free_inode(struct inode * inode) | |||
50 | else | 50 | else |
51 | UDF_SB_LVIDIU(sb)->numFiles = | 51 | UDF_SB_LVIDIU(sb)->numFiles = |
52 | cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); | 52 | cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); |
53 | 53 | ||
54 | mark_buffer_dirty(sbi->s_lvidbh); | 54 | mark_buffer_dirty(sbi->s_lvidbh); |
55 | } | 55 | } |
56 | mutex_unlock(&sbi->s_alloc_mutex); | 56 | mutex_unlock(&sbi->s_alloc_mutex); |
@@ -136,6 +136,13 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) | |||
136 | UDF_I_EFE(inode) = 0; | 136 | UDF_I_EFE(inode) = 0; |
137 | UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); | 137 | UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); |
138 | } | 138 | } |
139 | if (!UDF_I_DATA(inode)) | ||
140 | { | ||
141 | iput(inode); | ||
142 | *err = -ENOMEM; | ||
143 | mutex_unlock(&sbi->s_alloc_mutex); | ||
144 | return NULL; | ||
145 | } | ||
139 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
140 | UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; | 147 | UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; |
141 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index bf7de0bdbab3..5b82e489af78 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -49,6 +49,7 @@ MODULE_LICENSE("GPL"); | |||
49 | static mode_t udf_convert_permissions(struct fileEntry *); | 49 | static mode_t udf_convert_permissions(struct fileEntry *); |
50 | static int udf_update_inode(struct inode *, int); | 50 | static int udf_update_inode(struct inode *, int); |
51 | static void udf_fill_inode(struct inode *, struct buffer_head *); | 51 | static void udf_fill_inode(struct inode *, struct buffer_head *); |
52 | static int udf_alloc_i_data(struct inode *inode, size_t size); | ||
52 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, | 53 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, |
53 | long *, int *); | 54 | long *, int *); |
54 | static int8_t udf_insert_aext(struct inode *, struct extent_position, | 55 | static int8_t udf_insert_aext(struct inode *, struct extent_position, |
@@ -734,7 +735,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, int newbl | |||
734 | (*c) ++; | 735 | (*c) ++; |
735 | (*endnum) ++; | 736 | (*endnum) ++; |
736 | } | 737 | } |
737 | 738 | ||
738 | laarr[curr].extLocation.logicalBlockNum = newblocknum; | 739 | laarr[curr].extLocation.logicalBlockNum = newblocknum; |
739 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) | 740 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) |
740 | laarr[curr].extLocation.partitionReferenceNum = | 741 | laarr[curr].extLocation.partitionReferenceNum = |
@@ -836,7 +837,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | |||
836 | { | 837 | { |
837 | numalloc -= elen; | 838 | numalloc -= elen; |
838 | if (*endnum > (i+1)) | 839 | if (*endnum > (i+1)) |
839 | memmove(&laarr[i], &laarr[i+1], | 840 | memmove(&laarr[i], &laarr[i+1], |
840 | sizeof(long_ad) * (*endnum - (i+1))); | 841 | sizeof(long_ad) * (*endnum - (i+1))); |
841 | i --; | 842 | i --; |
842 | (*endnum) --; | 843 | (*endnum) --; |
@@ -1024,7 +1025,7 @@ void udf_truncate(struct inode * inode) | |||
1024 | { | 1025 | { |
1025 | block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); | 1026 | block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); |
1026 | udf_truncate_extents(inode); | 1027 | udf_truncate_extents(inode); |
1027 | } | 1028 | } |
1028 | 1029 | ||
1029 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); | 1030 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); |
1030 | if (IS_SYNC(inode)) | 1031 | if (IS_SYNC(inode)) |
@@ -1087,10 +1088,10 @@ __udf_read_inode(struct inode *inode) | |||
1087 | { | 1088 | { |
1088 | kernel_lb_addr loc; | 1089 | kernel_lb_addr loc; |
1089 | ie = (struct indirectEntry *)ibh->b_data; | 1090 | ie = (struct indirectEntry *)ibh->b_data; |
1090 | 1091 | ||
1091 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | 1092 | loc = lelb_to_cpu(ie->indirectICB.extLocation); |
1092 | 1093 | ||
1093 | if (ie->indirectICB.extLength && | 1094 | if (ie->indirectICB.extLength && |
1094 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) | 1095 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) |
1095 | { | 1096 | { |
1096 | if (ident == TAG_IDENT_FE || | 1097 | if (ident == TAG_IDENT_FE || |
@@ -1156,14 +1157,22 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1156 | { | 1157 | { |
1157 | UDF_I_EFE(inode) = 1; | 1158 | UDF_I_EFE(inode) = 1; |
1158 | UDF_I_USE(inode) = 0; | 1159 | UDF_I_USE(inode) = 0; |
1159 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); | 1160 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry))) |
1161 | { | ||
1162 | make_bad_inode(inode); | ||
1163 | return; | ||
1164 | } | ||
1160 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); | 1165 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); |
1161 | } | 1166 | } |
1162 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) | 1167 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) |
1163 | { | 1168 | { |
1164 | UDF_I_EFE(inode) = 0; | 1169 | UDF_I_EFE(inode) = 0; |
1165 | UDF_I_USE(inode) = 0; | 1170 | UDF_I_USE(inode) = 0; |
1166 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); | 1171 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry))) |
1172 | { | ||
1173 | make_bad_inode(inode); | ||
1174 | return; | ||
1175 | } | ||
1167 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); | 1176 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); |
1168 | } | 1177 | } |
1169 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) | 1178 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) |
@@ -1173,7 +1182,11 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1173 | UDF_I_LENALLOC(inode) = | 1182 | UDF_I_LENALLOC(inode) = |
1174 | le32_to_cpu( | 1183 | le32_to_cpu( |
1175 | ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); | 1184 | ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); |
1176 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry), GFP_KERNEL); | 1185 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry))) |
1186 | { | ||
1187 | make_bad_inode(inode); | ||
1188 | return; | ||
1189 | } | ||
1177 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); | 1190 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); |
1178 | return; | 1191 | return; |
1179 | } | 1192 | } |
@@ -1191,7 +1204,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1191 | inode->i_nlink = le16_to_cpu(fe->fileLinkCount); | 1204 | inode->i_nlink = le16_to_cpu(fe->fileLinkCount); |
1192 | if (!inode->i_nlink) | 1205 | if (!inode->i_nlink) |
1193 | inode->i_nlink = 1; | 1206 | inode->i_nlink = 1; |
1194 | 1207 | ||
1195 | inode->i_size = le64_to_cpu(fe->informationLength); | 1208 | inode->i_size = le64_to_cpu(fe->informationLength); |
1196 | UDF_I_LENEXTENTS(inode) = inode->i_size; | 1209 | UDF_I_LENEXTENTS(inode) = inode->i_size; |
1197 | 1210 | ||
@@ -1243,7 +1256,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1243 | } | 1256 | } |
1244 | else | 1257 | else |
1245 | { | 1258 | { |
1246 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << | 1259 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << |
1247 | (inode->i_sb->s_blocksize_bits - 9); | 1260 | (inode->i_sb->s_blocksize_bits - 9); |
1248 | 1261 | ||
1249 | if ( udf_stamp_to_time(&convtime, &convtime_usec, | 1262 | if ( udf_stamp_to_time(&convtime, &convtime_usec, |
@@ -1374,6 +1387,20 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1374 | } | 1387 | } |
1375 | } | 1388 | } |
1376 | 1389 | ||
1390 | static int udf_alloc_i_data(struct inode *inode, size_t size) | ||
1391 | { | ||
1392 | UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL); | ||
1393 | |||
1394 | if (!UDF_I_DATA(inode)) | ||
1395 | { | ||
1396 | printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n", | ||
1397 | inode->i_ino); | ||
1398 | return -ENOMEM; | ||
1399 | } | ||
1400 | |||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1377 | static mode_t | 1404 | static mode_t |
1378 | udf_convert_permissions(struct fileEntry *fe) | 1405 | udf_convert_permissions(struct fileEntry *fe) |
1379 | { | 1406 | { |
@@ -2072,7 +2099,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
2072 | mark_buffer_dirty_inode(oepos.bh, inode); | 2099 | mark_buffer_dirty_inode(oepos.bh, inode); |
2073 | } | 2100 | } |
2074 | } | 2101 | } |
2075 | 2102 | ||
2076 | brelse(epos.bh); | 2103 | brelse(epos.bh); |
2077 | brelse(oepos.bh); | 2104 | brelse(oepos.bh); |
2078 | return (elen >> 30); | 2105 | return (elen >> 30); |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 22ff6ed55ce9..2b3011689e89 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -87,6 +87,7 @@ | |||
87 | #include <linux/smp_lock.h> | 87 | #include <linux/smp_lock.h> |
88 | #include <linux/buffer_head.h> | 88 | #include <linux/buffer_head.h> |
89 | #include <linux/vfs.h> | 89 | #include <linux/vfs.h> |
90 | #include <linux/log2.h> | ||
90 | 91 | ||
91 | #include "swab.h" | 92 | #include "swab.h" |
92 | #include "util.h" | 93 | #include "util.h" |
@@ -854,7 +855,7 @@ magic_found: | |||
854 | uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); | 855 | uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); |
855 | uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); | 856 | uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); |
856 | 857 | ||
857 | if (uspi->s_fsize & (uspi->s_fsize - 1)) { | 858 | if (!is_power_of_2(uspi->s_fsize)) { |
858 | printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", | 859 | printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", |
859 | uspi->s_fsize); | 860 | uspi->s_fsize); |
860 | goto failed; | 861 | goto failed; |
@@ -869,7 +870,7 @@ magic_found: | |||
869 | uspi->s_fsize); | 870 | uspi->s_fsize); |
870 | goto failed; | 871 | goto failed; |
871 | } | 872 | } |
872 | if (uspi->s_bsize & (uspi->s_bsize - 1)) { | 873 | if (!is_power_of_2(uspi->s_bsize)) { |
873 | printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", | 874 | printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", |
874 | uspi->s_bsize); | 875 | uspi->s_bsize); |
875 | goto failed; | 876 | goto failed; |
diff --git a/fs/utimes.c b/fs/utimes.c index b3c88952465f..83a7e69e706c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -106,7 +106,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
106 | if (IS_IMMUTABLE(inode)) | 106 | if (IS_IMMUTABLE(inode)) |
107 | goto dput_and_out; | 107 | goto dput_and_out; |
108 | 108 | ||
109 | if (current->fsuid != inode->i_uid) { | 109 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) { |
110 | if (f) { | 110 | if (f) { |
111 | if (!(f->f_mode & FMODE_WRITE)) | 111 | if (!(f->f_mode & FMODE_WRITE)) |
112 | goto dput_and_out; | 112 | goto dput_and_out; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2df63622354e..b0f0e58866de 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -35,10 +35,13 @@ | |||
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | 36 | ||
37 | static kmem_zone_t *xfs_buf_zone; | 37 | static kmem_zone_t *xfs_buf_zone; |
38 | static struct shrinker *xfs_buf_shake; | ||
39 | STATIC int xfsbufd(void *); | 38 | STATIC int xfsbufd(void *); |
40 | STATIC int xfsbufd_wakeup(int, gfp_t); | 39 | STATIC int xfsbufd_wakeup(int, gfp_t); |
41 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 40 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
41 | static struct shrinker xfs_buf_shake = { | ||
42 | .shrink = xfsbufd_wakeup, | ||
43 | .seeks = DEFAULT_SEEKS, | ||
44 | }; | ||
42 | 45 | ||
43 | static struct workqueue_struct *xfslogd_workqueue; | 46 | static struct workqueue_struct *xfslogd_workqueue; |
44 | struct workqueue_struct *xfsdatad_workqueue; | 47 | struct workqueue_struct *xfsdatad_workqueue; |
@@ -1832,14 +1835,9 @@ xfs_buf_init(void) | |||
1832 | if (!xfsdatad_workqueue) | 1835 | if (!xfsdatad_workqueue) |
1833 | goto out_destroy_xfslogd_workqueue; | 1836 | goto out_destroy_xfslogd_workqueue; |
1834 | 1837 | ||
1835 | xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup); | 1838 | register_shrinker(&xfs_buf_shake); |
1836 | if (!xfs_buf_shake) | ||
1837 | goto out_destroy_xfsdatad_workqueue; | ||
1838 | |||
1839 | return 0; | 1839 | return 0; |
1840 | 1840 | ||
1841 | out_destroy_xfsdatad_workqueue: | ||
1842 | destroy_workqueue(xfsdatad_workqueue); | ||
1843 | out_destroy_xfslogd_workqueue: | 1841 | out_destroy_xfslogd_workqueue: |
1844 | destroy_workqueue(xfslogd_workqueue); | 1842 | destroy_workqueue(xfslogd_workqueue); |
1845 | out_free_buf_zone: | 1843 | out_free_buf_zone: |
@@ -1854,7 +1852,7 @@ xfs_buf_init(void) | |||
1854 | void | 1852 | void |
1855 | xfs_buf_terminate(void) | 1853 | xfs_buf_terminate(void) |
1856 | { | 1854 | { |
1857 | remove_shrinker(xfs_buf_shake); | 1855 | unregister_shrinker(&xfs_buf_shake); |
1858 | destroy_workqueue(xfsdatad_workqueue); | 1856 | destroy_workqueue(xfsdatad_workqueue); |
1859 | destroy_workqueue(xfslogd_workqueue); | 1857 | destroy_workqueue(xfslogd_workqueue); |
1860 | kmem_zone_destroy(xfs_buf_zone); | 1858 | kmem_zone_destroy(xfs_buf_zone); |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 06894cf00b12..4528f9a3f304 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -562,6 +562,7 @@ xfssyncd( | |||
562 | bhv_vfs_sync_work_t *work, *n; | 562 | bhv_vfs_sync_work_t *work, *n; |
563 | LIST_HEAD (tmp); | 563 | LIST_HEAD (tmp); |
564 | 564 | ||
565 | set_freezable(); | ||
565 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | 566 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); |
566 | for (;;) { | 567 | for (;;) { |
567 | timeleft = schedule_timeout_interruptible(timeleft); | 568 | timeleft = schedule_timeout_interruptible(timeleft); |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 33dd1ca13245..201cc3273c84 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #ifndef __XFS_SUPER_H__ | 18 | #ifndef __XFS_SUPER_H__ |
19 | #define __XFS_SUPER_H__ | 19 | #define __XFS_SUPER_H__ |
20 | 20 | ||
21 | #include <linux/exportfs.h> | ||
22 | |||
21 | #ifdef CONFIG_XFS_DMAPI | 23 | #ifdef CONFIG_XFS_DMAPI |
22 | # define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) | 24 | # define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) |
23 | # define vfs_initdmapi() dmapi_init() | 25 | # define vfs_initdmapi() dmapi_init() |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7def4c699343..2d274b23ade5 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -62,7 +62,6 @@ uint ndquot; | |||
62 | 62 | ||
63 | kmem_zone_t *qm_dqzone; | 63 | kmem_zone_t *qm_dqzone; |
64 | kmem_zone_t *qm_dqtrxzone; | 64 | kmem_zone_t *qm_dqtrxzone; |
65 | static struct shrinker *xfs_qm_shaker; | ||
66 | 65 | ||
67 | static cred_t xfs_zerocr; | 66 | static cred_t xfs_zerocr; |
68 | 67 | ||
@@ -78,6 +77,11 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | |||
78 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 77 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
79 | STATIC int xfs_qm_shake(int, gfp_t); | 78 | STATIC int xfs_qm_shake(int, gfp_t); |
80 | 79 | ||
80 | static struct shrinker xfs_qm_shaker = { | ||
81 | .shrink = xfs_qm_shake, | ||
82 | .seeks = DEFAULT_SEEKS, | ||
83 | }; | ||
84 | |||
81 | #ifdef DEBUG | 85 | #ifdef DEBUG |
82 | extern mutex_t qcheck_lock; | 86 | extern mutex_t qcheck_lock; |
83 | #endif | 87 | #endif |
@@ -149,7 +153,7 @@ xfs_Gqm_init(void) | |||
149 | } else | 153 | } else |
150 | xqm->qm_dqzone = qm_dqzone; | 154 | xqm->qm_dqzone = qm_dqzone; |
151 | 155 | ||
152 | xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake); | 156 | register_shrinker(&xfs_qm_shaker); |
153 | 157 | ||
154 | /* | 158 | /* |
155 | * The t_dqinfo portion of transactions. | 159 | * The t_dqinfo portion of transactions. |
@@ -181,7 +185,7 @@ xfs_qm_destroy( | |||
181 | 185 | ||
182 | ASSERT(xqm != NULL); | 186 | ASSERT(xqm != NULL); |
183 | ASSERT(xqm->qm_nrefs == 0); | 187 | ASSERT(xqm->qm_nrefs == 0); |
184 | remove_shrinker(xfs_qm_shaker); | 188 | unregister_shrinker(&xfs_qm_shaker); |
185 | hsize = xqm->qm_dqhashmask + 1; | 189 | hsize = xqm->qm_dqhashmask + 1; |
186 | for (i = 0; i < hsize; i++) { | 190 | for (i = 0; i < hsize; i++) { |
187 | xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); | 191 | xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); |