diff options
Diffstat (limited to 'fs')
153 files changed, 7910 insertions, 2292 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 94b9d861bf9b..613df554728d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -991,7 +991,7 @@ config TMPFS_POSIX_ACL | |||
| 991 | 991 | ||
| 992 | config HUGETLBFS | 992 | config HUGETLBFS |
| 993 | bool "HugeTLB file system support" | 993 | bool "HugeTLB file system support" |
| 994 | depends on X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN | 994 | depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN |
| 995 | help | 995 | help |
| 996 | hugetlbfs is a filesystem backing for HugeTLB pages, based on | 996 | hugetlbfs is a filesystem backing for HugeTLB pages, based on |
| 997 | ramfs. For architectures that support it, say Y here and read | 997 | ramfs. For architectures that support it, say Y here and read |
| @@ -1675,6 +1675,7 @@ config NFSD_V3_ACL | |||
| 1675 | config NFSD_V4 | 1675 | config NFSD_V4 |
| 1676 | bool "Provide NFSv4 server support (EXPERIMENTAL)" | 1676 | bool "Provide NFSv4 server support (EXPERIMENTAL)" |
| 1677 | depends on NFSD_V3 && EXPERIMENTAL | 1677 | depends on NFSD_V3 && EXPERIMENTAL |
| 1678 | select RPCSEC_GSS_KRB5 | ||
| 1678 | help | 1679 | help |
| 1679 | If you would like to include the NFSv4 server as well as the NFSv2 | 1680 | If you would like to include the NFSv4 server as well as the NFSv2 |
| 1680 | and NFSv3 servers, say Y here. This feature is experimental, and | 1681 | and NFSv3 servers, say Y here. This feature is experimental, and |
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 73ce561f3ea0..a66671082cfb 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile | |||
| @@ -8,6 +8,7 @@ kafs-objs := \ | |||
| 8 | cmservice.o \ | 8 | cmservice.o \ |
| 9 | dir.o \ | 9 | dir.o \ |
| 10 | file.o \ | 10 | file.o \ |
| 11 | flock.o \ | ||
| 11 | fsclient.o \ | 12 | fsclient.o \ |
| 12 | inode.o \ | 13 | inode.o \ |
| 13 | main.o \ | 14 | main.o \ |
diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 245257948140..c548aa346f0d 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h | |||
| @@ -37,6 +37,13 @@ typedef enum { | |||
| 37 | AFS_FTYPE_SYMLINK = 3, | 37 | AFS_FTYPE_SYMLINK = 3, |
| 38 | } afs_file_type_t; | 38 | } afs_file_type_t; |
| 39 | 39 | ||
| 40 | typedef enum { | ||
| 41 | AFS_LOCK_READ = 0, /* read lock request */ | ||
| 42 | AFS_LOCK_WRITE = 1, /* write lock request */ | ||
| 43 | } afs_lock_type_t; | ||
| 44 | |||
| 45 | #define AFS_LOCKWAIT (5 * 60) /* time until a lock times out (seconds) */ | ||
| 46 | |||
| 40 | /* | 47 | /* |
| 41 | * AFS file identifier | 48 | * AFS file identifier |
| 42 | */ | 49 | */ |
| @@ -120,6 +127,7 @@ struct afs_file_status { | |||
| 120 | struct afs_fid parent; /* parent dir ID for non-dirs only */ | 127 | struct afs_fid parent; /* parent dir ID for non-dirs only */ |
| 121 | time_t mtime_client; /* last time client changed data */ | 128 | time_t mtime_client; /* last time client changed data */ |
| 122 | time_t mtime_server; /* last time server changed data */ | 129 | time_t mtime_server; /* last time server changed data */ |
| 130 | s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ | ||
| 123 | }; | 131 | }; |
| 124 | 132 | ||
| 125 | /* | 133 | /* |
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index a18c374ebe08..eb647323d8f0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h | |||
| @@ -31,6 +31,9 @@ enum AFS_FS_Operations { | |||
| 31 | FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ | 31 | FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ |
| 32 | FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ | 32 | FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ |
| 33 | FSGETROOTVOLUME = 151, /* AFS Get root volume name */ | 33 | FSGETROOTVOLUME = 151, /* AFS Get root volume name */ |
| 34 | FSSETLOCK = 156, /* AFS Request a file lock */ | ||
| 35 | FSEXTENDLOCK = 157, /* AFS Extend a file lock */ | ||
| 36 | FSRELEASELOCK = 158, /* AFS Release a file lock */ | ||
| 34 | FSLOOKUP = 161, /* AFS lookup file in directory */ | 37 | FSLOOKUP = 161, /* AFS lookup file in directory */ |
| 35 | FSFETCHDATA64 = 65537, /* AFS Fetch file data */ | 38 | FSFETCHDATA64 = 65537, /* AFS Fetch file data */ |
| 36 | FSSTOREDATA64 = 65538, /* AFS Store file data */ | 39 | FSSTOREDATA64 = 65538, /* AFS Store file data */ |
diff --git a/fs/afs/callback.c b/fs/afs/callback.c index bacf518c6fa8..b8243945818d 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c | |||
| @@ -125,6 +125,9 @@ static void afs_break_callback(struct afs_server *server, | |||
| 125 | spin_unlock(&server->cb_lock); | 125 | spin_unlock(&server->cb_lock); |
| 126 | 126 | ||
| 127 | queue_work(afs_callback_update_worker, &vnode->cb_broken_work); | 127 | queue_work(afs_callback_update_worker, &vnode->cb_broken_work); |
| 128 | if (list_empty(&vnode->granted_locks) && | ||
| 129 | !list_empty(&vnode->pending_locks)) | ||
| 130 | afs_lock_may_be_available(vnode); | ||
| 128 | spin_unlock(&vnode->lock); | 131 | spin_unlock(&vnode->lock); |
| 129 | } | 132 | } |
| 130 | } | 133 | } |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 546c59522eb1..33fe39ad4e03 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
| @@ -44,6 +44,7 @@ const struct file_operations afs_dir_file_operations = { | |||
| 44 | .open = afs_dir_open, | 44 | .open = afs_dir_open, |
| 45 | .release = afs_release, | 45 | .release = afs_release, |
| 46 | .readdir = afs_readdir, | 46 | .readdir = afs_readdir, |
| 47 | .lock = afs_lock, | ||
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | const struct inode_operations afs_dir_inode_operations = { | 50 | const struct inode_operations afs_dir_inode_operations = { |
diff --git a/fs/afs/file.c b/fs/afs/file.c index aede7eb66dd4..525f7c56e068 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
| @@ -34,6 +34,8 @@ const struct file_operations afs_file_operations = { | |||
| 34 | .mmap = generic_file_readonly_mmap, | 34 | .mmap = generic_file_readonly_mmap, |
| 35 | .splice_read = generic_file_splice_read, | 35 | .splice_read = generic_file_splice_read, |
| 36 | .fsync = afs_fsync, | 36 | .fsync = afs_fsync, |
| 37 | .lock = afs_lock, | ||
| 38 | .flock = afs_flock, | ||
| 37 | }; | 39 | }; |
| 38 | 40 | ||
| 39 | const struct inode_operations afs_file_inode_operations = { | 41 | const struct inode_operations afs_file_inode_operations = { |
diff --git a/fs/afs/flock.c b/fs/afs/flock.c new file mode 100644 index 000000000000..8f07f8d1bfa9 --- /dev/null +++ b/fs/afs/flock.c | |||
| @@ -0,0 +1,558 @@ | |||
| 1 | /* AFS file locking support | ||
| 2 | * | ||
| 3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/smp_lock.h> | ||
| 13 | #include "internal.h" | ||
| 14 | |||
| 15 | #define AFS_LOCK_GRANTED 0 | ||
| 16 | #define AFS_LOCK_PENDING 1 | ||
| 17 | |||
| 18 | static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); | ||
| 19 | static void afs_fl_release_private(struct file_lock *fl); | ||
| 20 | |||
| 21 | static struct workqueue_struct *afs_lock_manager; | ||
| 22 | |||
| 23 | static struct file_lock_operations afs_lock_ops = { | ||
| 24 | .fl_copy_lock = afs_fl_copy_lock, | ||
| 25 | .fl_release_private = afs_fl_release_private, | ||
| 26 | }; | ||
| 27 | |||
| 28 | /* | ||
| 29 | * initialise the lock manager thread if it isn't already running | ||
| 30 | */ | ||
| 31 | static int afs_init_lock_manager(void) | ||
| 32 | { | ||
| 33 | if (!afs_lock_manager) { | ||
| 34 | afs_lock_manager = create_singlethread_workqueue("kafs_lockd"); | ||
| 35 | if (!afs_lock_manager) | ||
| 36 | return -ENOMEM; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | /* | ||
| 42 | * destroy the lock manager thread if it's running | ||
| 43 | */ | ||
| 44 | void __exit afs_kill_lock_manager(void) | ||
| 45 | { | ||
| 46 | if (afs_lock_manager) | ||
| 47 | destroy_workqueue(afs_lock_manager); | ||
| 48 | } | ||
| 49 | |||
| 50 | /* | ||
| 51 | * if the callback is broken on this vnode, then the lock may now be available | ||
| 52 | */ | ||
| 53 | void afs_lock_may_be_available(struct afs_vnode *vnode) | ||
| 54 | { | ||
| 55 | _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); | ||
| 56 | |||
| 57 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); | ||
| 58 | } | ||
| 59 | |||
| 60 | /* | ||
| 61 | * the lock will time out in 5 minutes unless we extend it, so schedule | ||
| 62 | * extension in a bit less than that time | ||
| 63 | */ | ||
| 64 | static void afs_schedule_lock_extension(struct afs_vnode *vnode) | ||
| 65 | { | ||
| 66 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, | ||
| 67 | AFS_LOCKWAIT * HZ / 2); | ||
| 68 | } | ||
| 69 | |||
| 70 | /* | ||
| 71 | * do work for a lock, including: | ||
| 72 | * - probing for a lock we're waiting on but didn't get immediately | ||
| 73 | * - extending a lock that's close to timing out | ||
| 74 | */ | ||
| 75 | void afs_lock_work(struct work_struct *work) | ||
| 76 | { | ||
| 77 | struct afs_vnode *vnode = | ||
| 78 | container_of(work, struct afs_vnode, lock_work.work); | ||
| 79 | struct file_lock *fl; | ||
| 80 | afs_lock_type_t type; | ||
| 81 | struct key *key; | ||
| 82 | int ret; | ||
| 83 | |||
| 84 | _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); | ||
| 85 | |||
| 86 | spin_lock(&vnode->lock); | ||
| 87 | |||
| 88 | if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) { | ||
| 89 | _debug("unlock"); | ||
| 90 | spin_unlock(&vnode->lock); | ||
| 91 | |||
| 92 | /* attempt to release the server lock; if it fails, we just | ||
| 93 | * wait 5 minutes and it'll time out anyway */ | ||
| 94 | ret = afs_vnode_release_lock(vnode, vnode->unlock_key); | ||
| 95 | if (ret < 0) | ||
| 96 | printk(KERN_WARNING "AFS:" | ||
| 97 | " Failed to release lock on {%x:%x} error %d\n", | ||
| 98 | vnode->fid.vid, vnode->fid.vnode, ret); | ||
| 99 | |||
| 100 | spin_lock(&vnode->lock); | ||
| 101 | key_put(vnode->unlock_key); | ||
| 102 | vnode->unlock_key = NULL; | ||
| 103 | clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags); | ||
| 104 | } | ||
| 105 | |||
| 106 | /* if we've got a lock, then it must be time to extend that lock as AFS | ||
| 107 | * locks time out after 5 minutes */ | ||
| 108 | if (!list_empty(&vnode->granted_locks)) { | ||
| 109 | _debug("extend"); | ||
| 110 | |||
| 111 | if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) | ||
| 112 | BUG(); | ||
| 113 | fl = list_entry(vnode->granted_locks.next, | ||
| 114 | struct file_lock, fl_u.afs.link); | ||
| 115 | key = key_get(fl->fl_file->private_data); | ||
| 116 | spin_unlock(&vnode->lock); | ||
| 117 | |||
| 118 | ret = afs_vnode_extend_lock(vnode, key); | ||
| 119 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
| 120 | key_put(key); | ||
| 121 | switch (ret) { | ||
| 122 | case 0: | ||
| 123 | afs_schedule_lock_extension(vnode); | ||
| 124 | break; | ||
| 125 | default: | ||
| 126 | /* ummm... we failed to extend the lock - retry | ||
| 127 | * extension shortly */ | ||
| 128 | printk(KERN_WARNING "AFS:" | ||
| 129 | " Failed to extend lock on {%x:%x} error %d\n", | ||
| 130 | vnode->fid.vid, vnode->fid.vnode, ret); | ||
| 131 | queue_delayed_work(afs_lock_manager, &vnode->lock_work, | ||
| 132 | HZ * 10); | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | _leave(" [extend]"); | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | |||
| 139 | /* if we don't have a granted lock, then we must've been called back by | ||
| 140 | * the server, and so if might be possible to get a lock we're | ||
| 141 | * currently waiting for */ | ||
| 142 | if (!list_empty(&vnode->pending_locks)) { | ||
| 143 | _debug("get"); | ||
| 144 | |||
| 145 | if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) | ||
| 146 | BUG(); | ||
| 147 | fl = list_entry(vnode->pending_locks.next, | ||
| 148 | struct file_lock, fl_u.afs.link); | ||
| 149 | key = key_get(fl->fl_file->private_data); | ||
| 150 | type = (fl->fl_type == F_RDLCK) ? | ||
| 151 | AFS_LOCK_READ : AFS_LOCK_WRITE; | ||
| 152 | spin_unlock(&vnode->lock); | ||
| 153 | |||
| 154 | ret = afs_vnode_set_lock(vnode, key, type); | ||
| 155 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
| 156 | switch (ret) { | ||
| 157 | case -EWOULDBLOCK: | ||
| 158 | _debug("blocked"); | ||
| 159 | break; | ||
| 160 | case 0: | ||
| 161 | _debug("acquired"); | ||
| 162 | if (type == AFS_LOCK_READ) | ||
| 163 | set_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
| 164 | else | ||
| 165 | set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
| 166 | ret = AFS_LOCK_GRANTED; | ||
| 167 | default: | ||
| 168 | spin_lock(&vnode->lock); | ||
| 169 | /* the pending lock may have been withdrawn due to a | ||
| 170 | * signal */ | ||
| 171 | if (list_entry(vnode->pending_locks.next, | ||
| 172 | struct file_lock, fl_u.afs.link) == fl) { | ||
| 173 | fl->fl_u.afs.state = ret; | ||
| 174 | if (ret == AFS_LOCK_GRANTED) | ||
| 175 | list_move_tail(&fl->fl_u.afs.link, | ||
| 176 | &vnode->granted_locks); | ||
| 177 | else | ||
| 178 | list_del_init(&fl->fl_u.afs.link); | ||
| 179 | wake_up(&fl->fl_wait); | ||
| 180 | spin_unlock(&vnode->lock); | ||
| 181 | } else { | ||
| 182 | _debug("withdrawn"); | ||
| 183 | clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
| 184 | clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
| 185 | spin_unlock(&vnode->lock); | ||
| 186 | afs_vnode_release_lock(vnode, key); | ||
| 187 | if (!list_empty(&vnode->pending_locks)) | ||
| 188 | afs_lock_may_be_available(vnode); | ||
| 189 | } | ||
| 190 | break; | ||
| 191 | } | ||
| 192 | key_put(key); | ||
| 193 | _leave(" [pend]"); | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | |||
| 197 | /* looks like the lock request was withdrawn on a signal */ | ||
| 198 | spin_unlock(&vnode->lock); | ||
| 199 | _leave(" [no locks]"); | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * pass responsibility for the unlocking of a vnode on the server to the | ||
| 204 | * manager thread, lest a pending signal in the calling thread interrupt | ||
| 205 | * AF_RXRPC | ||
| 206 | * - the caller must hold the vnode lock | ||
| 207 | */ | ||
| 208 | static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) | ||
| 209 | { | ||
| 210 | cancel_delayed_work(&vnode->lock_work); | ||
| 211 | if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) && | ||
| 212 | !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) | ||
| 213 | BUG(); | ||
| 214 | if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) | ||
| 215 | BUG(); | ||
| 216 | vnode->unlock_key = key_get(key); | ||
| 217 | afs_lock_may_be_available(vnode); | ||
| 218 | } | ||
| 219 | |||
| 220 | /* | ||
| 221 | * request a lock on a file on the server | ||
| 222 | */ | ||
| 223 | static int afs_do_setlk(struct file *file, struct file_lock *fl) | ||
| 224 | { | ||
| 225 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
| 226 | afs_lock_type_t type; | ||
| 227 | struct key *key = file->private_data; | ||
| 228 | int ret; | ||
| 229 | |||
| 230 | _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); | ||
| 231 | |||
| 232 | /* only whole-file locks are supported */ | ||
| 233 | if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) | ||
| 234 | return -EINVAL; | ||
| 235 | |||
| 236 | ret = afs_init_lock_manager(); | ||
| 237 | if (ret < 0) | ||
| 238 | return ret; | ||
| 239 | |||
| 240 | fl->fl_ops = &afs_lock_ops; | ||
| 241 | INIT_LIST_HEAD(&fl->fl_u.afs.link); | ||
| 242 | fl->fl_u.afs.state = AFS_LOCK_PENDING; | ||
| 243 | |||
| 244 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; | ||
| 245 | |||
| 246 | lock_kernel(); | ||
| 247 | |||
| 248 | /* make sure we've got a callback on this file and that our view of the | ||
| 249 | * data version is up to date */ | ||
| 250 | ret = afs_vnode_fetch_status(vnode, NULL, key); | ||
| 251 | if (ret < 0) | ||
| 252 | goto error; | ||
| 253 | |||
| 254 | if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) { | ||
| 255 | ret = -EAGAIN; | ||
| 256 | goto error; | ||
| 257 | } | ||
| 258 | |||
| 259 | spin_lock(&vnode->lock); | ||
| 260 | |||
| 261 | if (list_empty(&vnode->pending_locks)) { | ||
| 262 | /* if there's no-one else with a lock on this vnode, then we | ||
| 263 | * need to ask the server for a lock */ | ||
| 264 | if (list_empty(&vnode->granted_locks)) { | ||
| 265 | _debug("not locked"); | ||
| 266 | ASSERTCMP(vnode->flags & | ||
| 267 | ((1 << AFS_VNODE_LOCKING) | | ||
| 268 | (1 << AFS_VNODE_READLOCKED) | | ||
| 269 | (1 << AFS_VNODE_WRITELOCKED)), ==, 0); | ||
| 270 | list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); | ||
| 271 | set_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
| 272 | spin_unlock(&vnode->lock); | ||
| 273 | |||
| 274 | ret = afs_vnode_set_lock(vnode, key, type); | ||
| 275 | clear_bit(AFS_VNODE_LOCKING, &vnode->flags); | ||
| 276 | switch (ret) { | ||
| 277 | case 0: | ||
| 278 | goto acquired_server_lock; | ||
| 279 | case -EWOULDBLOCK: | ||
| 280 | spin_lock(&vnode->lock); | ||
| 281 | ASSERT(list_empty(&vnode->granted_locks)); | ||
| 282 | ASSERTCMP(vnode->pending_locks.next, ==, | ||
| 283 | &fl->fl_u.afs.link); | ||
| 284 | goto wait; | ||
| 285 | default: | ||
| 286 | spin_lock(&vnode->lock); | ||
| 287 | list_del_init(&fl->fl_u.afs.link); | ||
| 288 | spin_unlock(&vnode->lock); | ||
| 289 | goto error; | ||
| 290 | } | ||
| 291 | } | ||
| 292 | |||
| 293 | /* if we've already got a readlock on the server and no waiting | ||
| 294 | * writelocks, then we might be able to instantly grant another | ||
| 295 | * readlock */ | ||
| 296 | if (type == AFS_LOCK_READ && | ||
| 297 | vnode->flags & (1 << AFS_VNODE_READLOCKED)) { | ||
| 298 | _debug("instant readlock"); | ||
| 299 | ASSERTCMP(vnode->flags & | ||
| 300 | ((1 << AFS_VNODE_LOCKING) | | ||
| 301 | (1 << AFS_VNODE_WRITELOCKED)), ==, 0); | ||
| 302 | ASSERT(!list_empty(&vnode->granted_locks)); | ||
| 303 | goto sharing_existing_lock; | ||
| 304 | } | ||
| 305 | } | ||
| 306 | |||
| 307 | /* otherwise, we need to wait for a local lock to become available */ | ||
| 308 | _debug("wait local"); | ||
| 309 | list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); | ||
| 310 | wait: | ||
| 311 | if (!(fl->fl_flags & FL_SLEEP)) { | ||
| 312 | _debug("noblock"); | ||
| 313 | ret = -EAGAIN; | ||
| 314 | goto abort_attempt; | ||
| 315 | } | ||
| 316 | spin_unlock(&vnode->lock); | ||
| 317 | |||
| 318 | /* now we need to sleep and wait for the lock manager thread to get the | ||
| 319 | * lock from the server */ | ||
| 320 | _debug("sleep"); | ||
| 321 | ret = wait_event_interruptible(fl->fl_wait, | ||
| 322 | fl->fl_u.afs.state <= AFS_LOCK_GRANTED); | ||
| 323 | if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) { | ||
| 324 | ret = fl->fl_u.afs.state; | ||
| 325 | if (ret < 0) | ||
| 326 | goto error; | ||
| 327 | spin_lock(&vnode->lock); | ||
| 328 | goto given_lock; | ||
| 329 | } | ||
| 330 | |||
| 331 | /* we were interrupted, but someone may still be in the throes of | ||
| 332 | * giving us the lock */ | ||
| 333 | _debug("intr"); | ||
| 334 | ASSERTCMP(ret, ==, -ERESTARTSYS); | ||
| 335 | |||
| 336 | spin_lock(&vnode->lock); | ||
| 337 | if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) { | ||
| 338 | ret = fl->fl_u.afs.state; | ||
| 339 | if (ret < 0) { | ||
| 340 | spin_unlock(&vnode->lock); | ||
| 341 | goto error; | ||
| 342 | } | ||
| 343 | goto given_lock; | ||
| 344 | } | ||
| 345 | |||
| 346 | abort_attempt: | ||
| 347 | /* we aren't going to get the lock, either because we're unwilling to | ||
| 348 | * wait, or because some signal happened */ | ||
| 349 | _debug("abort"); | ||
| 350 | if (list_empty(&vnode->granted_locks) && | ||
| 351 | vnode->pending_locks.next == &fl->fl_u.afs.link) { | ||
| 352 | if (vnode->pending_locks.prev != &fl->fl_u.afs.link) { | ||
| 353 | /* kick the next pending lock into having a go */ | ||
| 354 | list_del_init(&fl->fl_u.afs.link); | ||
| 355 | afs_lock_may_be_available(vnode); | ||
| 356 | } | ||
| 357 | } else { | ||
| 358 | list_del_init(&fl->fl_u.afs.link); | ||
| 359 | } | ||
| 360 | spin_unlock(&vnode->lock); | ||
| 361 | goto error; | ||
| 362 | |||
| 363 | acquired_server_lock: | ||
| 364 | /* we've acquired a server lock, but it needs to be renewed after 5 | ||
| 365 | * mins */ | ||
| 366 | spin_lock(&vnode->lock); | ||
| 367 | afs_schedule_lock_extension(vnode); | ||
| 368 | if (type == AFS_LOCK_READ) | ||
| 369 | set_bit(AFS_VNODE_READLOCKED, &vnode->flags); | ||
| 370 | else | ||
| 371 | set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); | ||
| 372 | sharing_existing_lock: | ||
| 373 | /* the lock has been granted as far as we're concerned... */ | ||
| 374 | fl->fl_u.afs.state = AFS_LOCK_GRANTED; | ||
| 375 | list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); | ||
| 376 | given_lock: | ||
| 377 | /* ... but we do still need to get the VFS's blessing */ | ||
| 378 | ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING))); | ||
| 379 | ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) | | ||
| 380 | (1 << AFS_VNODE_WRITELOCKED))) != 0); | ||
| 381 | ret = posix_lock_file(file, fl, NULL); | ||
| 382 | if (ret < 0) | ||
| 383 | goto vfs_rejected_lock; | ||
| 384 | spin_unlock(&vnode->lock); | ||
| 385 | |||
| 386 | /* again, make sure we've got a callback on this file and, again, make | ||
| 387 | * sure that our view of the data version is up to date (we ignore | ||
| 388 | * errors incurred here and deal with the consequences elsewhere) */ | ||
| 389 | afs_vnode_fetch_status(vnode, NULL, key); | ||
| 390 | |||
| 391 | error: | ||
| 392 | unlock_kernel(); | ||
| 393 | _leave(" = %d", ret); | ||
| 394 | return ret; | ||
| 395 | |||
| 396 | vfs_rejected_lock: | ||
| 397 | /* the VFS rejected the lock we just obtained, so we have to discard | ||
| 398 | * what we just got */ | ||
| 399 | _debug("vfs refused %d", ret); | ||
| 400 | list_del_init(&fl->fl_u.afs.link); | ||
| 401 | if (list_empty(&vnode->granted_locks)) | ||
| 402 | afs_defer_unlock(vnode, key); | ||
| 403 | spin_unlock(&vnode->lock); | ||
| 404 | goto abort_attempt; | ||
| 405 | } | ||
| 406 | |||
| 407 | /* | ||
| 408 | * unlock on a file on the server | ||
| 409 | */ | ||
| 410 | static int afs_do_unlk(struct file *file, struct file_lock *fl) | ||
| 411 | { | ||
| 412 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
| 413 | struct key *key = file->private_data; | ||
| 414 | int ret; | ||
| 415 | |||
| 416 | _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); | ||
| 417 | |||
| 418 | /* only whole-file unlocks are supported */ | ||
| 419 | if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) | ||
| 420 | return -EINVAL; | ||
| 421 | |||
| 422 | fl->fl_ops = &afs_lock_ops; | ||
| 423 | INIT_LIST_HEAD(&fl->fl_u.afs.link); | ||
| 424 | fl->fl_u.afs.state = AFS_LOCK_PENDING; | ||
| 425 | |||
| 426 | spin_lock(&vnode->lock); | ||
| 427 | ret = posix_lock_file(file, fl, NULL); | ||
| 428 | if (ret < 0) { | ||
| 429 | spin_unlock(&vnode->lock); | ||
| 430 | _leave(" = %d [vfs]", ret); | ||
| 431 | return ret; | ||
| 432 | } | ||
| 433 | |||
| 434 | /* discard the server lock only if all granted locks are gone */ | ||
| 435 | if (list_empty(&vnode->granted_locks)) | ||
| 436 | afs_defer_unlock(vnode, key); | ||
| 437 | spin_unlock(&vnode->lock); | ||
| 438 | _leave(" = 0"); | ||
| 439 | return 0; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * return information about a lock we currently hold, if indeed we hold one | ||
| 444 | */ | ||
| 445 | static int afs_do_getlk(struct file *file, struct file_lock *fl) | ||
| 446 | { | ||
| 447 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | ||
| 448 | struct key *key = file->private_data; | ||
| 449 | int ret, lock_count; | ||
| 450 | |||
| 451 | _enter(""); | ||
| 452 | |||
| 453 | fl->fl_type = F_UNLCK; | ||
| 454 | |||
| 455 | mutex_lock(&vnode->vfs_inode.i_mutex); | ||
| 456 | |||
| 457 | /* check local lock records first */ | ||
| 458 | ret = 0; | ||
| 459 | if (posix_test_lock(file, fl) == 0) { | ||
| 460 | /* no local locks; consult the server */ | ||
| 461 | ret = afs_vnode_fetch_status(vnode, NULL, key); | ||
| 462 | if (ret < 0) | ||
| 463 | goto error; | ||
| 464 | lock_count = vnode->status.lock_count; | ||
| 465 | if (lock_count) { | ||
| 466 | if (lock_count > 0) | ||
| 467 | fl->fl_type = F_RDLCK; | ||
| 468 | else | ||
| 469 | fl->fl_type = F_WRLCK; | ||
| 470 | fl->fl_start = 0; | ||
| 471 | fl->fl_end = OFFSET_MAX; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | |||
| 475 | error: | ||
| 476 | mutex_unlock(&vnode->vfs_inode.i_mutex); | ||
| 477 | _leave(" = %d [%hd]", ret, fl->fl_type); | ||
| 478 | return ret; | ||
| 479 | } | ||
| 480 | |||
| 481 | /* | ||
| 482 | * manage POSIX locks on a file | ||
| 483 | */ | ||
| 484 | int afs_lock(struct file *file, int cmd, struct file_lock *fl) | ||
| 485 | { | ||
| 486 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
| 487 | |||
| 488 | _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", | ||
| 489 | vnode->fid.vid, vnode->fid.vnode, cmd, | ||
| 490 | fl->fl_type, fl->fl_flags, | ||
| 491 | (long long) fl->fl_start, (long long) fl->fl_end); | ||
| 492 | |||
| 493 | /* AFS doesn't support mandatory locks */ | ||
| 494 | if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && | ||
| 495 | fl->fl_type != F_UNLCK) | ||
| 496 | return -ENOLCK; | ||
| 497 | |||
| 498 | if (IS_GETLK(cmd)) | ||
| 499 | return afs_do_getlk(file, fl); | ||
| 500 | if (fl->fl_type == F_UNLCK) | ||
| 501 | return afs_do_unlk(file, fl); | ||
| 502 | return afs_do_setlk(file, fl); | ||
| 503 | } | ||
| 504 | |||
| 505 | /* | ||
| 506 | * manage FLOCK locks on a file | ||
| 507 | */ | ||
| 508 | int afs_flock(struct file *file, int cmd, struct file_lock *fl) | ||
| 509 | { | ||
| 510 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
| 511 | |||
| 512 | _enter("{%x:%u},%d,{t=%x,fl=%x}", | ||
| 513 | vnode->fid.vid, vnode->fid.vnode, cmd, | ||
| 514 | fl->fl_type, fl->fl_flags); | ||
| 515 | |||
| 516 | /* | ||
| 517 | * No BSD flocks over NFS allowed. | ||
| 518 | * Note: we could try to fake a POSIX lock request here by | ||
| 519 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
| 520 | * Not sure whether that would be unique, though, or whether | ||
| 521 | * that would break in other places. | ||
| 522 | */ | ||
| 523 | if (!(fl->fl_flags & FL_FLOCK)) | ||
| 524 | return -ENOLCK; | ||
| 525 | |||
| 526 | /* we're simulating flock() locks using posix locks on the server */ | ||
| 527 | fl->fl_owner = (fl_owner_t) file; | ||
| 528 | fl->fl_start = 0; | ||
| 529 | fl->fl_end = OFFSET_MAX; | ||
| 530 | |||
| 531 | if (fl->fl_type == F_UNLCK) | ||
| 532 | return afs_do_unlk(file, fl); | ||
| 533 | return afs_do_setlk(file, fl); | ||
| 534 | } | ||
| 535 | |||
| 536 | /* | ||
| 537 | * the POSIX lock management core VFS code copies the lock record and adds the | ||
| 538 | * copy into its own list, so we need to add that copy to the vnode's lock | ||
| 539 | * queue in the same place as the original (which will be deleted shortly | ||
| 540 | * after) | ||
| 541 | */ | ||
| 542 | static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) | ||
| 543 | { | ||
| 544 | _enter(""); | ||
| 545 | |||
| 546 | list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link); | ||
| 547 | } | ||
| 548 | |||
| 549 | /* | ||
| 550 | * need to remove this lock from the vnode queue when it's removed from the | ||
| 551 | * VFS's list | ||
| 552 | */ | ||
| 553 | static void afs_fl_release_private(struct file_lock *fl) | ||
| 554 | { | ||
| 555 | _enter(""); | ||
| 556 | |||
| 557 | list_del_init(&fl->fl_u.afs.link); | ||
| 558 | } | ||
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 5dff1308b6f0..023b95b0d9d7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c | |||
| @@ -67,7 +67,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
| 67 | EXTRACT(status->group); | 67 | EXTRACT(status->group); |
| 68 | bp++; /* sync counter */ | 68 | bp++; /* sync counter */ |
| 69 | data_version |= (u64) ntohl(*bp++) << 32; | 69 | data_version |= (u64) ntohl(*bp++) << 32; |
| 70 | bp++; /* lock count */ | 70 | EXTRACT(status->lock_count); |
| 71 | size |= (u64) ntohl(*bp++) << 32; | 71 | size |= (u64) ntohl(*bp++) << 32; |
| 72 | bp++; /* spare 4 */ | 72 | bp++; /* spare 4 */ |
| 73 | *_bp = bp; | 73 | *_bp = bp; |
| @@ -1748,3 +1748,156 @@ int afs_fs_get_volume_status(struct afs_server *server, | |||
| 1748 | 1748 | ||
| 1749 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | 1749 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); |
| 1750 | } | 1750 | } |
| 1751 | |||
| 1752 | /* | ||
| 1753 | * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock | ||
| 1754 | */ | ||
| 1755 | static int afs_deliver_fs_xxxx_lock(struct afs_call *call, | ||
| 1756 | struct sk_buff *skb, bool last) | ||
| 1757 | { | ||
| 1758 | const __be32 *bp; | ||
| 1759 | |||
| 1760 | _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); | ||
| 1761 | |||
| 1762 | afs_transfer_reply(call, skb); | ||
| 1763 | if (!last) | ||
| 1764 | return 0; | ||
| 1765 | |||
| 1766 | if (call->reply_size != call->reply_max) | ||
| 1767 | return -EBADMSG; | ||
| 1768 | |||
| 1769 | /* unmarshall the reply once we've received all of it */ | ||
| 1770 | bp = call->buffer; | ||
| 1771 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | ||
| 1772 | |||
| 1773 | _leave(" = 0 [done]"); | ||
| 1774 | return 0; | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | /* | ||
| 1778 | * FS.SetLock operation type | ||
| 1779 | */ | ||
| 1780 | static const struct afs_call_type afs_RXFSSetLock = { | ||
| 1781 | .name = "FS.SetLock", | ||
| 1782 | .deliver = afs_deliver_fs_xxxx_lock, | ||
| 1783 | .abort_to_error = afs_abort_to_error, | ||
| 1784 | .destructor = afs_flat_call_destructor, | ||
| 1785 | }; | ||
| 1786 | |||
| 1787 | /* | ||
| 1788 | * FS.ExtendLock operation type | ||
| 1789 | */ | ||
| 1790 | static const struct afs_call_type afs_RXFSExtendLock = { | ||
| 1791 | .name = "FS.ExtendLock", | ||
| 1792 | .deliver = afs_deliver_fs_xxxx_lock, | ||
| 1793 | .abort_to_error = afs_abort_to_error, | ||
| 1794 | .destructor = afs_flat_call_destructor, | ||
| 1795 | }; | ||
| 1796 | |||
| 1797 | /* | ||
| 1798 | * FS.ReleaseLock operation type | ||
| 1799 | */ | ||
| 1800 | static const struct afs_call_type afs_RXFSReleaseLock = { | ||
| 1801 | .name = "FS.ReleaseLock", | ||
| 1802 | .deliver = afs_deliver_fs_xxxx_lock, | ||
| 1803 | .abort_to_error = afs_abort_to_error, | ||
| 1804 | .destructor = afs_flat_call_destructor, | ||
| 1805 | }; | ||
| 1806 | |||
| 1807 | /* | ||
| 1808 | * get a lock on a file | ||
| 1809 | */ | ||
| 1810 | int afs_fs_set_lock(struct afs_server *server, | ||
| 1811 | struct key *key, | ||
| 1812 | struct afs_vnode *vnode, | ||
| 1813 | afs_lock_type_t type, | ||
| 1814 | const struct afs_wait_mode *wait_mode) | ||
| 1815 | { | ||
| 1816 | struct afs_call *call; | ||
| 1817 | __be32 *bp; | ||
| 1818 | |||
| 1819 | _enter(""); | ||
| 1820 | |||
| 1821 | call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4); | ||
| 1822 | if (!call) | ||
| 1823 | return -ENOMEM; | ||
| 1824 | |||
| 1825 | call->key = key; | ||
| 1826 | call->reply = vnode; | ||
| 1827 | call->service_id = FS_SERVICE; | ||
| 1828 | call->port = htons(AFS_FS_PORT); | ||
| 1829 | |||
| 1830 | /* marshall the parameters */ | ||
| 1831 | bp = call->request; | ||
| 1832 | *bp++ = htonl(FSSETLOCK); | ||
| 1833 | *bp++ = htonl(vnode->fid.vid); | ||
| 1834 | *bp++ = htonl(vnode->fid.vnode); | ||
| 1835 | *bp++ = htonl(vnode->fid.unique); | ||
| 1836 | *bp++ = htonl(type); | ||
| 1837 | |||
| 1838 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * extend a lock on a file | ||
| 1843 | */ | ||
| 1844 | int afs_fs_extend_lock(struct afs_server *server, | ||
| 1845 | struct key *key, | ||
| 1846 | struct afs_vnode *vnode, | ||
| 1847 | const struct afs_wait_mode *wait_mode) | ||
| 1848 | { | ||
| 1849 | struct afs_call *call; | ||
| 1850 | __be32 *bp; | ||
| 1851 | |||
| 1852 | _enter(""); | ||
| 1853 | |||
| 1854 | call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4); | ||
| 1855 | if (!call) | ||
| 1856 | return -ENOMEM; | ||
| 1857 | |||
| 1858 | call->key = key; | ||
| 1859 | call->reply = vnode; | ||
| 1860 | call->service_id = FS_SERVICE; | ||
| 1861 | call->port = htons(AFS_FS_PORT); | ||
| 1862 | |||
| 1863 | /* marshall the parameters */ | ||
| 1864 | bp = call->request; | ||
| 1865 | *bp++ = htonl(FSEXTENDLOCK); | ||
| 1866 | *bp++ = htonl(vnode->fid.vid); | ||
| 1867 | *bp++ = htonl(vnode->fid.vnode); | ||
| 1868 | *bp++ = htonl(vnode->fid.unique); | ||
| 1869 | |||
| 1870 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | /* | ||
| 1874 | * release a lock on a file | ||
| 1875 | */ | ||
| 1876 | int afs_fs_release_lock(struct afs_server *server, | ||
| 1877 | struct key *key, | ||
| 1878 | struct afs_vnode *vnode, | ||
| 1879 | const struct afs_wait_mode *wait_mode) | ||
| 1880 | { | ||
| 1881 | struct afs_call *call; | ||
| 1882 | __be32 *bp; | ||
| 1883 | |||
| 1884 | _enter(""); | ||
| 1885 | |||
| 1886 | call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4); | ||
| 1887 | if (!call) | ||
| 1888 | return -ENOMEM; | ||
| 1889 | |||
| 1890 | call->key = key; | ||
| 1891 | call->reply = vnode; | ||
| 1892 | call->service_id = FS_SERVICE; | ||
| 1893 | call->port = htons(AFS_FS_PORT); | ||
| 1894 | |||
| 1895 | /* marshall the parameters */ | ||
| 1896 | bp = call->request; | ||
| 1897 | *bp++ = htonl(FSRELEASELOCK); | ||
| 1898 | *bp++ = htonl(vnode->fid.vid); | ||
| 1899 | *bp++ = htonl(vnode->fid.vnode); | ||
| 1900 | *bp++ = htonl(vnode->fid.unique); | ||
| 1901 | |||
| 1902 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
| 1903 | } | ||
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2c55dd94a1de..6306438f331f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
| @@ -351,10 +351,18 @@ struct afs_vnode { | |||
| 351 | #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ | 351 | #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ |
| 352 | #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ | 352 | #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ |
| 353 | #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ | 353 | #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ |
| 354 | #define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */ | ||
| 355 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ | ||
| 356 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ | ||
| 357 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ | ||
| 354 | 358 | ||
| 355 | long acl_order; /* ACL check count (callback break count) */ | 359 | long acl_order; /* ACL check count (callback break count) */ |
| 356 | 360 | ||
| 357 | struct list_head writebacks; /* alterations in pagecache that need writing */ | 361 | struct list_head writebacks; /* alterations in pagecache that need writing */ |
| 362 | struct list_head pending_locks; /* locks waiting to be granted */ | ||
| 363 | struct list_head granted_locks; /* locks granted on this file */ | ||
| 364 | struct delayed_work lock_work; /* work to be done in locking */ | ||
| 365 | struct key *unlock_key; /* key to be used in unlocking */ | ||
| 358 | 366 | ||
| 359 | /* outstanding callback notification on this file */ | 367 | /* outstanding callback notification on this file */ |
| 360 | struct rb_node server_rb; /* link in server->fs_vnodes */ | 368 | struct rb_node server_rb; /* link in server->fs_vnodes */ |
| @@ -474,6 +482,15 @@ extern int afs_open(struct inode *, struct file *); | |||
| 474 | extern int afs_release(struct inode *, struct file *); | 482 | extern int afs_release(struct inode *, struct file *); |
| 475 | 483 | ||
| 476 | /* | 484 | /* |
| 485 | * flock.c | ||
| 486 | */ | ||
| 487 | extern void __exit afs_kill_lock_manager(void); | ||
| 488 | extern void afs_lock_work(struct work_struct *); | ||
| 489 | extern void afs_lock_may_be_available(struct afs_vnode *); | ||
| 490 | extern int afs_lock(struct file *, int, struct file_lock *); | ||
| 491 | extern int afs_flock(struct file *, int, struct file_lock *); | ||
| 492 | |||
| 493 | /* | ||
| 477 | * fsclient.c | 494 | * fsclient.c |
| 478 | */ | 495 | */ |
| 479 | extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, | 496 | extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, |
| @@ -513,6 +530,15 @@ extern int afs_fs_get_volume_status(struct afs_server *, struct key *, | |||
| 513 | struct afs_vnode *, | 530 | struct afs_vnode *, |
| 514 | struct afs_volume_status *, | 531 | struct afs_volume_status *, |
| 515 | const struct afs_wait_mode *); | 532 | const struct afs_wait_mode *); |
| 533 | extern int afs_fs_set_lock(struct afs_server *, struct key *, | ||
| 534 | struct afs_vnode *, afs_lock_type_t, | ||
| 535 | const struct afs_wait_mode *); | ||
| 536 | extern int afs_fs_extend_lock(struct afs_server *, struct key *, | ||
| 537 | struct afs_vnode *, | ||
| 538 | const struct afs_wait_mode *); | ||
| 539 | extern int afs_fs_release_lock(struct afs_server *, struct key *, | ||
| 540 | struct afs_vnode *, | ||
| 541 | const struct afs_wait_mode *); | ||
| 516 | 542 | ||
| 517 | /* | 543 | /* |
| 518 | * inode.c | 544 | * inode.c |
| @@ -681,6 +707,10 @@ extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, | |||
| 681 | extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); | 707 | extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); |
| 682 | extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, | 708 | extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, |
| 683 | struct afs_volume_status *); | 709 | struct afs_volume_status *); |
| 710 | extern int afs_vnode_set_lock(struct afs_vnode *, struct key *, | ||
| 711 | afs_lock_type_t); | ||
| 712 | extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *); | ||
| 713 | extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); | ||
| 684 | 714 | ||
| 685 | /* | 715 | /* |
| 686 | * volume.c | 716 | * volume.c |
diff --git a/fs/afs/main.c b/fs/afs/main.c index cd21195bbb24..0f60f6b35769 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
| @@ -168,6 +168,7 @@ static void __exit afs_exit(void) | |||
| 168 | printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); | 168 | printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); |
| 169 | 169 | ||
| 170 | afs_fs_exit(); | 170 | afs_fs_exit(); |
| 171 | afs_kill_lock_manager(); | ||
| 171 | afs_close_socket(); | 172 | afs_close_socket(); |
| 172 | afs_purge_servers(); | 173 | afs_purge_servers(); |
| 173 | afs_callback_update_kill(); | 174 | afs_callback_update_kill(); |
diff --git a/fs/afs/misc.c b/fs/afs/misc.c index d1a889c40742..2d33a5f7d218 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c | |||
| @@ -35,6 +35,7 @@ int afs_abort_to_error(u32 abort_code) | |||
| 35 | case VOVERQUOTA: return -EDQUOT; | 35 | case VOVERQUOTA: return -EDQUOT; |
| 36 | case VBUSY: return -EBUSY; | 36 | case VBUSY: return -EBUSY; |
| 37 | case VMOVED: return -ENXIO; | 37 | case VMOVED: return -ENXIO; |
| 38 | case 0x2f6df0a: return -EWOULDBLOCK; | ||
| 38 | case 0x2f6df0c: return -EACCES; | 39 | case 0x2f6df0c: return -EACCES; |
| 39 | case 0x2f6df0f: return -EBUSY; | 40 | case 0x2f6df0f: return -EBUSY; |
| 40 | case 0x2f6df10: return -EEXIST; | 41 | case 0x2f6df10: return -EEXIST; |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 13df512aea9e..6edb56683b9a 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
| @@ -201,23 +201,9 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) | |||
| 201 | */ | 201 | */ |
| 202 | static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) | 202 | static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) |
| 203 | { | 203 | { |
| 204 | struct list_head *_p; | ||
| 205 | loff_t pos = *_pos; | ||
| 206 | |||
| 207 | /* lock the list against modification */ | 204 | /* lock the list against modification */ |
| 208 | down_read(&afs_proc_cells_sem); | 205 | down_read(&afs_proc_cells_sem); |
| 209 | 206 | return seq_list_start_head(&afs_proc_cells, *_pos); | |
| 210 | /* allow for the header line */ | ||
| 211 | if (!pos) | ||
| 212 | return (void *) 1; | ||
| 213 | pos--; | ||
| 214 | |||
| 215 | /* find the n'th element in the list */ | ||
| 216 | list_for_each(_p, &afs_proc_cells) | ||
| 217 | if (!pos--) | ||
| 218 | break; | ||
| 219 | |||
| 220 | return _p != &afs_proc_cells ? _p : NULL; | ||
| 221 | } | 207 | } |
| 222 | 208 | ||
| 223 | /* | 209 | /* |
| @@ -225,14 +211,7 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) | |||
| 225 | */ | 211 | */ |
| 226 | static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) | 212 | static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) |
| 227 | { | 213 | { |
| 228 | struct list_head *_p; | 214 | return seq_list_next(v, &afs_proc_cells, pos); |
| 229 | |||
| 230 | (*pos)++; | ||
| 231 | |||
| 232 | _p = v; | ||
| 233 | _p = v == (void *) 1 ? afs_proc_cells.next : _p->next; | ||
| 234 | |||
| 235 | return _p != &afs_proc_cells ? _p : NULL; | ||
| 236 | } | 215 | } |
| 237 | 216 | ||
| 238 | /* | 217 | /* |
| @@ -250,7 +229,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) | |||
| 250 | { | 229 | { |
| 251 | struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); | 230 | struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); |
| 252 | 231 | ||
| 253 | if (v == (void *) 1) { | 232 | if (v == &afs_proc_cells) { |
| 254 | /* display header on line 1 */ | 233 | /* display header on line 1 */ |
| 255 | seq_puts(m, "USE NAME\n"); | 234 | seq_puts(m, "USE NAME\n"); |
| 256 | return 0; | 235 | return 0; |
| @@ -503,26 +482,13 @@ static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file) | |||
| 503 | */ | 482 | */ |
| 504 | static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) | 483 | static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) |
| 505 | { | 484 | { |
| 506 | struct list_head *_p; | ||
| 507 | struct afs_cell *cell = m->private; | 485 | struct afs_cell *cell = m->private; |
| 508 | loff_t pos = *_pos; | ||
| 509 | 486 | ||
| 510 | _enter("cell=%p pos=%Ld", cell, *_pos); | 487 | _enter("cell=%p pos=%Ld", cell, *_pos); |
| 511 | 488 | ||
| 512 | /* lock the list against modification */ | 489 | /* lock the list against modification */ |
| 513 | down_read(&cell->vl_sem); | 490 | down_read(&cell->vl_sem); |
| 514 | 491 | return seq_list_start_head(&cell->vl_list, *_pos); | |
| 515 | /* allow for the header line */ | ||
| 516 | if (!pos) | ||
| 517 | return (void *) 1; | ||
| 518 | pos--; | ||
| 519 | |||
| 520 | /* find the n'th element in the list */ | ||
| 521 | list_for_each(_p, &cell->vl_list) | ||
| 522 | if (!pos--) | ||
| 523 | break; | ||
| 524 | |||
| 525 | return _p != &cell->vl_list ? _p : NULL; | ||
| 526 | } | 492 | } |
| 527 | 493 | ||
| 528 | /* | 494 | /* |
| @@ -531,17 +497,10 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) | |||
| 531 | static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, | 497 | static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, |
| 532 | loff_t *_pos) | 498 | loff_t *_pos) |
| 533 | { | 499 | { |
| 534 | struct list_head *_p; | ||
| 535 | struct afs_cell *cell = p->private; | 500 | struct afs_cell *cell = p->private; |
| 536 | 501 | ||
| 537 | _enter("cell=%p pos=%Ld", cell, *_pos); | 502 | _enter("cell=%p pos=%Ld", cell, *_pos); |
| 538 | 503 | return seq_list_next(v, &cell->vl_list, _pos); | |
| 539 | (*_pos)++; | ||
| 540 | |||
| 541 | _p = v; | ||
| 542 | _p = (v == (void *) 1) ? cell->vl_list.next : _p->next; | ||
| 543 | |||
| 544 | return (_p != &cell->vl_list) ? _p : NULL; | ||
| 545 | } | 504 | } |
| 546 | 505 | ||
| 547 | /* | 506 | /* |
| @@ -569,11 +528,12 @@ const char afs_vlocation_states[][4] = { | |||
| 569 | */ | 528 | */ |
| 570 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) | 529 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) |
| 571 | { | 530 | { |
| 531 | struct afs_cell *cell = m->private; | ||
| 572 | struct afs_vlocation *vlocation = | 532 | struct afs_vlocation *vlocation = |
| 573 | list_entry(v, struct afs_vlocation, link); | 533 | list_entry(v, struct afs_vlocation, link); |
| 574 | 534 | ||
| 575 | /* display header on line 1 */ | 535 | /* display header on line 1 */ |
| 576 | if (v == (void *) 1) { | 536 | if (v == &cell->vl_list) { |
| 577 | seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); | 537 | seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); |
| 578 | return 0; | 538 | return 0; |
| 579 | } | 539 | } |
| @@ -734,26 +694,13 @@ static int afs_proc_cell_servers_release(struct inode *inode, | |||
| 734 | static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) | 694 | static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) |
| 735 | __acquires(m->private->servers_lock) | 695 | __acquires(m->private->servers_lock) |
| 736 | { | 696 | { |
| 737 | struct list_head *_p; | ||
| 738 | struct afs_cell *cell = m->private; | 697 | struct afs_cell *cell = m->private; |
| 739 | loff_t pos = *_pos; | ||
| 740 | 698 | ||
| 741 | _enter("cell=%p pos=%Ld", cell, *_pos); | 699 | _enter("cell=%p pos=%Ld", cell, *_pos); |
| 742 | 700 | ||
| 743 | /* lock the list against modification */ | 701 | /* lock the list against modification */ |
| 744 | read_lock(&cell->servers_lock); | 702 | read_lock(&cell->servers_lock); |
| 745 | 703 | return seq_list_start_head(&cell->servers, *_pos); | |
| 746 | /* allow for the header line */ | ||
| 747 | if (!pos) | ||
| 748 | return (void *) 1; | ||
| 749 | pos--; | ||
| 750 | |||
| 751 | /* find the n'th element in the list */ | ||
| 752 | list_for_each(_p, &cell->servers) | ||
| 753 | if (!pos--) | ||
| 754 | break; | ||
| 755 | |||
| 756 | return _p != &cell->servers ? _p : NULL; | ||
| 757 | } | 704 | } |
| 758 | 705 | ||
| 759 | /* | 706 | /* |
| @@ -762,17 +709,10 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) | |||
| 762 | static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, | 709 | static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, |
| 763 | loff_t *_pos) | 710 | loff_t *_pos) |
| 764 | { | 711 | { |
| 765 | struct list_head *_p; | ||
| 766 | struct afs_cell *cell = p->private; | 712 | struct afs_cell *cell = p->private; |
| 767 | 713 | ||
| 768 | _enter("cell=%p pos=%Ld", cell, *_pos); | 714 | _enter("cell=%p pos=%Ld", cell, *_pos); |
| 769 | 715 | return seq_list_next(v, &cell->servers, _pos); | |
| 770 | (*_pos)++; | ||
| 771 | |||
| 772 | _p = v; | ||
| 773 | _p = v == (void *) 1 ? cell->servers.next : _p->next; | ||
| 774 | |||
| 775 | return _p != &cell->servers ? _p : NULL; | ||
| 776 | } | 716 | } |
| 777 | 717 | ||
| 778 | /* | 718 | /* |
| @@ -791,11 +731,12 @@ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) | |||
| 791 | */ | 731 | */ |
| 792 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v) | 732 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v) |
| 793 | { | 733 | { |
| 734 | struct afs_cell *cell = m->private; | ||
| 794 | struct afs_server *server = list_entry(v, struct afs_server, link); | 735 | struct afs_server *server = list_entry(v, struct afs_server, link); |
| 795 | char ipaddr[20]; | 736 | char ipaddr[20]; |
| 796 | 737 | ||
| 797 | /* display header on line 1 */ | 738 | /* display header on line 1 */ |
| 798 | if (v == (void *) 1) { | 739 | if (v == &cell->servers) { |
| 799 | seq_puts(m, "USE ADDR STATE\n"); | 740 | seq_puts(m, "USE ADDR STATE\n"); |
| 800 | return 0; | 741 | return 0; |
| 801 | } | 742 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 2e8496ba1205..993cdf1cce3a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
| @@ -460,6 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, | |||
| 460 | spin_lock_init(&vnode->writeback_lock); | 460 | spin_lock_init(&vnode->writeback_lock); |
| 461 | spin_lock_init(&vnode->lock); | 461 | spin_lock_init(&vnode->lock); |
| 462 | INIT_LIST_HEAD(&vnode->writebacks); | 462 | INIT_LIST_HEAD(&vnode->writebacks); |
| 463 | INIT_LIST_HEAD(&vnode->pending_locks); | ||
| 464 | INIT_LIST_HEAD(&vnode->granted_locks); | ||
| 465 | INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); | ||
| 463 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); | 466 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); |
| 464 | } | 467 | } |
| 465 | 468 | ||
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 232c55dc245d..2f05c4fc2a70 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c | |||
| @@ -561,7 +561,7 @@ no_server: | |||
| 561 | /* | 561 | /* |
| 562 | * create a hard link | 562 | * create a hard link |
| 563 | */ | 563 | */ |
| 564 | extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, | 564 | int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, |
| 565 | struct key *key, const char *name) | 565 | struct key *key, const char *name) |
| 566 | { | 566 | { |
| 567 | struct afs_server *server; | 567 | struct afs_server *server; |
| @@ -887,11 +887,6 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, | |||
| 887 | vnode->fid.unique, | 887 | vnode->fid.unique, |
| 888 | key_serial(key)); | 888 | key_serial(key)); |
| 889 | 889 | ||
| 890 | /* this op will fetch the status */ | ||
| 891 | spin_lock(&vnode->lock); | ||
| 892 | vnode->update_cnt++; | ||
| 893 | spin_unlock(&vnode->lock); | ||
| 894 | |||
| 895 | do { | 890 | do { |
| 896 | /* pick a server to query */ | 891 | /* pick a server to query */ |
| 897 | server = afs_volume_pick_fileserver(vnode); | 892 | server = afs_volume_pick_fileserver(vnode); |
| @@ -905,20 +900,127 @@ int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, | |||
| 905 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | 900 | } while (!afs_volume_release_fileserver(vnode, server, ret)); |
| 906 | 901 | ||
| 907 | /* adjust the flags */ | 902 | /* adjust the flags */ |
| 908 | if (ret == 0) { | 903 | if (ret == 0) |
| 909 | afs_vnode_finalise_status_update(vnode, server); | 904 | afs_put_server(server); |
| 905 | |||
| 906 | _leave(" = %d", ret); | ||
| 907 | return ret; | ||
| 908 | |||
| 909 | no_server: | ||
| 910 | return PTR_ERR(server); | ||
| 911 | } | ||
| 912 | |||
| 913 | /* | ||
| 914 | * get a lock on a file | ||
| 915 | */ | ||
| 916 | int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, | ||
| 917 | afs_lock_type_t type) | ||
| 918 | { | ||
| 919 | struct afs_server *server; | ||
| 920 | int ret; | ||
| 921 | |||
| 922 | _enter("%s{%x:%u.%u},%x,%u", | ||
| 923 | vnode->volume->vlocation->vldb.name, | ||
| 924 | vnode->fid.vid, | ||
| 925 | vnode->fid.vnode, | ||
| 926 | vnode->fid.unique, | ||
| 927 | key_serial(key), type); | ||
| 928 | |||
| 929 | do { | ||
| 930 | /* pick a server to query */ | ||
| 931 | server = afs_volume_pick_fileserver(vnode); | ||
| 932 | if (IS_ERR(server)) | ||
| 933 | goto no_server; | ||
| 934 | |||
| 935 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
| 936 | |||
| 937 | ret = afs_fs_set_lock(server, key, vnode, type, &afs_sync_call); | ||
| 938 | |||
| 939 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
| 940 | |||
| 941 | /* adjust the flags */ | ||
| 942 | if (ret == 0) | ||
| 943 | afs_put_server(server); | ||
| 944 | |||
| 945 | _leave(" = %d", ret); | ||
| 946 | return ret; | ||
| 947 | |||
| 948 | no_server: | ||
| 949 | return PTR_ERR(server); | ||
| 950 | } | ||
| 951 | |||
| 952 | /* | ||
| 953 | * extend a lock on a file | ||
| 954 | */ | ||
| 955 | int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) | ||
| 956 | { | ||
| 957 | struct afs_server *server; | ||
| 958 | int ret; | ||
| 959 | |||
| 960 | _enter("%s{%x:%u.%u},%x", | ||
| 961 | vnode->volume->vlocation->vldb.name, | ||
| 962 | vnode->fid.vid, | ||
| 963 | vnode->fid.vnode, | ||
| 964 | vnode->fid.unique, | ||
| 965 | key_serial(key)); | ||
| 966 | |||
| 967 | do { | ||
| 968 | /* pick a server to query */ | ||
| 969 | server = afs_volume_pick_fileserver(vnode); | ||
| 970 | if (IS_ERR(server)) | ||
| 971 | goto no_server; | ||
| 972 | |||
| 973 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
| 974 | |||
| 975 | ret = afs_fs_extend_lock(server, key, vnode, &afs_sync_call); | ||
| 976 | |||
| 977 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
| 978 | |||
| 979 | /* adjust the flags */ | ||
| 980 | if (ret == 0) | ||
| 981 | afs_put_server(server); | ||
| 982 | |||
| 983 | _leave(" = %d", ret); | ||
| 984 | return ret; | ||
| 985 | |||
| 986 | no_server: | ||
| 987 | return PTR_ERR(server); | ||
| 988 | } | ||
| 989 | |||
| 990 | /* | ||
| 991 | * release a lock on a file | ||
| 992 | */ | ||
| 993 | int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) | ||
| 994 | { | ||
| 995 | struct afs_server *server; | ||
| 996 | int ret; | ||
| 997 | |||
| 998 | _enter("%s{%x:%u.%u},%x", | ||
| 999 | vnode->volume->vlocation->vldb.name, | ||
| 1000 | vnode->fid.vid, | ||
| 1001 | vnode->fid.vnode, | ||
| 1002 | vnode->fid.unique, | ||
| 1003 | key_serial(key)); | ||
| 1004 | |||
| 1005 | do { | ||
| 1006 | /* pick a server to query */ | ||
| 1007 | server = afs_volume_pick_fileserver(vnode); | ||
| 1008 | if (IS_ERR(server)) | ||
| 1009 | goto no_server; | ||
| 1010 | |||
| 1011 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
| 1012 | |||
| 1013 | ret = afs_fs_release_lock(server, key, vnode, &afs_sync_call); | ||
| 1014 | |||
| 1015 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
| 1016 | |||
| 1017 | /* adjust the flags */ | ||
| 1018 | if (ret == 0) | ||
| 910 | afs_put_server(server); | 1019 | afs_put_server(server); |
| 911 | } else { | ||
| 912 | afs_vnode_status_update_failed(vnode, ret); | ||
| 913 | } | ||
| 914 | 1020 | ||
| 915 | _leave(" = %d", ret); | 1021 | _leave(" = %d", ret); |
| 916 | return ret; | 1022 | return ret; |
| 917 | 1023 | ||
| 918 | no_server: | 1024 | no_server: |
| 919 | spin_lock(&vnode->lock); | ||
| 920 | vnode->update_cnt--; | ||
| 921 | ASSERTCMP(vnode->update_cnt, >=, 0); | ||
| 922 | spin_unlock(&vnode->lock); | ||
| 923 | return PTR_ERR(server); | 1025 | return PTR_ERR(server); |
| 924 | } | 1026 | } |
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index edc67486238f..b4a75880f6fd 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
| @@ -53,7 +53,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = { | |||
| 53 | }; | 53 | }; |
| 54 | 54 | ||
| 55 | /** | 55 | /** |
| 56 | * anon_inode_getfd - creates a new file instance by hooking it up to and | 56 | * anon_inode_getfd - creates a new file instance by hooking it up to an |
| 57 | * anonymous inode, and a dentry that describe the "class" | 57 | * anonymous inode, and a dentry that describe the "class" |
| 58 | * of the file | 58 | * of the file |
| 59 | * | 59 | * |
| @@ -66,7 +66,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = { | |||
| 66 | * | 66 | * |
| 67 | * Creates a new file by hooking it on a single inode. This is useful for files | 67 | * Creates a new file by hooking it on a single inode. This is useful for files |
| 68 | * that do not need to have a full-fledged inode in order to operate correctly. | 68 | * that do not need to have a full-fledged inode in order to operate correctly. |
| 69 | * All the files created with anon_inode_getfd() will share a single inode, by | 69 | * All the files created with anon_inode_getfd() will share a single inode, |
| 70 | * hence saving memory and avoiding code duplication for the file/inode/dentry | 70 | * hence saving memory and avoiding code duplication for the file/inode/dentry |
| 71 | * setup. | 71 | * setup. |
| 72 | */ | 72 | */ |
| @@ -142,9 +142,9 @@ err_put_filp: | |||
| 142 | EXPORT_SYMBOL_GPL(anon_inode_getfd); | 142 | EXPORT_SYMBOL_GPL(anon_inode_getfd); |
| 143 | 143 | ||
| 144 | /* | 144 | /* |
| 145 | * A single inode exist for all anon_inode files. Contrary to pipes, | 145 | * A single inode exists for all anon_inode files. Contrary to pipes, |
| 146 | * anon_inode inodes has no per-instance data associated, so we can avoid | 146 | * anon_inode inodes have no associated per-instance data, so we need |
| 147 | * the allocation of multiple of them. | 147 | * only allocate one of them. |
| 148 | */ | 148 | */ |
| 149 | static struct inode *anon_inode_mkinode(void) | 149 | static struct inode *anon_inode_mkinode(void) |
| 150 | { | 150 | { |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 08e4414b8374..a27e42bf3400 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -45,7 +45,7 @@ | |||
| 45 | 45 | ||
| 46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); | 46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); |
| 47 | static int load_elf_library(struct file *); | 47 | static int load_elf_library(struct file *); |
| 48 | static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); | 48 | static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long); |
| 49 | 49 | ||
| 50 | /* | 50 | /* |
| 51 | * If we don't support core dumping, then supply a NULL so we | 51 | * If we don't support core dumping, then supply a NULL so we |
| @@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = { | |||
| 80 | .hasvdso = 1 | 80 | .hasvdso = 1 |
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) | 83 | #define BAD_ADDR(x) IS_ERR_VALUE(x) |
| 84 | 84 | ||
| 85 | static int set_brk(unsigned long start, unsigned long end) | 85 | static int set_brk(unsigned long start, unsigned long end) |
| 86 | { | 86 | { |
| @@ -285,33 +285,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, | |||
| 285 | #ifndef elf_map | 285 | #ifndef elf_map |
| 286 | 286 | ||
| 287 | static unsigned long elf_map(struct file *filep, unsigned long addr, | 287 | static unsigned long elf_map(struct file *filep, unsigned long addr, |
| 288 | struct elf_phdr *eppnt, int prot, int type) | 288 | struct elf_phdr *eppnt, int prot, int type, |
| 289 | unsigned long total_size) | ||
| 289 | { | 290 | { |
| 290 | unsigned long map_addr; | 291 | unsigned long map_addr; |
| 291 | unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); | 292 | unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); |
| 293 | unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); | ||
| 294 | addr = ELF_PAGESTART(addr); | ||
| 295 | size = ELF_PAGEALIGN(size); | ||
| 292 | 296 | ||
| 293 | down_write(¤t->mm->mmap_sem); | ||
| 294 | /* mmap() will return -EINVAL if given a zero size, but a | 297 | /* mmap() will return -EINVAL if given a zero size, but a |
| 295 | * segment with zero filesize is perfectly valid */ | 298 | * segment with zero filesize is perfectly valid */ |
| 296 | if (eppnt->p_filesz + pageoffset) | 299 | if (!size) |
| 297 | map_addr = do_mmap(filep, ELF_PAGESTART(addr), | 300 | return addr; |
| 298 | eppnt->p_filesz + pageoffset, prot, type, | 301 | |
| 299 | eppnt->p_offset - pageoffset); | 302 | down_write(¤t->mm->mmap_sem); |
| 300 | else | 303 | /* |
| 301 | map_addr = ELF_PAGESTART(addr); | 304 | * total_size is the size of the ELF (interpreter) image. |
| 305 | * The _first_ mmap needs to know the full size, otherwise | ||
| 306 | * randomization might put this image into an overlapping | ||
| 307 | * position with the ELF binary image. (since size < total_size) | ||
| 308 | * So we first map the 'big' image - and unmap the remainder at | ||
| 309 | * the end. (which unmap is needed for ELF images with holes.) | ||
| 310 | */ | ||
| 311 | if (total_size) { | ||
| 312 | total_size = ELF_PAGEALIGN(total_size); | ||
| 313 | map_addr = do_mmap(filep, addr, total_size, prot, type, off); | ||
| 314 | if (!BAD_ADDR(map_addr)) | ||
| 315 | do_munmap(current->mm, map_addr+size, total_size-size); | ||
| 316 | } else | ||
| 317 | map_addr = do_mmap(filep, addr, size, prot, type, off); | ||
| 318 | |||
| 302 | up_write(¤t->mm->mmap_sem); | 319 | up_write(¤t->mm->mmap_sem); |
| 303 | return(map_addr); | 320 | return(map_addr); |
| 304 | } | 321 | } |
| 305 | 322 | ||
| 306 | #endif /* !elf_map */ | 323 | #endif /* !elf_map */ |
| 307 | 324 | ||
| 325 | static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) | ||
| 326 | { | ||
| 327 | int i, first_idx = -1, last_idx = -1; | ||
| 328 | |||
| 329 | for (i = 0; i < nr; i++) { | ||
| 330 | if (cmds[i].p_type == PT_LOAD) { | ||
| 331 | last_idx = i; | ||
| 332 | if (first_idx == -1) | ||
| 333 | first_idx = i; | ||
| 334 | } | ||
| 335 | } | ||
| 336 | if (first_idx == -1) | ||
| 337 | return 0; | ||
| 338 | |||
| 339 | return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - | ||
| 340 | ELF_PAGESTART(cmds[first_idx].p_vaddr); | ||
| 341 | } | ||
| 342 | |||
| 343 | |||
| 308 | /* This is much more generalized than the library routine read function, | 344 | /* This is much more generalized than the library routine read function, |
| 309 | so we keep this separate. Technically the library read function | 345 | so we keep this separate. Technically the library read function |
| 310 | is only provided so that we can read a.out libraries that have | 346 | is only provided so that we can read a.out libraries that have |
| 311 | an ELF header */ | 347 | an ELF header */ |
| 312 | 348 | ||
| 313 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | 349 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, |
| 314 | struct file *interpreter, unsigned long *interp_load_addr) | 350 | struct file *interpreter, unsigned long *interp_map_addr, |
| 351 | unsigned long no_base) | ||
| 315 | { | 352 | { |
| 316 | struct elf_phdr *elf_phdata; | 353 | struct elf_phdr *elf_phdata; |
| 317 | struct elf_phdr *eppnt; | 354 | struct elf_phdr *eppnt; |
| @@ -319,6 +356,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
| 319 | int load_addr_set = 0; | 356 | int load_addr_set = 0; |
| 320 | unsigned long last_bss = 0, elf_bss = 0; | 357 | unsigned long last_bss = 0, elf_bss = 0; |
| 321 | unsigned long error = ~0UL; | 358 | unsigned long error = ~0UL; |
| 359 | unsigned long total_size; | ||
| 322 | int retval, i, size; | 360 | int retval, i, size; |
| 323 | 361 | ||
| 324 | /* First of all, some simple consistency checks */ | 362 | /* First of all, some simple consistency checks */ |
| @@ -357,6 +395,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
| 357 | goto out_close; | 395 | goto out_close; |
| 358 | } | 396 | } |
| 359 | 397 | ||
| 398 | total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum); | ||
| 399 | if (!total_size) { | ||
| 400 | error = -EINVAL; | ||
| 401 | goto out_close; | ||
| 402 | } | ||
| 403 | |||
| 360 | eppnt = elf_phdata; | 404 | eppnt = elf_phdata; |
| 361 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { | 405 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { |
| 362 | if (eppnt->p_type == PT_LOAD) { | 406 | if (eppnt->p_type == PT_LOAD) { |
| @@ -374,9 +418,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
| 374 | vaddr = eppnt->p_vaddr; | 418 | vaddr = eppnt->p_vaddr; |
| 375 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) | 419 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) |
| 376 | elf_type |= MAP_FIXED; | 420 | elf_type |= MAP_FIXED; |
| 421 | else if (no_base && interp_elf_ex->e_type == ET_DYN) | ||
| 422 | load_addr = -vaddr; | ||
| 377 | 423 | ||
| 378 | map_addr = elf_map(interpreter, load_addr + vaddr, | 424 | map_addr = elf_map(interpreter, load_addr + vaddr, |
| 379 | eppnt, elf_prot, elf_type); | 425 | eppnt, elf_prot, elf_type, total_size); |
| 426 | total_size = 0; | ||
| 427 | if (!*interp_map_addr) | ||
| 428 | *interp_map_addr = map_addr; | ||
| 380 | error = map_addr; | 429 | error = map_addr; |
| 381 | if (BAD_ADDR(map_addr)) | 430 | if (BAD_ADDR(map_addr)) |
| 382 | goto out_close; | 431 | goto out_close; |
| @@ -442,8 +491,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
| 442 | goto out_close; | 491 | goto out_close; |
| 443 | } | 492 | } |
| 444 | 493 | ||
| 445 | *interp_load_addr = load_addr; | 494 | error = load_addr; |
| 446 | error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; | ||
| 447 | 495 | ||
| 448 | out_close: | 496 | out_close: |
| 449 | kfree(elf_phdata); | 497 | kfree(elf_phdata); |
| @@ -540,7 +588,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 540 | int elf_exec_fileno; | 588 | int elf_exec_fileno; |
| 541 | int retval, i; | 589 | int retval, i; |
| 542 | unsigned int size; | 590 | unsigned int size; |
| 543 | unsigned long elf_entry, interp_load_addr = 0; | 591 | unsigned long elf_entry; |
| 592 | unsigned long interp_load_addr = 0; | ||
| 544 | unsigned long start_code, end_code, start_data, end_data; | 593 | unsigned long start_code, end_code, start_data, end_data; |
| 545 | unsigned long reloc_func_desc = 0; | 594 | unsigned long reloc_func_desc = 0; |
| 546 | char passed_fileno[6]; | 595 | char passed_fileno[6]; |
| @@ -808,9 +857,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 808 | current->mm->start_stack = bprm->p; | 857 | current->mm->start_stack = bprm->p; |
| 809 | 858 | ||
| 810 | /* Now we do a little grungy work by mmaping the ELF image into | 859 | /* Now we do a little grungy work by mmaping the ELF image into |
| 811 | the correct location in memory. At this point, we assume that | 860 | the correct location in memory. */ |
| 812 | the image should be loaded at fixed address, not at a variable | ||
| 813 | address. */ | ||
| 814 | for(i = 0, elf_ppnt = elf_phdata; | 861 | for(i = 0, elf_ppnt = elf_phdata; |
| 815 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { | 862 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { |
| 816 | int elf_prot = 0, elf_flags; | 863 | int elf_prot = 0, elf_flags; |
| @@ -864,11 +911,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 864 | * default mmap base, as well as whatever program they | 911 | * default mmap base, as well as whatever program they |
| 865 | * might try to exec. This is because the brk will | 912 | * might try to exec. This is because the brk will |
| 866 | * follow the loader, and is not movable. */ | 913 | * follow the loader, and is not movable. */ |
| 914 | #ifdef CONFIG_X86 | ||
| 915 | load_bias = 0; | ||
| 916 | #else | ||
| 867 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 917 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); |
| 918 | #endif | ||
| 868 | } | 919 | } |
| 869 | 920 | ||
| 870 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, | 921 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, |
| 871 | elf_prot, elf_flags); | 922 | elf_prot, elf_flags,0); |
| 872 | if (BAD_ADDR(error)) { | 923 | if (BAD_ADDR(error)) { |
| 873 | send_sig(SIGKILL, current, 0); | 924 | send_sig(SIGKILL, current, 0); |
| 874 | retval = IS_ERR((void *)error) ? | 925 | retval = IS_ERR((void *)error) ? |
| @@ -944,13 +995,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 944 | } | 995 | } |
| 945 | 996 | ||
| 946 | if (elf_interpreter) { | 997 | if (elf_interpreter) { |
| 947 | if (interpreter_type == INTERPRETER_AOUT) | 998 | if (interpreter_type == INTERPRETER_AOUT) { |
| 948 | elf_entry = load_aout_interp(&loc->interp_ex, | 999 | elf_entry = load_aout_interp(&loc->interp_ex, |
| 949 | interpreter); | 1000 | interpreter); |
| 950 | else | 1001 | } else { |
| 1002 | unsigned long uninitialized_var(interp_map_addr); | ||
| 1003 | |||
| 951 | elf_entry = load_elf_interp(&loc->interp_elf_ex, | 1004 | elf_entry = load_elf_interp(&loc->interp_elf_ex, |
| 952 | interpreter, | 1005 | interpreter, |
| 953 | &interp_load_addr); | 1006 | &interp_map_addr, |
| 1007 | load_bias); | ||
| 1008 | if (!BAD_ADDR(elf_entry)) { | ||
| 1009 | /* | ||
| 1010 | * load_elf_interp() returns relocation | ||
| 1011 | * adjustment | ||
| 1012 | */ | ||
| 1013 | interp_load_addr = elf_entry; | ||
| 1014 | elf_entry += loc->interp_elf_ex.e_entry; | ||
| 1015 | } | ||
| 1016 | } | ||
| 954 | if (BAD_ADDR(elf_entry)) { | 1017 | if (BAD_ADDR(elf_entry)) { |
| 955 | force_sig(SIGSEGV, current); | 1018 | force_sig(SIGSEGV, current); |
| 956 | retval = IS_ERR((void *)elf_entry) ? | 1019 | retval = IS_ERR((void *)elf_entry) ? |
diff --git a/fs/block_dev.c b/fs/block_dev.c index b3e9bfa748cf..3635315e3b99 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -588,12 +588,10 @@ EXPORT_SYMBOL(bdget); | |||
| 588 | 588 | ||
| 589 | long nr_blockdev_pages(void) | 589 | long nr_blockdev_pages(void) |
| 590 | { | 590 | { |
| 591 | struct list_head *p; | 591 | struct block_device *bdev; |
| 592 | long ret = 0; | 592 | long ret = 0; |
| 593 | spin_lock(&bdev_lock); | 593 | spin_lock(&bdev_lock); |
| 594 | list_for_each(p, &all_bdevs) { | 594 | list_for_each_entry(bdev, &all_bdevs, bd_list) { |
| 595 | struct block_device *bdev; | ||
| 596 | bdev = list_entry(p, struct block_device, bd_list); | ||
| 597 | ret += bdev->bd_inode->i_mapping->nrpages; | 595 | ret += bdev->bd_inode->i_mapping->nrpages; |
| 598 | } | 596 | } |
| 599 | spin_unlock(&bdev_lock); | 597 | spin_unlock(&bdev_lock); |
| @@ -874,7 +872,7 @@ static struct bd_holder *find_bd_holder(struct block_device *bdev, | |||
| 874 | */ | 872 | */ |
| 875 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | 873 | static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) |
| 876 | { | 874 | { |
| 877 | int ret; | 875 | int err; |
| 878 | 876 | ||
| 879 | if (!bo) | 877 | if (!bo) |
| 880 | return -EINVAL; | 878 | return -EINVAL; |
| @@ -882,15 +880,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) | |||
| 882 | if (!bd_holder_grab_dirs(bdev, bo)) | 880 | if (!bd_holder_grab_dirs(bdev, bo)) |
| 883 | return -EBUSY; | 881 | return -EBUSY; |
| 884 | 882 | ||
| 885 | ret = add_symlink(bo->sdir, bo->sdev); | 883 | err = add_symlink(bo->sdir, bo->sdev); |
| 886 | if (ret == 0) { | 884 | if (err) |
| 887 | ret = add_symlink(bo->hdir, bo->hdev); | 885 | return err; |
| 888 | if (ret) | 886 | |
| 889 | del_symlink(bo->sdir, bo->sdev); | 887 | err = add_symlink(bo->hdir, bo->hdev); |
| 888 | if (err) { | ||
| 889 | del_symlink(bo->sdir, bo->sdev); | ||
| 890 | return err; | ||
| 890 | } | 891 | } |
| 891 | if (ret == 0) | 892 | |
| 892 | list_add_tail(&bo->list, &bdev->bd_holder_list); | 893 | list_add_tail(&bo->list, &bdev->bd_holder_list); |
| 893 | return ret; | 894 | return 0; |
| 894 | } | 895 | } |
| 895 | 896 | ||
| 896 | /** | 897 | /** |
| @@ -948,7 +949,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev, | |||
| 948 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | 949 | static int bd_claim_by_kobject(struct block_device *bdev, void *holder, |
| 949 | struct kobject *kobj) | 950 | struct kobject *kobj) |
| 950 | { | 951 | { |
| 951 | int res; | 952 | int err; |
| 952 | struct bd_holder *bo, *found; | 953 | struct bd_holder *bo, *found; |
| 953 | 954 | ||
| 954 | if (!kobj) | 955 | if (!kobj) |
| @@ -959,21 +960,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |||
| 959 | return -ENOMEM; | 960 | return -ENOMEM; |
| 960 | 961 | ||
| 961 | mutex_lock(&bdev->bd_mutex); | 962 | mutex_lock(&bdev->bd_mutex); |
| 962 | res = bd_claim(bdev, holder); | ||
| 963 | if (res == 0) { | ||
| 964 | found = find_bd_holder(bdev, bo); | ||
| 965 | if (found == NULL) { | ||
| 966 | res = add_bd_holder(bdev, bo); | ||
| 967 | if (res) | ||
| 968 | bd_release(bdev); | ||
| 969 | } | ||
| 970 | } | ||
| 971 | 963 | ||
| 972 | if (res || found) | 964 | err = bd_claim(bdev, holder); |
| 973 | free_bd_holder(bo); | 965 | if (err) |
| 974 | mutex_unlock(&bdev->bd_mutex); | 966 | goto fail; |
| 975 | 967 | ||
| 976 | return res; | 968 | found = find_bd_holder(bdev, bo); |
| 969 | if (found) | ||
| 970 | goto fail; | ||
| 971 | |||
| 972 | err = add_bd_holder(bdev, bo); | ||
| 973 | if (err) | ||
| 974 | bd_release(bdev); | ||
| 975 | else | ||
| 976 | bo = NULL; | ||
| 977 | fail: | ||
| 978 | mutex_unlock(&bdev->bd_mutex); | ||
| 979 | free_bd_holder(bo); | ||
| 980 | return err; | ||
| 977 | } | 981 | } |
| 978 | 982 | ||
| 979 | /** | 983 | /** |
| @@ -987,15 +991,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |||
| 987 | static void bd_release_from_kobject(struct block_device *bdev, | 991 | static void bd_release_from_kobject(struct block_device *bdev, |
| 988 | struct kobject *kobj) | 992 | struct kobject *kobj) |
| 989 | { | 993 | { |
| 990 | struct bd_holder *bo; | ||
| 991 | |||
| 992 | if (!kobj) | 994 | if (!kobj) |
| 993 | return; | 995 | return; |
| 994 | 996 | ||
| 995 | mutex_lock(&bdev->bd_mutex); | 997 | mutex_lock(&bdev->bd_mutex); |
| 996 | bd_release(bdev); | 998 | bd_release(bdev); |
| 997 | if ((bo = del_bd_holder(bdev, kobj))) | 999 | free_bd_holder(del_bd_holder(bdev, kobj)); |
| 998 | free_bd_holder(bo); | ||
| 999 | mutex_unlock(&bdev->bd_mutex); | 1000 | mutex_unlock(&bdev->bd_mutex); |
| 1000 | } | 1001 | } |
| 1001 | 1002 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index aa68206bd517..0f9006714230 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -356,7 +356,7 @@ static void free_more_memory(void) | |||
| 356 | for_each_online_pgdat(pgdat) { | 356 | for_each_online_pgdat(pgdat) { |
| 357 | zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; | 357 | zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; |
| 358 | if (*zones) | 358 | if (*zones) |
| 359 | try_to_free_pages(zones, GFP_NOFS); | 359 | try_to_free_pages(zones, 0, GFP_NOFS); |
| 360 | } | 360 | } |
| 361 | } | 361 | } |
| 362 | 362 | ||
| @@ -676,6 +676,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) | |||
| 676 | EXPORT_SYMBOL(mark_buffer_dirty_inode); | 676 | EXPORT_SYMBOL(mark_buffer_dirty_inode); |
| 677 | 677 | ||
| 678 | /* | 678 | /* |
| 679 | * Mark the page dirty, and set it dirty in the radix tree, and mark the inode | ||
| 680 | * dirty. | ||
| 681 | * | ||
| 682 | * If warn is true, then emit a warning if the page is not uptodate and has | ||
| 683 | * not been truncated. | ||
| 684 | */ | ||
| 685 | static int __set_page_dirty(struct page *page, | ||
| 686 | struct address_space *mapping, int warn) | ||
| 687 | { | ||
| 688 | if (unlikely(!mapping)) | ||
| 689 | return !TestSetPageDirty(page); | ||
| 690 | |||
| 691 | if (TestSetPageDirty(page)) | ||
| 692 | return 0; | ||
| 693 | |||
| 694 | write_lock_irq(&mapping->tree_lock); | ||
| 695 | if (page->mapping) { /* Race with truncate? */ | ||
| 696 | WARN_ON_ONCE(warn && !PageUptodate(page)); | ||
| 697 | |||
| 698 | if (mapping_cap_account_dirty(mapping)) { | ||
| 699 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
| 700 | task_io_account_write(PAGE_CACHE_SIZE); | ||
| 701 | } | ||
| 702 | radix_tree_tag_set(&mapping->page_tree, | ||
| 703 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
| 704 | } | ||
| 705 | write_unlock_irq(&mapping->tree_lock); | ||
| 706 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
| 707 | |||
| 708 | return 1; | ||
| 709 | } | ||
| 710 | |||
| 711 | /* | ||
| 679 | * Add a page to the dirty page list. | 712 | * Add a page to the dirty page list. |
| 680 | * | 713 | * |
| 681 | * It is a sad fact of life that this function is called from several places | 714 | * It is a sad fact of life that this function is called from several places |
| @@ -702,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); | |||
| 702 | */ | 735 | */ |
| 703 | int __set_page_dirty_buffers(struct page *page) | 736 | int __set_page_dirty_buffers(struct page *page) |
| 704 | { | 737 | { |
| 705 | struct address_space * const mapping = page_mapping(page); | 738 | struct address_space *mapping = page_mapping(page); |
| 706 | 739 | ||
| 707 | if (unlikely(!mapping)) | 740 | if (unlikely(!mapping)) |
| 708 | return !TestSetPageDirty(page); | 741 | return !TestSetPageDirty(page); |
| @@ -719,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page) | |||
| 719 | } | 752 | } |
| 720 | spin_unlock(&mapping->private_lock); | 753 | spin_unlock(&mapping->private_lock); |
| 721 | 754 | ||
| 722 | if (TestSetPageDirty(page)) | 755 | return __set_page_dirty(page, mapping, 1); |
| 723 | return 0; | ||
| 724 | |||
| 725 | write_lock_irq(&mapping->tree_lock); | ||
| 726 | if (page->mapping) { /* Race with truncate? */ | ||
| 727 | if (mapping_cap_account_dirty(mapping)) { | ||
| 728 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
| 729 | task_io_account_write(PAGE_CACHE_SIZE); | ||
| 730 | } | ||
| 731 | radix_tree_tag_set(&mapping->page_tree, | ||
| 732 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
| 733 | } | ||
| 734 | write_unlock_irq(&mapping->tree_lock); | ||
| 735 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
| 736 | return 1; | ||
| 737 | } | 756 | } |
| 738 | EXPORT_SYMBOL(__set_page_dirty_buffers); | 757 | EXPORT_SYMBOL(__set_page_dirty_buffers); |
| 739 | 758 | ||
| @@ -982,7 +1001,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
| 982 | struct buffer_head *bh; | 1001 | struct buffer_head *bh; |
| 983 | 1002 | ||
| 984 | page = find_or_create_page(inode->i_mapping, index, | 1003 | page = find_or_create_page(inode->i_mapping, index, |
| 985 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 1004 | (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); |
| 986 | if (!page) | 1005 | if (!page) |
| 987 | return NULL; | 1006 | return NULL; |
| 988 | 1007 | ||
| @@ -1026,11 +1045,6 @@ failed: | |||
| 1026 | /* | 1045 | /* |
| 1027 | * Create buffers for the specified block device block's page. If | 1046 | * Create buffers for the specified block device block's page. If |
| 1028 | * that page was dirty, the buffers are set dirty also. | 1047 | * that page was dirty, the buffers are set dirty also. |
| 1029 | * | ||
| 1030 | * Except that's a bug. Attaching dirty buffers to a dirty | ||
| 1031 | * blockdev's page can result in filesystem corruption, because | ||
| 1032 | * some of those buffers may be aliases of filesystem data. | ||
| 1033 | * grow_dev_page() will go BUG() if this happens. | ||
| 1034 | */ | 1048 | */ |
| 1035 | static int | 1049 | static int |
| 1036 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1050 | grow_buffers(struct block_device *bdev, sector_t block, int size) |
| @@ -1137,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
| 1137 | */ | 1151 | */ |
| 1138 | void fastcall mark_buffer_dirty(struct buffer_head *bh) | 1152 | void fastcall mark_buffer_dirty(struct buffer_head *bh) |
| 1139 | { | 1153 | { |
| 1154 | WARN_ON_ONCE(!buffer_uptodate(bh)); | ||
| 1140 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | 1155 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) |
| 1141 | __set_page_dirty_nobuffers(bh->b_page); | 1156 | __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); |
| 1142 | } | 1157 | } |
| 1143 | 1158 | ||
| 1144 | /* | 1159 | /* |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8b0cbf4a4ad0..bd0f2f2353ce 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -849,6 +849,7 @@ static int cifs_oplock_thread(void * dummyarg) | |||
| 849 | __u16 netfid; | 849 | __u16 netfid; |
| 850 | int rc; | 850 | int rc; |
| 851 | 851 | ||
| 852 | set_freezable(); | ||
| 852 | do { | 853 | do { |
| 853 | if (try_to_freeze()) | 854 | if (try_to_freeze()) |
| 854 | continue; | 855 | continue; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f4e92661b223..0a1b8bd1dfcb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -363,6 +363,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) | |||
| 363 | GFP_KERNEL); | 363 | GFP_KERNEL); |
| 364 | } | 364 | } |
| 365 | 365 | ||
| 366 | set_freezable(); | ||
| 366 | while (!kthread_should_stop()) { | 367 | while (!kthread_should_stop()) { |
| 367 | if (try_to_freeze()) | 368 | if (try_to_freeze()) |
| 368 | continue; | 369 | continue; |
diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 1d716392c3aa..96df1d51fdc3 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | */ | 29 | */ |
| 30 | 30 | ||
| 31 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
| 32 | #include <linux/exportfs.h> | ||
| 32 | 33 | ||
| 33 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 34 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
| 34 | 35 | ||
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b44cdc96fac..e440a7b95d02 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
| @@ -63,6 +63,7 @@ | |||
| 63 | #include <linux/wireless.h> | 63 | #include <linux/wireless.h> |
| 64 | #include <linux/atalk.h> | 64 | #include <linux/atalk.h> |
| 65 | #include <linux/blktrace_api.h> | 65 | #include <linux/blktrace_api.h> |
| 66 | #include <linux/loop.h> | ||
| 66 | 67 | ||
| 67 | #include <net/bluetooth/bluetooth.h> | 68 | #include <net/bluetooth/bluetooth.h> |
| 68 | #include <net/bluetooth/hci.h> | 69 | #include <net/bluetooth/hci.h> |
| @@ -3489,6 +3490,9 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) | |||
| 3489 | 3490 | ||
| 3490 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) | 3491 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) |
| 3491 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) | 3492 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) |
| 3493 | |||
| 3494 | /* loop */ | ||
| 3495 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
| 3492 | }; | 3496 | }; |
| 3493 | 3497 | ||
| 3494 | #define IOCTL_HASHSIZE 256 | 3498 | #define IOCTL_HASHSIZE 256 |
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 7b48c034b312..3b0185fdf9a4 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h | |||
| @@ -29,10 +29,11 @@ | |||
| 29 | 29 | ||
| 30 | struct configfs_dirent { | 30 | struct configfs_dirent { |
| 31 | atomic_t s_count; | 31 | atomic_t s_count; |
| 32 | int s_dependent_count; | ||
| 32 | struct list_head s_sibling; | 33 | struct list_head s_sibling; |
| 33 | struct list_head s_children; | 34 | struct list_head s_children; |
| 34 | struct list_head s_links; | 35 | struct list_head s_links; |
| 35 | void * s_element; | 36 | void * s_element; |
| 36 | int s_type; | 37 | int s_type; |
| 37 | umode_t s_mode; | 38 | umode_t s_mode; |
| 38 | struct dentry * s_dentry; | 39 | struct dentry * s_dentry; |
| @@ -41,8 +42,8 @@ struct configfs_dirent { | |||
| 41 | 42 | ||
| 42 | #define CONFIGFS_ROOT 0x0001 | 43 | #define CONFIGFS_ROOT 0x0001 |
| 43 | #define CONFIGFS_DIR 0x0002 | 44 | #define CONFIGFS_DIR 0x0002 |
| 44 | #define CONFIGFS_ITEM_ATTR 0x0004 | 45 | #define CONFIGFS_ITEM_ATTR 0x0004 |
| 45 | #define CONFIGFS_ITEM_LINK 0x0020 | 46 | #define CONFIGFS_ITEM_LINK 0x0020 |
| 46 | #define CONFIGFS_USET_DIR 0x0040 | 47 | #define CONFIGFS_USET_DIR 0x0040 |
| 47 | #define CONFIGFS_USET_DEFAULT 0x0080 | 48 | #define CONFIGFS_USET_DEFAULT 0x0080 |
| 48 | #define CONFIGFS_USET_DROPPING 0x0100 | 49 | #define CONFIGFS_USET_DROPPING 0x0100 |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5e6e37e58f36..2f436d4f1d6d 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry) | |||
| 355 | /* Mark that we've taken i_mutex */ | 355 | /* Mark that we've taken i_mutex */ |
| 356 | sd->s_type |= CONFIGFS_USET_DROPPING; | 356 | sd->s_type |= CONFIGFS_USET_DROPPING; |
| 357 | 357 | ||
| 358 | /* | ||
| 359 | * Yup, recursive. If there's a problem, blame | ||
| 360 | * deep nesting of default_groups | ||
| 361 | */ | ||
| 358 | ret = configfs_detach_prep(sd->s_dentry); | 362 | ret = configfs_detach_prep(sd->s_dentry); |
| 359 | if (!ret) | 363 | if (!ret) |
| 360 | continue; | 364 | continue; |
| @@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group) | |||
| 562 | 566 | ||
| 563 | /* | 567 | /* |
| 564 | * All of link_obj/unlink_obj/link_group/unlink_group require that | 568 | * All of link_obj/unlink_obj/link_group/unlink_group require that |
| 565 | * subsys->su_sem is held. | 569 | * subsys->su_mutex is held. |
| 566 | */ | 570 | */ |
| 567 | 571 | ||
| 568 | static void unlink_obj(struct config_item *item) | 572 | static void unlink_obj(struct config_item *item) |
| @@ -714,6 +718,28 @@ static void configfs_detach_group(struct config_item *item) | |||
| 714 | } | 718 | } |
| 715 | 719 | ||
| 716 | /* | 720 | /* |
| 721 | * After the item has been detached from the filesystem view, we are | ||
| 722 | * ready to tear it out of the hierarchy. Notify the client before | ||
| 723 | * we do that so they can perform any cleanup that requires | ||
| 724 | * navigating the hierarchy. A client does not need to provide this | ||
| 725 | * callback. The subsystem semaphore MUST be held by the caller, and | ||
| 726 | * references must be valid for both items. It also assumes the | ||
| 727 | * caller has validated ci_type. | ||
| 728 | */ | ||
| 729 | static void client_disconnect_notify(struct config_item *parent_item, | ||
| 730 | struct config_item *item) | ||
| 731 | { | ||
| 732 | struct config_item_type *type; | ||
| 733 | |||
| 734 | type = parent_item->ci_type; | ||
| 735 | BUG_ON(!type); | ||
| 736 | |||
| 737 | if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) | ||
| 738 | type->ct_group_ops->disconnect_notify(to_config_group(parent_item), | ||
| 739 | item); | ||
| 740 | } | ||
| 741 | |||
| 742 | /* | ||
| 717 | * Drop the initial reference from make_item()/make_group() | 743 | * Drop the initial reference from make_item()/make_group() |
| 718 | * This function assumes that reference is held on item | 744 | * This function assumes that reference is held on item |
| 719 | * and that item holds a valid reference to the parent. Also, it | 745 | * and that item holds a valid reference to the parent. Also, it |
| @@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item, | |||
| 733 | */ | 759 | */ |
| 734 | if (type->ct_group_ops && type->ct_group_ops->drop_item) | 760 | if (type->ct_group_ops && type->ct_group_ops->drop_item) |
| 735 | type->ct_group_ops->drop_item(to_config_group(parent_item), | 761 | type->ct_group_ops->drop_item(to_config_group(parent_item), |
| 736 | item); | 762 | item); |
| 737 | else | 763 | else |
| 738 | config_item_put(item); | 764 | config_item_put(item); |
| 739 | } | 765 | } |
| 740 | 766 | ||
| 767 | #ifdef DEBUG | ||
| 768 | static void configfs_dump_one(struct configfs_dirent *sd, int level) | ||
| 769 | { | ||
| 770 | printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); | ||
| 771 | |||
| 772 | #define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); | ||
| 773 | type_print(CONFIGFS_ROOT); | ||
| 774 | type_print(CONFIGFS_DIR); | ||
| 775 | type_print(CONFIGFS_ITEM_ATTR); | ||
| 776 | type_print(CONFIGFS_ITEM_LINK); | ||
| 777 | type_print(CONFIGFS_USET_DIR); | ||
| 778 | type_print(CONFIGFS_USET_DEFAULT); | ||
| 779 | type_print(CONFIGFS_USET_DROPPING); | ||
| 780 | #undef type_print | ||
| 781 | } | ||
| 782 | |||
| 783 | static int configfs_dump(struct configfs_dirent *sd, int level) | ||
| 784 | { | ||
| 785 | struct configfs_dirent *child_sd; | ||
| 786 | int ret = 0; | ||
| 787 | |||
| 788 | configfs_dump_one(sd, level); | ||
| 789 | |||
| 790 | if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) | ||
| 791 | return 0; | ||
| 792 | |||
| 793 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { | ||
| 794 | ret = configfs_dump(child_sd, level + 2); | ||
| 795 | if (ret) | ||
| 796 | break; | ||
| 797 | } | ||
| 798 | |||
| 799 | return ret; | ||
| 800 | } | ||
| 801 | #endif | ||
| 802 | |||
| 803 | |||
| 804 | /* | ||
| 805 | * configfs_depend_item() and configfs_undepend_item() | ||
| 806 | * | ||
| 807 | * WARNING: Do not call these from a configfs callback! | ||
| 808 | * | ||
| 809 | * This describes these functions and their helpers. | ||
| 810 | * | ||
| 811 | * Allow another kernel system to depend on a config_item. If this | ||
| 812 | * happens, the item cannot go away until the dependant can live without | ||
| 813 | * it. The idea is to give client modules as simple an interface as | ||
| 814 | * possible. When a system asks them to depend on an item, they just | ||
| 815 | * call configfs_depend_item(). If the item is live and the client | ||
| 816 | * driver is in good shape, we'll happily do the work for them. | ||
| 817 | * | ||
| 818 | * Why is the locking complex? Because configfs uses the VFS to handle | ||
| 819 | * all locking, but this function is called outside the normal | ||
| 820 | * VFS->configfs path. So it must take VFS locks to prevent the | ||
| 821 | * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is | ||
| 822 | * why you can't call these functions underneath configfs callbacks. | ||
| 823 | * | ||
| 824 | * Note, btw, that this can be called at *any* time, even when a configfs | ||
| 825 | * subsystem isn't registered, or when configfs is loading or unloading. | ||
| 826 | * Just like configfs_register_subsystem(). So we take the same | ||
| 827 | * precautions. We pin the filesystem. We lock each i_mutex _in_order_ | ||
| 828 | * on our way down the tree. If we can find the target item in the | ||
| 829 | * configfs tree, it must be part of the subsystem tree as well, so we | ||
| 830 | * do not need the subsystem semaphore. Holding the i_mutex chain locks | ||
| 831 | * out mkdir() and rmdir(), who might be racing us. | ||
| 832 | */ | ||
| 833 | |||
| 834 | /* | ||
| 835 | * configfs_depend_prep() | ||
| 836 | * | ||
| 837 | * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are | ||
| 838 | * attributes. This is similar but not the same to configfs_detach_prep(). | ||
| 839 | * Note that configfs_detach_prep() expects the parent to be locked when it | ||
| 840 | * is called, but we lock the parent *inside* configfs_depend_prep(). We | ||
| 841 | * do that so we can unlock it if we find nothing. | ||
| 842 | * | ||
| 843 | * Here we do a depth-first search of the dentry hierarchy looking for | ||
| 844 | * our object. We take i_mutex on each step of the way down. IT IS | ||
| 845 | * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, | ||
| 846 | * we'll drop the i_mutex. | ||
| 847 | * | ||
| 848 | * If the target is not found, -ENOENT is bubbled up and we have released | ||
| 849 | * all locks. If the target was found, the locks will be cleared by | ||
| 850 | * configfs_depend_rollback(). | ||
| 851 | * | ||
| 852 | * This adds a requirement that all config_items be unique! | ||
| 853 | * | ||
| 854 | * This is recursive because the locking traversal is tricky. There isn't | ||
| 855 | * much on the stack, though, so folks that need this function - be careful | ||
| 856 | * about your stack! Patches will be accepted to make it iterative. | ||
| 857 | */ | ||
| 858 | static int configfs_depend_prep(struct dentry *origin, | ||
| 859 | struct config_item *target) | ||
| 860 | { | ||
| 861 | struct configfs_dirent *child_sd, *sd = origin->d_fsdata; | ||
| 862 | int ret = 0; | ||
| 863 | |||
| 864 | BUG_ON(!origin || !sd); | ||
| 865 | |||
| 866 | /* Lock this guy on the way down */ | ||
| 867 | mutex_lock(&sd->s_dentry->d_inode->i_mutex); | ||
| 868 | if (sd->s_element == target) /* Boo-yah */ | ||
| 869 | goto out; | ||
| 870 | |||
| 871 | list_for_each_entry(child_sd, &sd->s_children, s_sibling) { | ||
| 872 | if (child_sd->s_type & CONFIGFS_DIR) { | ||
| 873 | ret = configfs_depend_prep(child_sd->s_dentry, | ||
| 874 | target); | ||
| 875 | if (!ret) | ||
| 876 | goto out; /* Child path boo-yah */ | ||
| 877 | } | ||
| 878 | } | ||
| 879 | |||
| 880 | /* We looped all our children and didn't find target */ | ||
| 881 | mutex_unlock(&sd->s_dentry->d_inode->i_mutex); | ||
| 882 | ret = -ENOENT; | ||
| 883 | |||
| 884 | out: | ||
| 885 | return ret; | ||
| 886 | } | ||
| 887 | |||
| 888 | /* | ||
| 889 | * This is ONLY called if configfs_depend_prep() did its job. So we can | ||
| 890 | * trust the entire path from item back up to origin. | ||
| 891 | * | ||
| 892 | * We walk backwards from item, unlocking each i_mutex. We finish by | ||
| 893 | * unlocking origin. | ||
| 894 | */ | ||
| 895 | static void configfs_depend_rollback(struct dentry *origin, | ||
| 896 | struct config_item *item) | ||
| 897 | { | ||
| 898 | struct dentry *dentry = item->ci_dentry; | ||
| 899 | |||
| 900 | while (dentry != origin) { | ||
| 901 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
| 902 | dentry = dentry->d_parent; | ||
| 903 | } | ||
| 904 | |||
| 905 | mutex_unlock(&origin->d_inode->i_mutex); | ||
| 906 | } | ||
| 907 | |||
| 908 | int configfs_depend_item(struct configfs_subsystem *subsys, | ||
| 909 | struct config_item *target) | ||
| 910 | { | ||
| 911 | int ret; | ||
| 912 | struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; | ||
| 913 | struct config_item *s_item = &subsys->su_group.cg_item; | ||
| 914 | |||
| 915 | /* | ||
| 916 | * Pin the configfs filesystem. This means we can safely access | ||
| 917 | * the root of the configfs filesystem. | ||
| 918 | */ | ||
| 919 | ret = configfs_pin_fs(); | ||
| 920 | if (ret) | ||
| 921 | return ret; | ||
| 922 | |||
| 923 | /* | ||
| 924 | * Next, lock the root directory. We're going to check that the | ||
| 925 | * subsystem is really registered, and so we need to lock out | ||
| 926 | * configfs_[un]register_subsystem(). | ||
| 927 | */ | ||
| 928 | mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); | ||
| 929 | |||
| 930 | root_sd = configfs_sb->s_root->d_fsdata; | ||
| 931 | |||
| 932 | list_for_each_entry(p, &root_sd->s_children, s_sibling) { | ||
| 933 | if (p->s_type & CONFIGFS_DIR) { | ||
| 934 | if (p->s_element == s_item) { | ||
| 935 | subsys_sd = p; | ||
| 936 | break; | ||
| 937 | } | ||
| 938 | } | ||
| 939 | } | ||
| 940 | |||
| 941 | if (!subsys_sd) { | ||
| 942 | ret = -ENOENT; | ||
| 943 | goto out_unlock_fs; | ||
| 944 | } | ||
| 945 | |||
| 946 | /* Ok, now we can trust subsys/s_item */ | ||
| 947 | |||
| 948 | /* Scan the tree, locking i_mutex recursively, return 0 if found */ | ||
| 949 | ret = configfs_depend_prep(subsys_sd->s_dentry, target); | ||
| 950 | if (ret) | ||
| 951 | goto out_unlock_fs; | ||
| 952 | |||
| 953 | /* We hold all i_mutexes from the subsystem down to the target */ | ||
| 954 | p = target->ci_dentry->d_fsdata; | ||
| 955 | p->s_dependent_count += 1; | ||
| 956 | |||
| 957 | configfs_depend_rollback(subsys_sd->s_dentry, target); | ||
| 958 | |||
| 959 | out_unlock_fs: | ||
| 960 | mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); | ||
| 961 | |||
| 962 | /* | ||
| 963 | * If we succeeded, the fs is pinned via other methods. If not, | ||
| 964 | * we're done with it anyway. So release_fs() is always right. | ||
| 965 | */ | ||
| 966 | configfs_release_fs(); | ||
| 967 | |||
| 968 | return ret; | ||
| 969 | } | ||
| 970 | EXPORT_SYMBOL(configfs_depend_item); | ||
| 971 | |||
| 972 | /* | ||
| 973 | * Release the dependent linkage. This is much simpler than | ||
| 974 | * configfs_depend_item() because we know that that the client driver is | ||
| 975 | * pinned, thus the subsystem is pinned, and therefore configfs is pinned. | ||
| 976 | */ | ||
| 977 | void configfs_undepend_item(struct configfs_subsystem *subsys, | ||
| 978 | struct config_item *target) | ||
| 979 | { | ||
| 980 | struct configfs_dirent *sd; | ||
| 981 | |||
| 982 | /* | ||
| 983 | * Since we can trust everything is pinned, we just need i_mutex | ||
| 984 | * on the item. | ||
| 985 | */ | ||
| 986 | mutex_lock(&target->ci_dentry->d_inode->i_mutex); | ||
| 987 | |||
| 988 | sd = target->ci_dentry->d_fsdata; | ||
| 989 | BUG_ON(sd->s_dependent_count < 1); | ||
| 990 | |||
| 991 | sd->s_dependent_count -= 1; | ||
| 992 | |||
| 993 | /* | ||
| 994 | * After this unlock, we cannot trust the item to stay alive! | ||
| 995 | * DO NOT REFERENCE item after this unlock. | ||
| 996 | */ | ||
| 997 | mutex_unlock(&target->ci_dentry->d_inode->i_mutex); | ||
| 998 | } | ||
| 999 | EXPORT_SYMBOL(configfs_undepend_item); | ||
| 741 | 1000 | ||
| 742 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 1001 | static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
| 743 | { | 1002 | { |
| @@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 783 | 1042 | ||
| 784 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); | 1043 | snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); |
| 785 | 1044 | ||
| 786 | down(&subsys->su_sem); | 1045 | mutex_lock(&subsys->su_mutex); |
| 787 | group = NULL; | 1046 | group = NULL; |
| 788 | item = NULL; | 1047 | item = NULL; |
| 789 | if (type->ct_group_ops->make_group) { | 1048 | if (type->ct_group_ops->make_group) { |
| @@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 797 | if (item) | 1056 | if (item) |
| 798 | link_obj(parent_item, item); | 1057 | link_obj(parent_item, item); |
| 799 | } | 1058 | } |
| 800 | up(&subsys->su_sem); | 1059 | mutex_unlock(&subsys->su_mutex); |
| 801 | 1060 | ||
| 802 | kfree(name); | 1061 | kfree(name); |
| 803 | if (!item) { | 1062 | if (!item) { |
| @@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 841 | out_unlink: | 1100 | out_unlink: |
| 842 | if (ret) { | 1101 | if (ret) { |
| 843 | /* Tear down everything we built up */ | 1102 | /* Tear down everything we built up */ |
| 844 | down(&subsys->su_sem); | 1103 | mutex_lock(&subsys->su_mutex); |
| 1104 | |||
| 1105 | client_disconnect_notify(parent_item, item); | ||
| 845 | if (group) | 1106 | if (group) |
| 846 | unlink_group(group); | 1107 | unlink_group(group); |
| 847 | else | 1108 | else |
| 848 | unlink_obj(item); | 1109 | unlink_obj(item); |
| 849 | client_drop_item(parent_item, item); | 1110 | client_drop_item(parent_item, item); |
| 850 | up(&subsys->su_sem); | 1111 | |
| 1112 | mutex_unlock(&subsys->su_mutex); | ||
| 851 | 1113 | ||
| 852 | if (module_got) | 1114 | if (module_got) |
| 853 | module_put(owner); | 1115 | module_put(owner); |
| @@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 881 | if (sd->s_type & CONFIGFS_USET_DEFAULT) | 1143 | if (sd->s_type & CONFIGFS_USET_DEFAULT) |
| 882 | return -EPERM; | 1144 | return -EPERM; |
| 883 | 1145 | ||
| 1146 | /* | ||
| 1147 | * Here's where we check for dependents. We're protected by | ||
| 1148 | * i_mutex. | ||
| 1149 | */ | ||
| 1150 | if (sd->s_dependent_count) | ||
| 1151 | return -EBUSY; | ||
| 1152 | |||
| 884 | /* Get a working ref until we have the child */ | 1153 | /* Get a working ref until we have the child */ |
| 885 | parent_item = configfs_get_config_item(dentry->d_parent); | 1154 | parent_item = configfs_get_config_item(dentry->d_parent); |
| 886 | subsys = to_config_group(parent_item)->cg_subsys; | 1155 | subsys = to_config_group(parent_item)->cg_subsys; |
| @@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 910 | if (sd->s_type & CONFIGFS_USET_DIR) { | 1179 | if (sd->s_type & CONFIGFS_USET_DIR) { |
| 911 | configfs_detach_group(item); | 1180 | configfs_detach_group(item); |
| 912 | 1181 | ||
| 913 | down(&subsys->su_sem); | 1182 | mutex_lock(&subsys->su_mutex); |
| 1183 | client_disconnect_notify(parent_item, item); | ||
| 914 | unlink_group(to_config_group(item)); | 1184 | unlink_group(to_config_group(item)); |
| 915 | } else { | 1185 | } else { |
| 916 | configfs_detach_item(item); | 1186 | configfs_detach_item(item); |
| 917 | 1187 | ||
| 918 | down(&subsys->su_sem); | 1188 | mutex_lock(&subsys->su_mutex); |
| 1189 | client_disconnect_notify(parent_item, item); | ||
| 919 | unlink_obj(item); | 1190 | unlink_obj(item); |
| 920 | } | 1191 | } |
| 921 | 1192 | ||
| 922 | client_drop_item(parent_item, item); | 1193 | client_drop_item(parent_item, item); |
| 923 | up(&subsys->su_sem); | 1194 | mutex_unlock(&subsys->su_mutex); |
| 924 | 1195 | ||
| 925 | /* Drop our reference from above */ | 1196 | /* Drop our reference from above */ |
| 926 | config_item_put(item); | 1197 | config_item_put(item); |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 3527c7c6def8..a3658f9a082c 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
| @@ -27,19 +27,26 @@ | |||
| 27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
| 28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/mutex.h> | ||
| 30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
| 31 | #include <asm/semaphore.h> | ||
| 32 | 32 | ||
| 33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
| 34 | #include "configfs_internal.h" | 34 | #include "configfs_internal.h" |
| 35 | 35 | ||
| 36 | /* | ||
| 37 | * A simple attribute can only be 4096 characters. Why 4k? Because the | ||
| 38 | * original code limited it to PAGE_SIZE. That's a bad idea, though, | ||
| 39 | * because an attribute of 16k on ia64 won't work on x86. So we limit to | ||
| 40 | * 4k, our minimum common page size. | ||
| 41 | */ | ||
| 42 | #define SIMPLE_ATTR_SIZE 4096 | ||
| 36 | 43 | ||
| 37 | struct configfs_buffer { | 44 | struct configfs_buffer { |
| 38 | size_t count; | 45 | size_t count; |
| 39 | loff_t pos; | 46 | loff_t pos; |
| 40 | char * page; | 47 | char * page; |
| 41 | struct configfs_item_operations * ops; | 48 | struct configfs_item_operations * ops; |
| 42 | struct semaphore sem; | 49 | struct mutex mutex; |
| 43 | int needs_read_fill; | 50 | int needs_read_fill; |
| 44 | }; | 51 | }; |
| 45 | 52 | ||
| @@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf | |||
| 69 | 76 | ||
| 70 | count = ops->show_attribute(item,attr,buffer->page); | 77 | count = ops->show_attribute(item,attr,buffer->page); |
| 71 | buffer->needs_read_fill = 0; | 78 | buffer->needs_read_fill = 0; |
| 72 | BUG_ON(count > (ssize_t)PAGE_SIZE); | 79 | BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); |
| 73 | if (count >= 0) | 80 | if (count >= 0) |
| 74 | buffer->count = count; | 81 | buffer->count = count; |
| 75 | else | 82 | else |
| @@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
| 102 | struct configfs_buffer * buffer = file->private_data; | 109 | struct configfs_buffer * buffer = file->private_data; |
| 103 | ssize_t retval = 0; | 110 | ssize_t retval = 0; |
| 104 | 111 | ||
| 105 | down(&buffer->sem); | 112 | mutex_lock(&buffer->mutex); |
| 106 | if (buffer->needs_read_fill) { | 113 | if (buffer->needs_read_fill) { |
| 107 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) | 114 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) |
| 108 | goto out; | 115 | goto out; |
| @@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
| 112 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, | 119 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, |
| 113 | buffer->count); | 120 | buffer->count); |
| 114 | out: | 121 | out: |
| 115 | up(&buffer->sem); | 122 | mutex_unlock(&buffer->mutex); |
| 116 | return retval; | 123 | return retval; |
| 117 | } | 124 | } |
| 118 | 125 | ||
| @@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size | |||
| 137 | if (!buffer->page) | 144 | if (!buffer->page) |
| 138 | return -ENOMEM; | 145 | return -ENOMEM; |
| 139 | 146 | ||
| 140 | if (count >= PAGE_SIZE) | 147 | if (count >= SIMPLE_ATTR_SIZE) |
| 141 | count = PAGE_SIZE - 1; | 148 | count = SIMPLE_ATTR_SIZE - 1; |
| 142 | error = copy_from_user(buffer->page,buf,count); | 149 | error = copy_from_user(buffer->page,buf,count); |
| 143 | buffer->needs_read_fill = 1; | 150 | buffer->needs_read_fill = 1; |
| 144 | /* if buf is assumed to contain a string, terminate it by \0, | 151 | /* if buf is assumed to contain a string, terminate it by \0, |
| @@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof | |||
| 193 | struct configfs_buffer * buffer = file->private_data; | 200 | struct configfs_buffer * buffer = file->private_data; |
| 194 | ssize_t len; | 201 | ssize_t len; |
| 195 | 202 | ||
| 196 | down(&buffer->sem); | 203 | mutex_lock(&buffer->mutex); |
| 197 | len = fill_write_buffer(buffer, buf, count); | 204 | len = fill_write_buffer(buffer, buf, count); |
| 198 | if (len > 0) | 205 | if (len > 0) |
| 199 | len = flush_write_buffer(file->f_path.dentry, buffer, count); | 206 | len = flush_write_buffer(file->f_path.dentry, buffer, count); |
| 200 | if (len > 0) | 207 | if (len > 0) |
| 201 | *ppos += len; | 208 | *ppos += len; |
| 202 | up(&buffer->sem); | 209 | mutex_unlock(&buffer->mutex); |
| 203 | return len; | 210 | return len; |
| 204 | } | 211 | } |
| 205 | 212 | ||
| @@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file) | |||
| 253 | error = -ENOMEM; | 260 | error = -ENOMEM; |
| 254 | goto Enomem; | 261 | goto Enomem; |
| 255 | } | 262 | } |
| 256 | init_MUTEX(&buffer->sem); | 263 | mutex_init(&buffer->mutex); |
| 257 | buffer->needs_read_fill = 1; | 264 | buffer->needs_read_fill = 1; |
| 258 | buffer->ops = ops; | 265 | buffer->ops = ops; |
| 259 | file->private_data = buffer; | 266 | file->private_data = buffer; |
| @@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp) | |||
| 292 | if (buffer) { | 299 | if (buffer) { |
| 293 | if (buffer->page) | 300 | if (buffer->page) |
| 294 | free_page((unsigned long)buffer->page); | 301 | free_page((unsigned long)buffer->page); |
| 302 | mutex_destroy(&buffer->mutex); | ||
| 295 | kfree(buffer); | 303 | kfree(buffer); |
| 296 | } | 304 | } |
| 297 | return 0; | 305 | return 0; |
diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 24421209f854..76dc4c3e5d51 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c | |||
| @@ -62,7 +62,6 @@ void config_item_init(struct config_item * item) | |||
| 62 | * dynamically allocated string that @item->ci_name points to. | 62 | * dynamically allocated string that @item->ci_name points to. |
| 63 | * Otherwise, use the static @item->ci_namebuf array. | 63 | * Otherwise, use the static @item->ci_namebuf array. |
| 64 | */ | 64 | */ |
| 65 | |||
| 66 | int config_item_set_name(struct config_item * item, const char * fmt, ...) | 65 | int config_item_set_name(struct config_item * item, const char * fmt, ...) |
| 67 | { | 66 | { |
| 68 | int error = 0; | 67 | int error = 0; |
| @@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item) | |||
| 139 | return item; | 138 | return item; |
| 140 | } | 139 | } |
| 141 | 140 | ||
| 142 | /** | 141 | static void config_item_cleanup(struct config_item * item) |
| 143 | * config_item_cleanup - free config_item resources. | ||
| 144 | * @item: item. | ||
| 145 | */ | ||
| 146 | |||
| 147 | void config_item_cleanup(struct config_item * item) | ||
| 148 | { | 142 | { |
| 149 | struct config_item_type * t = item->ci_type; | 143 | struct config_item_type * t = item->ci_type; |
| 150 | struct config_group * s = item->ci_group; | 144 | struct config_group * s = item->ci_group; |
| @@ -179,39 +173,35 @@ void config_item_put(struct config_item * item) | |||
| 179 | kref_put(&item->ci_kref, config_item_release); | 173 | kref_put(&item->ci_kref, config_item_release); |
| 180 | } | 174 | } |
| 181 | 175 | ||
| 182 | |||
| 183 | /** | 176 | /** |
| 184 | * config_group_init - initialize a group for use | 177 | * config_group_init - initialize a group for use |
| 185 | * @k: group | 178 | * @k: group |
| 186 | */ | 179 | */ |
| 187 | |||
| 188 | void config_group_init(struct config_group *group) | 180 | void config_group_init(struct config_group *group) |
| 189 | { | 181 | { |
| 190 | config_item_init(&group->cg_item); | 182 | config_item_init(&group->cg_item); |
| 191 | INIT_LIST_HEAD(&group->cg_children); | 183 | INIT_LIST_HEAD(&group->cg_children); |
| 192 | } | 184 | } |
| 193 | 185 | ||
| 194 | |||
| 195 | /** | 186 | /** |
| 196 | * config_group_find_obj - search for item in group. | 187 | * config_group_find_item - search for item in group. |
| 197 | * @group: group we're looking in. | 188 | * @group: group we're looking in. |
| 198 | * @name: item's name. | 189 | * @name: item's name. |
| 199 | * | 190 | * |
| 200 | * Lock group via @group->cg_subsys, and iterate over @group->cg_list, | 191 | * Iterate over @group->cg_list, looking for a matching config_item. |
| 201 | * looking for a matching config_item. If matching item is found | 192 | * If matching item is found take a reference and return the item. |
| 202 | * take a reference and return the item. | 193 | * Caller must have locked group via @group->cg_subsys->su_mtx. |
| 203 | */ | 194 | */ |
| 204 | 195 | struct config_item *config_group_find_item(struct config_group *group, | |
| 205 | struct config_item * config_group_find_obj(struct config_group * group, const char * name) | 196 | const char *name) |
| 206 | { | 197 | { |
| 207 | struct list_head * entry; | 198 | struct list_head * entry; |
| 208 | struct config_item * ret = NULL; | 199 | struct config_item * ret = NULL; |
| 209 | 200 | ||
| 210 | /* XXX LOCKING! */ | ||
| 211 | list_for_each(entry,&group->cg_children) { | 201 | list_for_each(entry,&group->cg_children) { |
| 212 | struct config_item * item = to_item(entry); | 202 | struct config_item * item = to_item(entry); |
| 213 | if (config_item_name(item) && | 203 | if (config_item_name(item) && |
| 214 | !strcmp(config_item_name(item), name)) { | 204 | !strcmp(config_item_name(item), name)) { |
| 215 | ret = config_item_get(item); | 205 | ret = config_item_get(item); |
| 216 | break; | 206 | break; |
| 217 | } | 207 | } |
| @@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch | |||
| 219 | return ret; | 209 | return ret; |
| 220 | } | 210 | } |
| 221 | 211 | ||
| 222 | |||
| 223 | EXPORT_SYMBOL(config_item_init); | 212 | EXPORT_SYMBOL(config_item_init); |
| 224 | EXPORT_SYMBOL(config_group_init); | 213 | EXPORT_SYMBOL(config_group_init); |
| 225 | EXPORT_SYMBOL(config_item_get); | 214 | EXPORT_SYMBOL(config_item_get); |
| 226 | EXPORT_SYMBOL(config_item_put); | 215 | EXPORT_SYMBOL(config_item_put); |
| 227 | EXPORT_SYMBOL(config_group_find_obj); | 216 | EXPORT_SYMBOL(config_group_find_item); |
diff --git a/fs/dcache.c b/fs/dcache.c index 0e73aa0a0e8b..cb9d05056b54 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -883,6 +883,11 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) | |||
| 883 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 883 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; |
| 884 | } | 884 | } |
| 885 | 885 | ||
| 886 | static struct shrinker dcache_shrinker = { | ||
| 887 | .shrink = shrink_dcache_memory, | ||
| 888 | .seeks = DEFAULT_SEEKS, | ||
| 889 | }; | ||
| 890 | |||
| 886 | /** | 891 | /** |
| 887 | * d_alloc - allocate a dcache entry | 892 | * d_alloc - allocate a dcache entry |
| 888 | * @parent: parent of entry to allocate | 893 | * @parent: parent of entry to allocate |
| @@ -2115,7 +2120,7 @@ static void __init dcache_init(unsigned long mempages) | |||
| 2115 | dentry_cache = KMEM_CACHE(dentry, | 2120 | dentry_cache = KMEM_CACHE(dentry, |
| 2116 | SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); | 2121 | SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); |
| 2117 | 2122 | ||
| 2118 | set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); | 2123 | register_shrinker(&dcache_shrinker); |
| 2119 | 2124 | ||
| 2120 | /* Hash may have been set up in dcache_init_early */ | 2125 | /* Hash may have been set up in dcache_init_early */ |
| 2121 | if (!hashdist) | 2126 | if (!hashdist) |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 5069b2cb5a1f..2f8e3c81bc19 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
| @@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | |||
| 133 | return len; | 133 | return len; |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | #define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ | ||
| 137 | .attr = { .ca_name = __stringify(_name), \ | ||
| 138 | .ca_mode = _mode, \ | ||
| 139 | .ca_owner = THIS_MODULE }, \ | ||
| 140 | .show = _read, \ | ||
| 141 | .store = _write, \ | ||
| 142 | } | ||
| 143 | |||
| 144 | #define CLUSTER_ATTR(name, check_zero) \ | 136 | #define CLUSTER_ATTR(name, check_zero) \ |
| 145 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ | 137 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ |
| 146 | { \ | 138 | { \ |
| @@ -615,7 +607,7 @@ static struct clusters clusters_root = { | |||
| 615 | int dlm_config_init(void) | 607 | int dlm_config_init(void) |
| 616 | { | 608 | { |
| 617 | config_group_init(&clusters_root.subsys.su_group); | 609 | config_group_init(&clusters_root.subsys.su_group); |
| 618 | init_MUTEX(&clusters_root.subsys.su_sem); | 610 | mutex_init(&clusters_root.subsys.su_mutex); |
| 619 | return configfs_register_subsystem(&clusters_root.subsys); | 611 | return configfs_register_subsystem(&clusters_root.subsys); |
| 620 | } | 612 | } |
| 621 | 613 | ||
| @@ -759,9 +751,9 @@ static struct space *get_space(char *name) | |||
| 759 | if (!space_list) | 751 | if (!space_list) |
| 760 | return NULL; | 752 | return NULL; |
| 761 | 753 | ||
| 762 | down(&space_list->cg_subsys->su_sem); | 754 | mutex_lock(&space_list->cg_subsys->su_mutex); |
| 763 | i = config_group_find_obj(space_list, name); | 755 | i = config_group_find_item(space_list, name); |
| 764 | up(&space_list->cg_subsys->su_sem); | 756 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
| 765 | 757 | ||
| 766 | return to_space(i); | 758 | return to_space(i); |
| 767 | } | 759 | } |
| @@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
| 780 | if (!comm_list) | 772 | if (!comm_list) |
| 781 | return NULL; | 773 | return NULL; |
| 782 | 774 | ||
| 783 | down(&clusters_root.subsys.su_sem); | 775 | mutex_lock(&clusters_root.subsys.su_mutex); |
| 784 | 776 | ||
| 785 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 777 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
| 786 | cm = to_comm(i); | 778 | cm = to_comm(i); |
| @@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
| 800 | break; | 792 | break; |
| 801 | } | 793 | } |
| 802 | } | 794 | } |
| 803 | up(&clusters_root.subsys.su_sem); | 795 | mutex_unlock(&clusters_root.subsys.su_mutex); |
| 804 | 796 | ||
| 805 | if (!found) | 797 | if (!found) |
| 806 | cm = NULL; | 798 | cm = NULL; |
diff --git a/fs/dquot.c b/fs/dquot.c index 8819d281500c..7e273151f589 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
| @@ -538,6 +538,11 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | |||
| 538 | return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; | 538 | return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; |
| 539 | } | 539 | } |
| 540 | 540 | ||
| 541 | static struct shrinker dqcache_shrinker = { | ||
| 542 | .shrink = shrink_dqcache_memory, | ||
| 543 | .seeks = DEFAULT_SEEKS, | ||
| 544 | }; | ||
| 545 | |||
| 541 | /* | 546 | /* |
| 542 | * Put reference to dquot | 547 | * Put reference to dquot |
| 543 | * NOTE: If you change this function please check whether dqput_blocks() works right... | 548 | * NOTE: If you change this function please check whether dqput_blocks() works right... |
| @@ -1870,7 +1875,7 @@ static int __init dquot_init(void) | |||
| 1870 | printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", | 1875 | printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", |
| 1871 | nr_hash, order, (PAGE_SIZE << order)); | 1876 | nr_hash, order, (PAGE_SIZE << order)); |
| 1872 | 1877 | ||
| 1873 | set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory); | 1878 | register_shrinker(&dqcache_shrinker); |
| 1874 | 1879 | ||
| 1875 | return 0; | 1880 | return 0; |
| 1876 | } | 1881 | } |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 03ea7696fe39..59375efcf39d 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
| @@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb) | |||
| 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
| 21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) | 21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) |
| 22 | continue; | 22 | continue; |
| 23 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 23 | __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); |
| 24 | } | 24 | } |
| 25 | spin_unlock(&inode_lock); | 25 | spin_unlock(&inode_lock); |
| 26 | } | 26 | } |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 83e94fedd4e9..e77a2ec71aa5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
| @@ -282,7 +282,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 282 | struct dentry *lower_dentry; | 282 | struct dentry *lower_dentry; |
| 283 | struct vfsmount *lower_mnt; | 283 | struct vfsmount *lower_mnt; |
| 284 | char *encoded_name; | 284 | char *encoded_name; |
| 285 | unsigned int encoded_namelen; | 285 | int encoded_namelen; |
| 286 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | 286 | struct ecryptfs_crypt_stat *crypt_stat = NULL; |
| 287 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; | 287 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; |
| 288 | char *page_virt = NULL; | 288 | char *page_virt = NULL; |
| @@ -473,7 +473,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 473 | struct dentry *lower_dir_dentry; | 473 | struct dentry *lower_dir_dentry; |
| 474 | umode_t mode; | 474 | umode_t mode; |
| 475 | char *encoded_symname; | 475 | char *encoded_symname; |
| 476 | unsigned int encoded_symlen; | 476 | int encoded_symlen; |
| 477 | struct ecryptfs_crypt_stat *crypt_stat = NULL; | 477 | struct ecryptfs_crypt_stat *crypt_stat = NULL; |
| 478 | 478 | ||
| 479 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 479 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index ed4a207fe22a..5276b19423c1 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
| @@ -75,6 +75,38 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
| 75 | return NULL; | 75 | return NULL; |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp) | ||
| 79 | { | ||
| 80 | __u32 *objp = vobjp; | ||
| 81 | unsigned long ino = objp[0]; | ||
| 82 | __u32 generation = objp[1]; | ||
| 83 | struct inode *inode; | ||
| 84 | struct dentry *result; | ||
| 85 | |||
| 86 | if (ino == 0) | ||
| 87 | return ERR_PTR(-ESTALE); | ||
| 88 | inode = iget(sb, ino); | ||
| 89 | if (inode == NULL) | ||
| 90 | return ERR_PTR(-ENOMEM); | ||
| 91 | |||
| 92 | if (is_bad_inode(inode) || | ||
| 93 | (generation && inode->i_generation != generation)) { | ||
| 94 | result = ERR_PTR(-ESTALE); | ||
| 95 | goto out_iput; | ||
| 96 | } | ||
| 97 | |||
| 98 | result = d_alloc_anon(inode); | ||
| 99 | if (!result) { | ||
| 100 | result = ERR_PTR(-ENOMEM); | ||
| 101 | goto out_iput; | ||
| 102 | } | ||
| 103 | return result; | ||
| 104 | |||
| 105 | out_iput: | ||
| 106 | iput(inode); | ||
| 107 | return result; | ||
| 108 | } | ||
| 109 | |||
| 78 | struct dentry *efs_get_parent(struct dentry *child) | 110 | struct dentry *efs_get_parent(struct dentry *child) |
| 79 | { | 111 | { |
| 80 | struct dentry *parent; | 112 | struct dentry *parent; |
diff --git a/fs/efs/super.c b/fs/efs/super.c index e0a6839e68ae..d360c81f3a72 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/efs_fs.h> | 11 | #include <linux/efs_fs.h> |
| 12 | #include <linux/efs_vh.h> | 12 | #include <linux/efs_vh.h> |
| 13 | #include <linux/efs_fs_sb.h> | 13 | #include <linux/efs_fs_sb.h> |
| 14 | #include <linux/exportfs.h> | ||
| 14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 15 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
| 16 | #include <linux/vfs.h> | 17 | #include <linux/vfs.h> |
| @@ -113,6 +114,7 @@ static const struct super_operations efs_superblock_operations = { | |||
| 113 | }; | 114 | }; |
| 114 | 115 | ||
| 115 | static struct export_operations efs_export_ops = { | 116 | static struct export_operations efs_export_ops = { |
| 117 | .get_dentry = efs_get_dentry, | ||
| 116 | .get_parent = efs_get_parent, | 118 | .get_parent = efs_get_parent, |
| 117 | }; | 119 | }; |
| 118 | 120 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index e98f6cd7200c..8adb32a9387a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
| @@ -1,15 +1,45 @@ | |||
| 1 | 1 | ||
| 2 | #include <linux/exportfs.h> | ||
| 2 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
| 3 | #include <linux/file.h> | 4 | #include <linux/file.h> |
| 4 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/mount.h> | ||
| 5 | #include <linux/namei.h> | 7 | #include <linux/namei.h> |
| 6 | 8 | ||
| 7 | struct export_operations export_op_default; | 9 | #define dprintk(fmt, args...) do{}while(0) |
| 8 | 10 | ||
| 9 | #define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun) | ||
| 10 | 11 | ||
| 11 | #define dprintk(fmt, args...) do{}while(0) | 12 | static int get_name(struct dentry *dentry, char *name, |
| 13 | struct dentry *child); | ||
| 14 | |||
| 15 | |||
| 16 | static struct dentry *exportfs_get_dentry(struct super_block *sb, void *obj) | ||
| 17 | { | ||
| 18 | struct dentry *result = ERR_PTR(-ESTALE); | ||
| 19 | |||
| 20 | if (sb->s_export_op->get_dentry) { | ||
| 21 | result = sb->s_export_op->get_dentry(sb, obj); | ||
| 22 | if (!result) | ||
| 23 | result = ERR_PTR(-ESTALE); | ||
| 24 | } | ||
| 25 | |||
| 26 | return result; | ||
| 27 | } | ||
| 28 | |||
| 29 | static int exportfs_get_name(struct dentry *dir, char *name, | ||
| 30 | struct dentry *child) | ||
| 31 | { | ||
| 32 | struct export_operations *nop = dir->d_sb->s_export_op; | ||
| 12 | 33 | ||
| 34 | if (nop->get_name) | ||
| 35 | return nop->get_name(dir, name, child); | ||
| 36 | else | ||
| 37 | return get_name(dir, name, child); | ||
| 38 | } | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Check if the dentry or any of it's aliases is acceptable. | ||
| 42 | */ | ||
| 13 | static struct dentry * | 43 | static struct dentry * |
| 14 | find_acceptable_alias(struct dentry *result, | 44 | find_acceptable_alias(struct dentry *result, |
| 15 | int (*acceptable)(void *context, struct dentry *dentry), | 45 | int (*acceptable)(void *context, struct dentry *dentry), |
| @@ -17,6 +47,9 @@ find_acceptable_alias(struct dentry *result, | |||
| 17 | { | 47 | { |
| 18 | struct dentry *dentry, *toput = NULL; | 48 | struct dentry *dentry, *toput = NULL; |
| 19 | 49 | ||
| 50 | if (acceptable(context, result)) | ||
| 51 | return result; | ||
| 52 | |||
| 20 | spin_lock(&dcache_lock); | 53 | spin_lock(&dcache_lock); |
| 21 | list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { | 54 | list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { |
| 22 | dget_locked(dentry); | 55 | dget_locked(dentry); |
| @@ -37,130 +70,50 @@ find_acceptable_alias(struct dentry *result, | |||
| 37 | return NULL; | 70 | return NULL; |
| 38 | } | 71 | } |
| 39 | 72 | ||
| 40 | /** | 73 | /* |
| 41 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | 74 | * Find root of a disconnected subtree and return a reference to it. |
| 42 | * @sb: The &super_block identifying the filesystem | ||
| 43 | * @obj: An opaque identifier of the object to be found - passed to | ||
| 44 | * get_inode | ||
| 45 | * @parent: An optional opqaue identifier of the parent of the object. | ||
| 46 | * @acceptable: A function used to test possible &dentries to see if they are | ||
| 47 | * acceptable | ||
| 48 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
| 49 | * judge. | ||
| 50 | * | ||
| 51 | * find_exported_dentry is the central helper routine to enable file systems | ||
| 52 | * to provide the decode_fh() export_operation. It's main task is to take | ||
| 53 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
| 54 | * splice this into the dcache in the correct place. | ||
| 55 | * | ||
| 56 | * The decode_fh() operation provided by the filesystem should call | ||
| 57 | * find_exported_dentry() with the same parameters that it received except | ||
| 58 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
| 59 | * for the object and optionally its parent are passed. The default decode_fh | ||
| 60 | * routine passes one pointer to the start of the filehandle fragment, and | ||
| 61 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
| 62 | * take this approach, though the offset to the parent identifier may well be | ||
| 63 | * different. | ||
| 64 | * | ||
| 65 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
| 66 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
| 67 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
| 68 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
| 69 | * appropriate place. | ||
| 70 | */ | 75 | */ |
| 71 | 76 | static struct dentry * | |
| 72 | struct dentry * | 77 | find_disconnected_root(struct dentry *dentry) |
| 73 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
| 74 | int (*acceptable)(void *context, struct dentry *de), | ||
| 75 | void *context) | ||
| 76 | { | 78 | { |
| 77 | struct dentry *result = NULL; | 79 | dget(dentry); |
| 78 | struct dentry *target_dir; | 80 | spin_lock(&dentry->d_lock); |
| 79 | int err; | 81 | while (!IS_ROOT(dentry) && |
| 80 | struct export_operations *nops = sb->s_export_op; | 82 | (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { |
| 81 | struct dentry *alias; | 83 | struct dentry *parent = dentry->d_parent; |
| 82 | int noprogress; | 84 | dget(parent); |
| 83 | char nbuf[NAME_MAX+1]; | 85 | spin_unlock(&dentry->d_lock); |
| 84 | 86 | dput(dentry); | |
| 85 | /* | 87 | dentry = parent; |
| 86 | * Attempt to find the inode. | 88 | spin_lock(&dentry->d_lock); |
| 87 | */ | ||
| 88 | result = CALL(sb->s_export_op,get_dentry)(sb,obj); | ||
| 89 | err = -ESTALE; | ||
| 90 | if (result == NULL) | ||
| 91 | goto err_out; | ||
| 92 | if (IS_ERR(result)) { | ||
| 93 | err = PTR_ERR(result); | ||
| 94 | goto err_out; | ||
| 95 | } | 89 | } |
| 96 | if (S_ISDIR(result->d_inode->i_mode) && | 90 | spin_unlock(&dentry->d_lock); |
| 97 | (result->d_flags & DCACHE_DISCONNECTED)) { | 91 | return dentry; |
| 98 | /* it is an unconnected directory, we must connect it */ | 92 | } |
| 99 | ; | ||
| 100 | } else { | ||
| 101 | if (acceptable(context, result)) | ||
| 102 | return result; | ||
| 103 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
| 104 | err = -EACCES; | ||
| 105 | goto err_result; | ||
| 106 | } | ||
| 107 | 93 | ||
| 108 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 109 | if (alias) | ||
| 110 | return alias; | ||
| 111 | } | ||
| 112 | |||
| 113 | /* It's a directory, or we are required to confirm the file's | ||
| 114 | * location in the tree based on the parent information | ||
| 115 | */ | ||
| 116 | dprintk("find_exported_dentry: need to look harder for %s/%d\n",sb->s_id,*(int*)obj); | ||
| 117 | if (S_ISDIR(result->d_inode->i_mode)) | ||
| 118 | target_dir = dget(result); | ||
| 119 | else { | ||
| 120 | if (parent == NULL) | ||
| 121 | goto err_result; | ||
| 122 | 94 | ||
| 123 | target_dir = CALL(sb->s_export_op,get_dentry)(sb,parent); | 95 | /* |
| 124 | if (IS_ERR(target_dir)) | 96 | * Make sure target_dir is fully connected to the dentry tree. |
| 125 | err = PTR_ERR(target_dir); | 97 | * |
| 126 | if (target_dir == NULL || IS_ERR(target_dir)) | 98 | * It may already be, as the flag isn't always updated when connection happens. |
| 127 | goto err_result; | 99 | */ |
| 128 | } | 100 | static int |
| 129 | /* | 101 | reconnect_path(struct super_block *sb, struct dentry *target_dir) |
| 130 | * Now we need to make sure that target_dir is properly connected. | 102 | { |
| 131 | * It may already be, as the flag isn't always updated when connection | 103 | char nbuf[NAME_MAX+1]; |
| 132 | * happens. | 104 | int noprogress = 0; |
| 133 | * So, we walk up parent links until we find a connected directory, | 105 | int err = -ESTALE; |
| 134 | * or we run out of directories. Then we find the parent, find | ||
| 135 | * the name of the child in that parent, and do a lookup. | ||
| 136 | * This should connect the child into the parent | ||
| 137 | * We then repeat. | ||
| 138 | */ | ||
| 139 | 106 | ||
| 140 | /* it is possible that a confused file system might not let us complete | 107 | /* |
| 108 | * It is possible that a confused file system might not let us complete | ||
| 141 | * the path to the root. For example, if get_parent returns a directory | 109 | * the path to the root. For example, if get_parent returns a directory |
| 142 | * in which we cannot find a name for the child. While this implies a | 110 | * in which we cannot find a name for the child. While this implies a |
| 143 | * very sick filesystem we don't want it to cause knfsd to spin. Hence | 111 | * very sick filesystem we don't want it to cause knfsd to spin. Hence |
| 144 | * the noprogress counter. If we go through the loop 10 times (2 is | 112 | * the noprogress counter. If we go through the loop 10 times (2 is |
| 145 | * probably enough) without getting anywhere, we just give up | 113 | * probably enough) without getting anywhere, we just give up |
| 146 | */ | 114 | */ |
| 147 | noprogress= 0; | ||
| 148 | while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { | 115 | while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { |
| 149 | struct dentry *pd = target_dir; | 116 | struct dentry *pd = find_disconnected_root(target_dir); |
| 150 | |||
| 151 | dget(pd); | ||
| 152 | spin_lock(&pd->d_lock); | ||
| 153 | while (!IS_ROOT(pd) && | ||
| 154 | (pd->d_parent->d_flags&DCACHE_DISCONNECTED)) { | ||
| 155 | struct dentry *parent = pd->d_parent; | ||
| 156 | |||
| 157 | dget(parent); | ||
| 158 | spin_unlock(&pd->d_lock); | ||
| 159 | dput(pd); | ||
| 160 | pd = parent; | ||
| 161 | spin_lock(&pd->d_lock); | ||
| 162 | } | ||
| 163 | spin_unlock(&pd->d_lock); | ||
| 164 | 117 | ||
| 165 | if (!IS_ROOT(pd)) { | 118 | if (!IS_ROOT(pd)) { |
| 166 | /* must have found a connected parent - great */ | 119 | /* must have found a connected parent - great */ |
| @@ -175,29 +128,40 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 175 | spin_unlock(&pd->d_lock); | 128 | spin_unlock(&pd->d_lock); |
| 176 | noprogress = 0; | 129 | noprogress = 0; |
| 177 | } else { | 130 | } else { |
| 178 | /* we have hit the top of a disconnected path. Try | 131 | /* |
| 179 | * to find parent and connect | 132 | * We have hit the top of a disconnected path, try to |
| 180 | * note: racing with some other process renaming a | 133 | * find parent and connect. |
| 181 | * directory isn't much of a problem here. If someone | 134 | * |
| 182 | * renames the directory, it will end up properly | 135 | * Racing with some other process renaming a directory |
| 183 | * connected, which is what we want | 136 | * isn't much of a problem here. If someone renames |
| 137 | * the directory, it will end up properly connected, | ||
| 138 | * which is what we want | ||
| 139 | * | ||
| 140 | * Getting the parent can't be supported generically, | ||
| 141 | * the locking is too icky. | ||
| 142 | * | ||
| 143 | * Instead we just return EACCES. If server reboots | ||
| 144 | * or inodes get flushed, you lose | ||
| 184 | */ | 145 | */ |
| 185 | struct dentry *ppd; | 146 | struct dentry *ppd = ERR_PTR(-EACCES); |
| 186 | struct dentry *npd; | 147 | struct dentry *npd; |
| 187 | 148 | ||
| 188 | mutex_lock(&pd->d_inode->i_mutex); | 149 | mutex_lock(&pd->d_inode->i_mutex); |
| 189 | ppd = CALL(nops,get_parent)(pd); | 150 | if (sb->s_export_op->get_parent) |
| 151 | ppd = sb->s_export_op->get_parent(pd); | ||
| 190 | mutex_unlock(&pd->d_inode->i_mutex); | 152 | mutex_unlock(&pd->d_inode->i_mutex); |
| 191 | 153 | ||
| 192 | if (IS_ERR(ppd)) { | 154 | if (IS_ERR(ppd)) { |
| 193 | err = PTR_ERR(ppd); | 155 | err = PTR_ERR(ppd); |
| 194 | dprintk("find_exported_dentry: get_parent of %ld failed, err %d\n", | 156 | dprintk("%s: get_parent of %ld failed, err %d\n", |
| 195 | pd->d_inode->i_ino, err); | 157 | __FUNCTION__, pd->d_inode->i_ino, err); |
| 196 | dput(pd); | 158 | dput(pd); |
| 197 | break; | 159 | break; |
| 198 | } | 160 | } |
| 199 | dprintk("find_exported_dentry: find name of %lu in %lu\n", pd->d_inode->i_ino, ppd->d_inode->i_ino); | 161 | |
| 200 | err = CALL(nops,get_name)(ppd, nbuf, pd); | 162 | dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, |
| 163 | pd->d_inode->i_ino, ppd->d_inode->i_ino); | ||
| 164 | err = exportfs_get_name(ppd, nbuf, pd); | ||
| 201 | if (err) { | 165 | if (err) { |
| 202 | dput(ppd); | 166 | dput(ppd); |
| 203 | dput(pd); | 167 | dput(pd); |
| @@ -208,13 +172,14 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 208 | continue; | 172 | continue; |
| 209 | break; | 173 | break; |
| 210 | } | 174 | } |
| 211 | dprintk("find_exported_dentry: found name: %s\n", nbuf); | 175 | dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); |
| 212 | mutex_lock(&ppd->d_inode->i_mutex); | 176 | mutex_lock(&ppd->d_inode->i_mutex); |
| 213 | npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); | 177 | npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); |
| 214 | mutex_unlock(&ppd->d_inode->i_mutex); | 178 | mutex_unlock(&ppd->d_inode->i_mutex); |
| 215 | if (IS_ERR(npd)) { | 179 | if (IS_ERR(npd)) { |
| 216 | err = PTR_ERR(npd); | 180 | err = PTR_ERR(npd); |
| 217 | dprintk("find_exported_dentry: lookup failed: %d\n", err); | 181 | dprintk("%s: lookup failed: %d\n", |
| 182 | __FUNCTION__, err); | ||
| 218 | dput(ppd); | 183 | dput(ppd); |
| 219 | dput(pd); | 184 | dput(pd); |
| 220 | break; | 185 | break; |
| @@ -227,7 +192,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 227 | if (npd == pd) | 192 | if (npd == pd) |
| 228 | noprogress = 0; | 193 | noprogress = 0; |
| 229 | else | 194 | else |
| 230 | printk("find_exported_dentry: npd != pd\n"); | 195 | printk("%s: npd != pd\n", __FUNCTION__); |
| 231 | dput(npd); | 196 | dput(npd); |
| 232 | dput(ppd); | 197 | dput(ppd); |
| 233 | if (IS_ROOT(pd)) { | 198 | if (IS_ROOT(pd)) { |
| @@ -243,15 +208,101 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 243 | /* something went wrong - oh-well */ | 208 | /* something went wrong - oh-well */ |
| 244 | if (!err) | 209 | if (!err) |
| 245 | err = -ESTALE; | 210 | err = -ESTALE; |
| 246 | goto err_target; | 211 | return err; |
| 247 | } | 212 | } |
| 248 | /* if we weren't after a directory, have one more step to go */ | 213 | |
| 249 | if (result != target_dir) { | 214 | return 0; |
| 250 | struct dentry *nresult; | 215 | } |
| 251 | err = CALL(nops,get_name)(target_dir, nbuf, result); | 216 | |
| 217 | /** | ||
| 218 | * find_exported_dentry - helper routine to implement export_operations->decode_fh | ||
| 219 | * @sb: The &super_block identifying the filesystem | ||
| 220 | * @obj: An opaque identifier of the object to be found - passed to | ||
| 221 | * get_inode | ||
| 222 | * @parent: An optional opqaue identifier of the parent of the object. | ||
| 223 | * @acceptable: A function used to test possible &dentries to see if they are | ||
| 224 | * acceptable | ||
| 225 | * @context: A parameter to @acceptable so that it knows on what basis to | ||
| 226 | * judge. | ||
| 227 | * | ||
| 228 | * find_exported_dentry is the central helper routine to enable file systems | ||
| 229 | * to provide the decode_fh() export_operation. It's main task is to take | ||
| 230 | * an &inode, find or create an appropriate &dentry structure, and possibly | ||
| 231 | * splice this into the dcache in the correct place. | ||
| 232 | * | ||
| 233 | * The decode_fh() operation provided by the filesystem should call | ||
| 234 | * find_exported_dentry() with the same parameters that it received except | ||
| 235 | * that instead of the file handle fragment, pointers to opaque identifiers | ||
| 236 | * for the object and optionally its parent are passed. The default decode_fh | ||
| 237 | * routine passes one pointer to the start of the filehandle fragment, and | ||
| 238 | * one 8 bytes into the fragment. It is expected that most filesystems will | ||
| 239 | * take this approach, though the offset to the parent identifier may well be | ||
| 240 | * different. | ||
| 241 | * | ||
| 242 | * find_exported_dentry() will call get_dentry to get an dentry pointer from | ||
| 243 | * the file system. If any &dentry in the d_alias list is acceptable, it will | ||
| 244 | * be returned. Otherwise find_exported_dentry() will attempt to splice a new | ||
| 245 | * &dentry into the dcache using get_name() and get_parent() to find the | ||
| 246 | * appropriate place. | ||
| 247 | */ | ||
| 248 | |||
| 249 | struct dentry * | ||
| 250 | find_exported_dentry(struct super_block *sb, void *obj, void *parent, | ||
| 251 | int (*acceptable)(void *context, struct dentry *de), | ||
| 252 | void *context) | ||
| 253 | { | ||
| 254 | struct dentry *result, *alias; | ||
| 255 | int err = -ESTALE; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Attempt to find the inode. | ||
| 259 | */ | ||
| 260 | result = exportfs_get_dentry(sb, obj); | ||
| 261 | if (IS_ERR(result)) | ||
| 262 | return result; | ||
| 263 | |||
| 264 | if (S_ISDIR(result->d_inode->i_mode)) { | ||
| 265 | if (!(result->d_flags & DCACHE_DISCONNECTED)) { | ||
| 266 | if (acceptable(context, result)) | ||
| 267 | return result; | ||
| 268 | err = -EACCES; | ||
| 269 | goto err_result; | ||
| 270 | } | ||
| 271 | |||
| 272 | err = reconnect_path(sb, result); | ||
| 273 | if (err) | ||
| 274 | goto err_result; | ||
| 275 | } else { | ||
| 276 | struct dentry *target_dir, *nresult; | ||
| 277 | char nbuf[NAME_MAX+1]; | ||
| 278 | |||
| 279 | alias = find_acceptable_alias(result, acceptable, context); | ||
| 280 | if (alias) | ||
| 281 | return alias; | ||
| 282 | |||
| 283 | if (parent == NULL) | ||
| 284 | goto err_result; | ||
| 285 | |||
| 286 | target_dir = exportfs_get_dentry(sb,parent); | ||
| 287 | if (IS_ERR(target_dir)) { | ||
| 288 | err = PTR_ERR(target_dir); | ||
| 289 | goto err_result; | ||
| 290 | } | ||
| 291 | |||
| 292 | err = reconnect_path(sb, target_dir); | ||
| 293 | if (err) { | ||
| 294 | dput(target_dir); | ||
| 295 | goto err_result; | ||
| 296 | } | ||
| 297 | |||
| 298 | /* | ||
| 299 | * As we weren't after a directory, have one more step to go. | ||
| 300 | */ | ||
| 301 | err = exportfs_get_name(target_dir, nbuf, result); | ||
| 252 | if (!err) { | 302 | if (!err) { |
| 253 | mutex_lock(&target_dir->d_inode->i_mutex); | 303 | mutex_lock(&target_dir->d_inode->i_mutex); |
| 254 | nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); | 304 | nresult = lookup_one_len(nbuf, target_dir, |
| 305 | strlen(nbuf)); | ||
| 255 | mutex_unlock(&target_dir->d_inode->i_mutex); | 306 | mutex_unlock(&target_dir->d_inode->i_mutex); |
| 256 | if (!IS_ERR(nresult)) { | 307 | if (!IS_ERR(nresult)) { |
| 257 | if (nresult->d_inode) { | 308 | if (nresult->d_inode) { |
| @@ -261,11 +312,8 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 261 | dput(nresult); | 312 | dput(nresult); |
| 262 | } | 313 | } |
| 263 | } | 314 | } |
| 315 | dput(target_dir); | ||
| 264 | } | 316 | } |
| 265 | dput(target_dir); | ||
| 266 | /* now result is properly connected, it is our best bet */ | ||
| 267 | if (acceptable(context, result)) | ||
| 268 | return result; | ||
| 269 | 317 | ||
| 270 | alias = find_acceptable_alias(result, acceptable, context); | 318 | alias = find_acceptable_alias(result, acceptable, context); |
| 271 | if (alias) | 319 | if (alias) |
| @@ -275,32 +323,16 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, | |||
| 275 | dput(result); | 323 | dput(result); |
| 276 | /* It might be justifiable to return ESTALE here, | 324 | /* It might be justifiable to return ESTALE here, |
| 277 | * but the filehandle at-least looks reasonable good | 325 | * but the filehandle at-least looks reasonable good |
| 278 | * and it just be a permission problem, so returning | 326 | * and it may just be a permission problem, so returning |
| 279 | * -EACCESS is safer | 327 | * -EACCESS is safer |
| 280 | */ | 328 | */ |
| 281 | return ERR_PTR(-EACCES); | 329 | return ERR_PTR(-EACCES); |
| 282 | 330 | ||
| 283 | err_target: | ||
| 284 | dput(target_dir); | ||
| 285 | err_result: | 331 | err_result: |
| 286 | dput(result); | 332 | dput(result); |
| 287 | err_out: | ||
| 288 | return ERR_PTR(err); | 333 | return ERR_PTR(err); |
| 289 | } | 334 | } |
| 290 | 335 | ||
| 291 | |||
| 292 | |||
| 293 | static struct dentry *get_parent(struct dentry *child) | ||
| 294 | { | ||
| 295 | /* get_parent cannot be supported generically, the locking | ||
| 296 | * is too icky. | ||
| 297 | * instead, we just return EACCES. If server reboots or inodes | ||
| 298 | * get flushed, you lose | ||
| 299 | */ | ||
| 300 | return ERR_PTR(-EACCES); | ||
| 301 | } | ||
| 302 | |||
| 303 | |||
| 304 | struct getdents_callback { | 336 | struct getdents_callback { |
| 305 | char *name; /* name that was found. It already points to a | 337 | char *name; /* name that was found. It already points to a |
| 306 | buffer NAME_MAX+1 is size */ | 338 | buffer NAME_MAX+1 is size */ |
| @@ -390,61 +422,6 @@ out: | |||
| 390 | return error; | 422 | return error; |
| 391 | } | 423 | } |
| 392 | 424 | ||
| 393 | |||
| 394 | static struct dentry *export_iget(struct super_block *sb, unsigned long ino, __u32 generation) | ||
| 395 | { | ||
| 396 | |||
| 397 | /* iget isn't really right if the inode is currently unallocated!! | ||
| 398 | * This should really all be done inside each filesystem | ||
| 399 | * | ||
| 400 | * ext2fs' read_inode has been strengthed to return a bad_inode if | ||
| 401 | * the inode had been deleted. | ||
| 402 | * | ||
| 403 | * Currently we don't know the generation for parent directory, so | ||
| 404 | * a generation of 0 means "accept any" | ||
| 405 | */ | ||
| 406 | struct inode *inode; | ||
| 407 | struct dentry *result; | ||
| 408 | if (ino == 0) | ||
| 409 | return ERR_PTR(-ESTALE); | ||
| 410 | inode = iget(sb, ino); | ||
| 411 | if (inode == NULL) | ||
| 412 | return ERR_PTR(-ENOMEM); | ||
| 413 | if (is_bad_inode(inode) | ||
| 414 | || (generation && inode->i_generation != generation) | ||
| 415 | ) { | ||
| 416 | /* we didn't find the right inode.. */ | ||
| 417 | dprintk("fh_verify: Inode %lu, Bad count: %d %d or version %u %u\n", | ||
| 418 | inode->i_ino, | ||
| 419 | inode->i_nlink, atomic_read(&inode->i_count), | ||
| 420 | inode->i_generation, | ||
| 421 | generation); | ||
| 422 | |||
| 423 | iput(inode); | ||
| 424 | return ERR_PTR(-ESTALE); | ||
| 425 | } | ||
| 426 | /* now to find a dentry. | ||
| 427 | * If possible, get a well-connected one | ||
| 428 | */ | ||
| 429 | result = d_alloc_anon(inode); | ||
| 430 | if (!result) { | ||
| 431 | iput(inode); | ||
| 432 | return ERR_PTR(-ENOMEM); | ||
| 433 | } | ||
| 434 | return result; | ||
| 435 | } | ||
| 436 | |||
| 437 | |||
| 438 | static struct dentry *get_object(struct super_block *sb, void *vobjp) | ||
| 439 | { | ||
| 440 | __u32 *objp = vobjp; | ||
| 441 | unsigned long ino = objp[0]; | ||
| 442 | __u32 generation = objp[1]; | ||
| 443 | |||
| 444 | return export_iget(sb, ino, generation); | ||
| 445 | } | ||
| 446 | |||
| 447 | |||
| 448 | /** | 425 | /** |
| 449 | * export_encode_fh - default export_operations->encode_fh function | 426 | * export_encode_fh - default export_operations->encode_fh function |
| 450 | * @dentry: the dentry to encode | 427 | * @dentry: the dentry to encode |
| @@ -517,16 +494,40 @@ static struct dentry *export_decode_fh(struct super_block *sb, __u32 *fh, int fh | |||
| 517 | acceptable, context); | 494 | acceptable, context); |
| 518 | } | 495 | } |
| 519 | 496 | ||
| 520 | struct export_operations export_op_default = { | 497 | int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, |
| 521 | .decode_fh = export_decode_fh, | 498 | int connectable) |
| 522 | .encode_fh = export_encode_fh, | 499 | { |
| 500 | struct export_operations *nop = dentry->d_sb->s_export_op; | ||
| 501 | int error; | ||
| 502 | |||
| 503 | if (nop->encode_fh) | ||
| 504 | error = nop->encode_fh(dentry, fh, max_len, connectable); | ||
| 505 | else | ||
| 506 | error = export_encode_fh(dentry, fh, max_len, connectable); | ||
| 523 | 507 | ||
| 524 | .get_name = get_name, | 508 | return error; |
| 525 | .get_parent = get_parent, | 509 | } |
| 526 | .get_dentry = get_object, | 510 | EXPORT_SYMBOL_GPL(exportfs_encode_fh); |
| 527 | }; | 511 | |
| 512 | struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, int fh_len, | ||
| 513 | int fileid_type, int (*acceptable)(void *, struct dentry *), | ||
| 514 | void *context) | ||
| 515 | { | ||
| 516 | struct export_operations *nop = mnt->mnt_sb->s_export_op; | ||
| 517 | struct dentry *result; | ||
| 518 | |||
| 519 | if (nop->decode_fh) { | ||
| 520 | result = nop->decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | ||
| 521 | acceptable, context); | ||
| 522 | } else { | ||
| 523 | result = export_decode_fh(mnt->mnt_sb, fh, fh_len, fileid_type, | ||
| 524 | acceptable, context); | ||
| 525 | } | ||
| 526 | |||
| 527 | return result; | ||
| 528 | } | ||
| 529 | EXPORT_SYMBOL_GPL(exportfs_decode_fh); | ||
| 528 | 530 | ||
| 529 | EXPORT_SYMBOL(export_op_default); | ||
| 530 | EXPORT_SYMBOL(find_exported_dentry); | 531 | EXPORT_SYMBOL(find_exported_dentry); |
| 531 | 532 | ||
| 532 | MODULE_LICENSE("GPL"); | 533 | MODULE_LICENSE("GPL"); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 04afeecaaef3..ab7961260c49 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
| @@ -24,9 +24,9 @@ | |||
| 24 | #include "acl.h" | 24 | #include "acl.h" |
| 25 | 25 | ||
| 26 | /* | 26 | /* |
| 27 | * Called when an inode is released. Note that this is different | 27 | * Called when filp is released. This happens when all file descriptors |
| 28 | * from ext2_open_file: open gets called at every open, but release | 28 | * for a single struct file are closed. Note that different open() calls |
| 29 | * gets called only when /all/ the files are closed. | 29 | * for the same file yield different struct file structures. |
| 30 | */ | 30 | */ |
| 31 | static int ext2_release_file (struct inode * inode, struct file * filp) | 31 | static int ext2_release_file (struct inode * inode, struct file * filp) |
| 32 | { | 32 | { |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5de5061eb331..3eefa97fe204 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/parser.h> | 25 | #include <linux/parser.h> |
| 26 | #include <linux/random.h> | 26 | #include <linux/random.h> |
| 27 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
| 28 | #include <linux/exportfs.h> | ||
| 28 | #include <linux/smp_lock.h> | 29 | #include <linux/smp_lock.h> |
| 29 | #include <linux/vfs.h> | 30 | #include <linux/vfs.h> |
| 30 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
| @@ -1099,15 +1100,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 1099 | struct super_block *sb = dentry->d_sb; | 1100 | struct super_block *sb = dentry->d_sb; |
| 1100 | struct ext2_sb_info *sbi = EXT2_SB(sb); | 1101 | struct ext2_sb_info *sbi = EXT2_SB(sb); |
| 1101 | struct ext2_super_block *es = sbi->s_es; | 1102 | struct ext2_super_block *es = sbi->s_es; |
| 1102 | unsigned long overhead; | ||
| 1103 | int i; | ||
| 1104 | u64 fsid; | 1103 | u64 fsid; |
| 1105 | 1104 | ||
| 1106 | if (test_opt (sb, MINIX_DF)) | 1105 | if (test_opt (sb, MINIX_DF)) |
| 1107 | overhead = 0; | 1106 | sbi->s_overhead_last = 0; |
| 1108 | else { | 1107 | else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
| 1108 | unsigned long i, overhead = 0; | ||
| 1109 | smp_rmb(); | ||
| 1110 | |||
| 1109 | /* | 1111 | /* |
| 1110 | * Compute the overhead (FS structures) | 1112 | * Compute the overhead (FS structures). This is constant |
| 1113 | * for a given filesystem unless the number of block groups | ||
| 1114 | * changes so we cache the previous value until it does. | ||
| 1111 | */ | 1115 | */ |
| 1112 | 1116 | ||
| 1113 | /* | 1117 | /* |
| @@ -1131,17 +1135,22 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 1131 | */ | 1135 | */ |
| 1132 | overhead += (sbi->s_groups_count * | 1136 | overhead += (sbi->s_groups_count * |
| 1133 | (2 + sbi->s_itb_per_group)); | 1137 | (2 + sbi->s_itb_per_group)); |
| 1138 | sbi->s_overhead_last = overhead; | ||
| 1139 | smp_wmb(); | ||
| 1140 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
| 1134 | } | 1141 | } |
| 1135 | 1142 | ||
| 1136 | buf->f_type = EXT2_SUPER_MAGIC; | 1143 | buf->f_type = EXT2_SUPER_MAGIC; |
| 1137 | buf->f_bsize = sb->s_blocksize; | 1144 | buf->f_bsize = sb->s_blocksize; |
| 1138 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; | 1145 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; |
| 1139 | buf->f_bfree = ext2_count_free_blocks(sb); | 1146 | buf->f_bfree = ext2_count_free_blocks(sb); |
| 1147 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
| 1140 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); | 1148 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); |
| 1141 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) | 1149 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) |
| 1142 | buf->f_bavail = 0; | 1150 | buf->f_bavail = 0; |
| 1143 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 1151 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
| 1144 | buf->f_ffree = ext2_count_free_inodes(sb); | 1152 | buf->f_ffree = ext2_count_free_inodes(sb); |
| 1153 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
| 1145 | buf->f_namelen = EXT2_NAME_LEN; | 1154 | buf->f_namelen = EXT2_NAME_LEN; |
| 1146 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 1155 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
| 1147 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 1156 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2a85ddee4740..de4e3161e479 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -3195,7 +3195,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val) | |||
| 3195 | */ | 3195 | */ |
| 3196 | 3196 | ||
| 3197 | journal = EXT3_JOURNAL(inode); | 3197 | journal = EXT3_JOURNAL(inode); |
| 3198 | if (is_journal_aborted(journal) || IS_RDONLY(inode)) | 3198 | if (is_journal_aborted(journal)) |
| 3199 | return -EROFS; | 3199 | return -EROFS; |
| 3200 | 3200 | ||
| 3201 | journal_lock_updates(journal); | 3201 | journal_lock_updates(journal); |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 9bb046df827a..1586807b8177 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
| @@ -1019,6 +1019,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
| 1019 | 1019 | ||
| 1020 | if (!inode) | 1020 | if (!inode) |
| 1021 | return ERR_PTR(-EACCES); | 1021 | return ERR_PTR(-EACCES); |
| 1022 | |||
| 1023 | if (is_bad_inode(inode)) { | ||
| 1024 | iput(inode); | ||
| 1025 | return ERR_PTR(-ENOENT); | ||
| 1026 | } | ||
| 1022 | } | 1027 | } |
| 1023 | return d_splice_alias(inode, dentry); | 1028 | return d_splice_alias(inode, dentry); |
| 1024 | } | 1029 | } |
| @@ -1054,6 +1059,11 @@ struct dentry *ext3_get_parent(struct dentry *child) | |||
| 1054 | if (!inode) | 1059 | if (!inode) |
| 1055 | return ERR_PTR(-EACCES); | 1060 | return ERR_PTR(-EACCES); |
| 1056 | 1061 | ||
| 1062 | if (is_bad_inode(inode)) { | ||
| 1063 | iput(inode); | ||
| 1064 | return ERR_PTR(-ENOENT); | ||
| 1065 | } | ||
| 1066 | |||
| 1057 | parent = d_alloc_anon(inode); | 1067 | parent = d_alloc_anon(inode); |
| 1058 | if (!parent) { | 1068 | if (!parent) { |
| 1059 | iput(inode); | 1069 | iput(inode); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6e3062913a92..4f84dc86628a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -29,12 +29,14 @@ | |||
| 29 | #include <linux/parser.h> | 29 | #include <linux/parser.h> |
| 30 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
| 31 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
| 32 | #include <linux/exportfs.h> | ||
| 32 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
| 33 | #include <linux/random.h> | 34 | #include <linux/random.h> |
| 34 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
| 35 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
| 36 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
| 37 | #include <linux/seq_file.h> | 38 | #include <linux/seq_file.h> |
| 39 | #include <linux/log2.h> | ||
| 38 | 40 | ||
| 39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| 40 | 42 | ||
| @@ -459,6 +461,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) | |||
| 459 | 461 | ||
| 460 | static void ext3_destroy_inode(struct inode *inode) | 462 | static void ext3_destroy_inode(struct inode *inode) |
| 461 | { | 463 | { |
| 464 | if (!list_empty(&(EXT3_I(inode)->i_orphan))) { | ||
| 465 | printk("EXT3 Inode %p: orphan list check failed!\n", | ||
| 466 | EXT3_I(inode)); | ||
| 467 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | ||
| 468 | EXT3_I(inode), sizeof(struct ext3_inode_info), | ||
| 469 | false); | ||
| 470 | dump_stack(); | ||
| 471 | } | ||
| 462 | kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); | 472 | kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); |
| 463 | } | 473 | } |
| 464 | 474 | ||
| @@ -1566,7 +1576,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1566 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); | 1576 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); |
| 1567 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); | 1577 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); |
| 1568 | if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || | 1578 | if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || |
| 1569 | (sbi->s_inode_size & (sbi->s_inode_size - 1)) || | 1579 | (!is_power_of_2(sbi->s_inode_size)) || |
| 1570 | (sbi->s_inode_size > blocksize)) { | 1580 | (sbi->s_inode_size > blocksize)) { |
| 1571 | printk (KERN_ERR | 1581 | printk (KERN_ERR |
| 1572 | "EXT3-fs: unsupported inode size: %d\n", | 1582 | "EXT3-fs: unsupported inode size: %d\n", |
| @@ -2075,6 +2085,7 @@ static int ext3_create_journal(struct super_block * sb, | |||
| 2075 | unsigned int journal_inum) | 2085 | unsigned int journal_inum) |
| 2076 | { | 2086 | { |
| 2077 | journal_t *journal; | 2087 | journal_t *journal; |
| 2088 | int err; | ||
| 2078 | 2089 | ||
| 2079 | if (sb->s_flags & MS_RDONLY) { | 2090 | if (sb->s_flags & MS_RDONLY) { |
| 2080 | printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " | 2091 | printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " |
| @@ -2082,13 +2093,15 @@ static int ext3_create_journal(struct super_block * sb, | |||
| 2082 | return -EROFS; | 2093 | return -EROFS; |
| 2083 | } | 2094 | } |
| 2084 | 2095 | ||
| 2085 | if (!(journal = ext3_get_journal(sb, journal_inum))) | 2096 | journal = ext3_get_journal(sb, journal_inum); |
| 2097 | if (!journal) | ||
| 2086 | return -EINVAL; | 2098 | return -EINVAL; |
| 2087 | 2099 | ||
| 2088 | printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", | 2100 | printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", |
| 2089 | journal_inum); | 2101 | journal_inum); |
| 2090 | 2102 | ||
| 2091 | if (journal_create(journal)) { | 2103 | err = journal_create(journal); |
| 2104 | if (err) { | ||
| 2092 | printk(KERN_ERR "EXT3-fs: error creating journal.\n"); | 2105 | printk(KERN_ERR "EXT3-fs: error creating journal.\n"); |
| 2093 | journal_destroy(journal); | 2106 | journal_destroy(journal); |
| 2094 | return -EIO; | 2107 | return -EIO; |
| @@ -2139,12 +2152,14 @@ static void ext3_mark_recovery_complete(struct super_block * sb, | |||
| 2139 | 2152 | ||
| 2140 | journal_lock_updates(journal); | 2153 | journal_lock_updates(journal); |
| 2141 | journal_flush(journal); | 2154 | journal_flush(journal); |
| 2155 | lock_super(sb); | ||
| 2142 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && | 2156 | if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && |
| 2143 | sb->s_flags & MS_RDONLY) { | 2157 | sb->s_flags & MS_RDONLY) { |
| 2144 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 2158 | EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
| 2145 | sb->s_dirt = 0; | 2159 | sb->s_dirt = 0; |
| 2146 | ext3_commit_super(sb, es, 1); | 2160 | ext3_commit_super(sb, es, 1); |
| 2147 | } | 2161 | } |
| 2162 | unlock_super(sb); | ||
| 2148 | journal_unlock_updates(journal); | 2163 | journal_unlock_updates(journal); |
| 2149 | } | 2164 | } |
| 2150 | 2165 | ||
| @@ -2333,7 +2348,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
| 2333 | (sbi->s_mount_state & EXT3_VALID_FS)) | 2348 | (sbi->s_mount_state & EXT3_VALID_FS)) |
| 2334 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 2349 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
| 2335 | 2350 | ||
| 2351 | /* | ||
| 2352 | * We have to unlock super so that we can wait for | ||
| 2353 | * transactions. | ||
| 2354 | */ | ||
| 2355 | unlock_super(sb); | ||
| 2336 | ext3_mark_recovery_complete(sb, es); | 2356 | ext3_mark_recovery_complete(sb, es); |
| 2357 | lock_super(sb); | ||
| 2337 | } else { | 2358 | } else { |
| 2338 | __le32 ret; | 2359 | __le32 ret; |
| 2339 | if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, | 2360 | if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, |
| @@ -2406,19 +2427,19 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 2406 | struct super_block *sb = dentry->d_sb; | 2427 | struct super_block *sb = dentry->d_sb; |
| 2407 | struct ext3_sb_info *sbi = EXT3_SB(sb); | 2428 | struct ext3_sb_info *sbi = EXT3_SB(sb); |
| 2408 | struct ext3_super_block *es = sbi->s_es; | 2429 | struct ext3_super_block *es = sbi->s_es; |
| 2409 | ext3_fsblk_t overhead; | ||
| 2410 | int i; | ||
| 2411 | u64 fsid; | 2430 | u64 fsid; |
| 2412 | 2431 | ||
| 2413 | if (test_opt (sb, MINIX_DF)) | 2432 | if (test_opt(sb, MINIX_DF)) { |
| 2414 | overhead = 0; | 2433 | sbi->s_overhead_last = 0; |
| 2415 | else { | 2434 | } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
| 2416 | unsigned long ngroups; | 2435 | unsigned long ngroups = sbi->s_groups_count, i; |
| 2417 | ngroups = EXT3_SB(sb)->s_groups_count; | 2436 | ext3_fsblk_t overhead = 0; |
| 2418 | smp_rmb(); | 2437 | smp_rmb(); |
| 2419 | 2438 | ||
| 2420 | /* | 2439 | /* |
| 2421 | * Compute the overhead (FS structures) | 2440 | * Compute the overhead (FS structures). This is constant |
| 2441 | * for a given filesystem unless the number of block groups | ||
| 2442 | * changes so we cache the previous value until it does. | ||
| 2422 | */ | 2443 | */ |
| 2423 | 2444 | ||
| 2424 | /* | 2445 | /* |
| @@ -2442,18 +2463,23 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 2442 | * Every block group has an inode bitmap, a block | 2463 | * Every block group has an inode bitmap, a block |
| 2443 | * bitmap, and an inode table. | 2464 | * bitmap, and an inode table. |
| 2444 | */ | 2465 | */ |
| 2445 | overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); | 2466 | overhead += ngroups * (2 + sbi->s_itb_per_group); |
| 2467 | sbi->s_overhead_last = overhead; | ||
| 2468 | smp_wmb(); | ||
| 2469 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
| 2446 | } | 2470 | } |
| 2447 | 2471 | ||
| 2448 | buf->f_type = EXT3_SUPER_MAGIC; | 2472 | buf->f_type = EXT3_SUPER_MAGIC; |
| 2449 | buf->f_bsize = sb->s_blocksize; | 2473 | buf->f_bsize = sb->s_blocksize; |
| 2450 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; | 2474 | buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; |
| 2451 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); | 2475 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); |
| 2476 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
| 2452 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); | 2477 | buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); |
| 2453 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) | 2478 | if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) |
| 2454 | buf->f_bavail = 0; | 2479 | buf->f_bavail = 0; |
| 2455 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 2480 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
| 2456 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); | 2481 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); |
| 2482 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
| 2457 | buf->f_namelen = EXT3_NAME_LEN; | 2483 | buf->f_namelen = EXT3_NAME_LEN; |
| 2458 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 2484 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
| 2459 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 2485 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3b64bb16c727..9de54ae48dee 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -1585,7 +1585,7 @@ allocated: | |||
| 1585 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | 1585 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); |
| 1586 | 1586 | ||
| 1587 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | 1587 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || |
| 1588 | in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | 1588 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || |
| 1589 | in_range(ret_block, ext4_inode_table(sb, gdp), | 1589 | in_range(ret_block, ext4_inode_table(sb, gdp), |
| 1590 | EXT4_SB(sb)->s_itb_per_group) || | 1590 | EXT4_SB(sb)->s_itb_per_group) || |
| 1591 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | 1591 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2811e5720ad0..2de339dd7554 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -1017,6 +1017,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str | |||
| 1017 | 1017 | ||
| 1018 | if (!inode) | 1018 | if (!inode) |
| 1019 | return ERR_PTR(-EACCES); | 1019 | return ERR_PTR(-EACCES); |
| 1020 | |||
| 1021 | if (is_bad_inode(inode)) { | ||
| 1022 | iput(inode); | ||
| 1023 | return ERR_PTR(-ENOENT); | ||
| 1024 | } | ||
| 1020 | } | 1025 | } |
| 1021 | return d_splice_alias(inode, dentry); | 1026 | return d_splice_alias(inode, dentry); |
| 1022 | } | 1027 | } |
| @@ -1052,6 +1057,11 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
| 1052 | if (!inode) | 1057 | if (!inode) |
| 1053 | return ERR_PTR(-EACCES); | 1058 | return ERR_PTR(-EACCES); |
| 1054 | 1059 | ||
| 1060 | if (is_bad_inode(inode)) { | ||
| 1061 | iput(inode); | ||
| 1062 | return ERR_PTR(-ENOENT); | ||
| 1063 | } | ||
| 1064 | |||
| 1055 | parent = d_alloc_anon(inode); | 1065 | parent = d_alloc_anon(inode); |
| 1056 | if (!parent) { | 1066 | if (!parent) { |
| 1057 | iput(inode); | 1067 | iput(inode); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 175b68c60968..b806e689c4aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/parser.h> | 29 | #include <linux/parser.h> |
| 30 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
| 31 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
| 32 | #include <linux/exportfs.h> | ||
| 32 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
| 33 | #include <linux/random.h> | 34 | #include <linux/random.h> |
| 34 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
| @@ -510,6 +511,14 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 510 | 511 | ||
| 511 | static void ext4_destroy_inode(struct inode *inode) | 512 | static void ext4_destroy_inode(struct inode *inode) |
| 512 | { | 513 | { |
| 514 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | ||
| 515 | printk("EXT4 Inode %p: orphan list check failed!\n", | ||
| 516 | EXT4_I(inode)); | ||
| 517 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | ||
| 518 | EXT4_I(inode), sizeof(struct ext4_inode_info), | ||
| 519 | true); | ||
| 520 | dump_stack(); | ||
| 521 | } | ||
| 513 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); | 522 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); |
| 514 | } | 523 | } |
| 515 | 524 | ||
| @@ -2150,6 +2159,7 @@ static int ext4_create_journal(struct super_block * sb, | |||
| 2150 | unsigned int journal_inum) | 2159 | unsigned int journal_inum) |
| 2151 | { | 2160 | { |
| 2152 | journal_t *journal; | 2161 | journal_t *journal; |
| 2162 | int err; | ||
| 2153 | 2163 | ||
| 2154 | if (sb->s_flags & MS_RDONLY) { | 2164 | if (sb->s_flags & MS_RDONLY) { |
| 2155 | printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " | 2165 | printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " |
| @@ -2157,13 +2167,15 @@ static int ext4_create_journal(struct super_block * sb, | |||
| 2157 | return -EROFS; | 2167 | return -EROFS; |
| 2158 | } | 2168 | } |
| 2159 | 2169 | ||
| 2160 | if (!(journal = ext4_get_journal(sb, journal_inum))) | 2170 | journal = ext4_get_journal(sb, journal_inum); |
| 2171 | if (!journal) | ||
| 2161 | return -EINVAL; | 2172 | return -EINVAL; |
| 2162 | 2173 | ||
| 2163 | printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", | 2174 | printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", |
| 2164 | journal_inum); | 2175 | journal_inum); |
| 2165 | 2176 | ||
| 2166 | if (jbd2_journal_create(journal)) { | 2177 | err = jbd2_journal_create(journal); |
| 2178 | if (err) { | ||
| 2167 | printk(KERN_ERR "EXT4-fs: error creating journal.\n"); | 2179 | printk(KERN_ERR "EXT4-fs: error creating journal.\n"); |
| 2168 | jbd2_journal_destroy(journal); | 2180 | jbd2_journal_destroy(journal); |
| 2169 | return -EIO; | 2181 | return -EIO; |
| @@ -2214,12 +2226,14 @@ static void ext4_mark_recovery_complete(struct super_block * sb, | |||
| 2214 | 2226 | ||
| 2215 | jbd2_journal_lock_updates(journal); | 2227 | jbd2_journal_lock_updates(journal); |
| 2216 | jbd2_journal_flush(journal); | 2228 | jbd2_journal_flush(journal); |
| 2229 | lock_super(sb); | ||
| 2217 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2230 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
| 2218 | sb->s_flags & MS_RDONLY) { | 2231 | sb->s_flags & MS_RDONLY) { |
| 2219 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2232 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 2220 | sb->s_dirt = 0; | 2233 | sb->s_dirt = 0; |
| 2221 | ext4_commit_super(sb, es, 1); | 2234 | ext4_commit_super(sb, es, 1); |
| 2222 | } | 2235 | } |
| 2236 | unlock_super(sb); | ||
| 2223 | jbd2_journal_unlock_updates(journal); | 2237 | jbd2_journal_unlock_updates(journal); |
| 2224 | } | 2238 | } |
| 2225 | 2239 | ||
| @@ -2408,7 +2422,13 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) | |||
| 2408 | (sbi->s_mount_state & EXT4_VALID_FS)) | 2422 | (sbi->s_mount_state & EXT4_VALID_FS)) |
| 2409 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 2423 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
| 2410 | 2424 | ||
| 2425 | /* | ||
| 2426 | * We have to unlock super so that we can wait for | ||
| 2427 | * transactions. | ||
| 2428 | */ | ||
| 2429 | unlock_super(sb); | ||
| 2411 | ext4_mark_recovery_complete(sb, es); | 2430 | ext4_mark_recovery_complete(sb, es); |
| 2431 | lock_super(sb); | ||
| 2412 | } else { | 2432 | } else { |
| 2413 | __le32 ret; | 2433 | __le32 ret; |
| 2414 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2434 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| @@ -2481,19 +2501,19 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 2481 | struct super_block *sb = dentry->d_sb; | 2501 | struct super_block *sb = dentry->d_sb; |
| 2482 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2502 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2483 | struct ext4_super_block *es = sbi->s_es; | 2503 | struct ext4_super_block *es = sbi->s_es; |
| 2484 | ext4_fsblk_t overhead; | ||
| 2485 | int i; | ||
| 2486 | u64 fsid; | 2504 | u64 fsid; |
| 2487 | 2505 | ||
| 2488 | if (test_opt (sb, MINIX_DF)) | 2506 | if (test_opt(sb, MINIX_DF)) { |
| 2489 | overhead = 0; | 2507 | sbi->s_overhead_last = 0; |
| 2490 | else { | 2508 | } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { |
| 2491 | unsigned long ngroups; | 2509 | unsigned long ngroups = sbi->s_groups_count, i; |
| 2492 | ngroups = EXT4_SB(sb)->s_groups_count; | 2510 | ext4_fsblk_t overhead = 0; |
| 2493 | smp_rmb(); | 2511 | smp_rmb(); |
| 2494 | 2512 | ||
| 2495 | /* | 2513 | /* |
| 2496 | * Compute the overhead (FS structures) | 2514 | * Compute the overhead (FS structures). This is constant |
| 2515 | * for a given filesystem unless the number of block groups | ||
| 2516 | * changes so we cache the previous value until it does. | ||
| 2497 | */ | 2517 | */ |
| 2498 | 2518 | ||
| 2499 | /* | 2519 | /* |
| @@ -2517,18 +2537,23 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
| 2517 | * Every block group has an inode bitmap, a block | 2537 | * Every block group has an inode bitmap, a block |
| 2518 | * bitmap, and an inode table. | 2538 | * bitmap, and an inode table. |
| 2519 | */ | 2539 | */ |
| 2520 | overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); | 2540 | overhead += ngroups * (2 + sbi->s_itb_per_group); |
| 2541 | sbi->s_overhead_last = overhead; | ||
| 2542 | smp_wmb(); | ||
| 2543 | sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); | ||
| 2521 | } | 2544 | } |
| 2522 | 2545 | ||
| 2523 | buf->f_type = EXT4_SUPER_MAGIC; | 2546 | buf->f_type = EXT4_SUPER_MAGIC; |
| 2524 | buf->f_bsize = sb->s_blocksize; | 2547 | buf->f_bsize = sb->s_blocksize; |
| 2525 | buf->f_blocks = ext4_blocks_count(es) - overhead; | 2548 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
| 2526 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); | 2549 | buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); |
| 2550 | es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); | ||
| 2527 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 2551 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
| 2528 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 2552 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
| 2529 | buf->f_bavail = 0; | 2553 | buf->f_bavail = 0; |
| 2530 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 2554 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
| 2531 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); | 2555 | buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); |
| 2556 | es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); | ||
| 2532 | buf->f_namelen = EXT4_NAME_LEN; | 2557 | buf->f_namelen = EXT4_NAME_LEN; |
| 2533 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 2558 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
| 2534 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 2559 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index ccf161dffb63..72cbcd61bd95 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
| @@ -313,7 +313,7 @@ int fat_search_long(struct inode *inode, const unsigned char *name, | |||
| 313 | wchar_t bufuname[14]; | 313 | wchar_t bufuname[14]; |
| 314 | unsigned char xlate_len, nr_slots; | 314 | unsigned char xlate_len, nr_slots; |
| 315 | wchar_t *unicode = NULL; | 315 | wchar_t *unicode = NULL; |
| 316 | unsigned char work[8], bufname[260]; /* 256 + 4 */ | 316 | unsigned char work[MSDOS_NAME], bufname[260]; /* 256 + 4 */ |
| 317 | int uni_xlate = sbi->options.unicode_xlate; | 317 | int uni_xlate = sbi->options.unicode_xlate; |
| 318 | int utf8 = sbi->options.utf8; | 318 | int utf8 = sbi->options.utf8; |
| 319 | int anycase = (sbi->options.name_check != 's'); | 319 | int anycase = (sbi->options.name_check != 's'); |
| @@ -351,7 +351,8 @@ parse_record: | |||
| 351 | if (work[0] == 0x05) | 351 | if (work[0] == 0x05) |
| 352 | work[0] = 0xE5; | 352 | work[0] = 0xE5; |
| 353 | for (i = 0, j = 0, last_u = 0; i < 8;) { | 353 | for (i = 0, j = 0, last_u = 0; i < 8;) { |
| 354 | if (!work[i]) break; | 354 | if (!work[i]) |
| 355 | break; | ||
| 355 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, | 356 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, |
| 356 | &bufuname[j++], opt_shortname, | 357 | &bufuname[j++], opt_shortname, |
| 357 | de->lcase & CASE_LOWER_BASE); | 358 | de->lcase & CASE_LOWER_BASE); |
| @@ -365,13 +366,15 @@ parse_record: | |||
| 365 | } | 366 | } |
| 366 | j = last_u; | 367 | j = last_u; |
| 367 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); | 368 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); |
| 368 | for (i = 0; i < 3;) { | 369 | for (i = 8; i < MSDOS_NAME;) { |
| 369 | if (!de->ext[i]) break; | 370 | if (!work[i]) |
| 370 | chl = fat_shortname2uni(nls_disk, &de->ext[i], 3 - i, | 371 | break; |
| 372 | chl = fat_shortname2uni(nls_disk, &work[i], | ||
| 373 | MSDOS_NAME - i, | ||
| 371 | &bufuname[j++], opt_shortname, | 374 | &bufuname[j++], opt_shortname, |
| 372 | de->lcase & CASE_LOWER_EXT); | 375 | de->lcase & CASE_LOWER_EXT); |
| 373 | if (chl <= 1) { | 376 | if (chl <= 1) { |
| 374 | if (de->ext[i] != ' ') | 377 | if (work[i] != ' ') |
| 375 | last_u = j; | 378 | last_u = j; |
| 376 | } else { | 379 | } else { |
| 377 | last_u = j; | 380 | last_u = j; |
| @@ -445,7 +448,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
| 445 | int fill_len; | 448 | int fill_len; |
| 446 | wchar_t bufuname[14]; | 449 | wchar_t bufuname[14]; |
| 447 | wchar_t *unicode = NULL; | 450 | wchar_t *unicode = NULL; |
| 448 | unsigned char c, work[8], bufname[56], *ptname = bufname; | 451 | unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; |
| 449 | unsigned long lpos, dummy, *furrfu = &lpos; | 452 | unsigned long lpos, dummy, *furrfu = &lpos; |
| 450 | int uni_xlate = sbi->options.unicode_xlate; | 453 | int uni_xlate = sbi->options.unicode_xlate; |
| 451 | int isvfat = sbi->options.isvfat; | 454 | int isvfat = sbi->options.isvfat; |
| @@ -527,7 +530,8 @@ parse_record: | |||
| 527 | if (work[0] == 0x05) | 530 | if (work[0] == 0x05) |
| 528 | work[0] = 0xE5; | 531 | work[0] = 0xE5; |
| 529 | for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { | 532 | for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { |
| 530 | if (!(c = work[i])) break; | 533 | if (!(c = work[i])) |
| 534 | break; | ||
| 531 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, | 535 | chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, |
| 532 | &bufuname[j++], opt_shortname, | 536 | &bufuname[j++], opt_shortname, |
| 533 | de->lcase & CASE_LOWER_BASE); | 537 | de->lcase & CASE_LOWER_BASE); |
| @@ -549,9 +553,10 @@ parse_record: | |||
| 549 | j = last_u; | 553 | j = last_u; |
| 550 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); | 554 | fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); |
| 551 | ptname[i++] = '.'; | 555 | ptname[i++] = '.'; |
| 552 | for (i2 = 0; i2 < 3;) { | 556 | for (i2 = 8; i2 < MSDOS_NAME;) { |
| 553 | if (!(c = de->ext[i2])) break; | 557 | if (!(c = work[i2])) |
| 554 | chl = fat_shortname2uni(nls_disk, &de->ext[i2], 3 - i2, | 558 | break; |
| 559 | chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2, | ||
| 555 | &bufuname[j++], opt_shortname, | 560 | &bufuname[j++], opt_shortname, |
| 556 | de->lcase & CASE_LOWER_EXT); | 561 | de->lcase & CASE_LOWER_EXT); |
| 557 | if (chl <= 1) { | 562 | if (chl <= 1) { |
| @@ -563,8 +568,8 @@ parse_record: | |||
| 563 | } | 568 | } |
| 564 | } else { | 569 | } else { |
| 565 | last_u = j; | 570 | last_u = j; |
| 566 | for (chi = 0; chi < chl && i2 < 3; chi++) { | 571 | for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) { |
| 567 | ptname[i++] = de->ext[i2++]; | 572 | ptname[i++] = work[i2++]; |
| 568 | last = i; | 573 | last = i; |
| 569 | } | 574 | } |
| 570 | } | 575 | } |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index ab171ea8e869..2c1b73fb82ae 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
| @@ -17,6 +17,8 @@ struct fatent_operations { | |||
| 17 | int (*ent_next)(struct fat_entry *); | 17 | int (*ent_next)(struct fat_entry *); |
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | static DEFINE_SPINLOCK(fat12_entry_lock); | ||
| 21 | |||
| 20 | static void fat12_ent_blocknr(struct super_block *sb, int entry, | 22 | static void fat12_ent_blocknr(struct super_block *sb, int entry, |
| 21 | int *offset, sector_t *blocknr) | 23 | int *offset, sector_t *blocknr) |
| 22 | { | 24 | { |
| @@ -116,10 +118,13 @@ static int fat12_ent_get(struct fat_entry *fatent) | |||
| 116 | u8 **ent12_p = fatent->u.ent12_p; | 118 | u8 **ent12_p = fatent->u.ent12_p; |
| 117 | int next; | 119 | int next; |
| 118 | 120 | ||
| 121 | spin_lock(&fat12_entry_lock); | ||
| 119 | if (fatent->entry & 1) | 122 | if (fatent->entry & 1) |
| 120 | next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); | 123 | next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); |
| 121 | else | 124 | else |
| 122 | next = (*ent12_p[1] << 8) | *ent12_p[0]; | 125 | next = (*ent12_p[1] << 8) | *ent12_p[0]; |
| 126 | spin_unlock(&fat12_entry_lock); | ||
| 127 | |||
| 123 | next &= 0x0fff; | 128 | next &= 0x0fff; |
| 124 | if (next >= BAD_FAT12) | 129 | if (next >= BAD_FAT12) |
| 125 | next = FAT_ENT_EOF; | 130 | next = FAT_ENT_EOF; |
| @@ -151,6 +156,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) | |||
| 151 | if (new == FAT_ENT_EOF) | 156 | if (new == FAT_ENT_EOF) |
| 152 | new = EOF_FAT12; | 157 | new = EOF_FAT12; |
| 153 | 158 | ||
| 159 | spin_lock(&fat12_entry_lock); | ||
| 154 | if (fatent->entry & 1) { | 160 | if (fatent->entry & 1) { |
| 155 | *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); | 161 | *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); |
| 156 | *ent12_p[1] = new >> 4; | 162 | *ent12_p[1] = new >> 4; |
| @@ -158,6 +164,7 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) | |||
| 158 | *ent12_p[0] = new & 0xff; | 164 | *ent12_p[0] = new & 0xff; |
| 159 | *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); | 165 | *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); |
| 160 | } | 166 | } |
| 167 | spin_unlock(&fat12_entry_lock); | ||
| 161 | 168 | ||
| 162 | mark_buffer_dirty(fatent->bhs[0]); | 169 | mark_buffer_dirty(fatent->bhs[0]); |
| 163 | if (fatent->nr_bhs == 2) | 170 | if (fatent->nr_bhs == 2) |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 479722d89667..0a7ddb39a593 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
| 21 | #include <linux/mpage.h> | 21 | #include <linux/mpage.h> |
| 22 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
| 23 | #include <linux/exportfs.h> | ||
| 23 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
| 24 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
| 25 | #include <linux/parser.h> | 26 | #include <linux/parser.h> |
| @@ -354,8 +355,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) | |||
| 354 | } else { /* not a directory */ | 355 | } else { /* not a directory */ |
| 355 | inode->i_generation |= 1; | 356 | inode->i_generation |= 1; |
| 356 | inode->i_mode = MSDOS_MKMODE(de->attr, | 357 | inode->i_mode = MSDOS_MKMODE(de->attr, |
| 357 | ((sbi->options.showexec && | 358 | ((sbi->options.showexec && !is_exec(de->name + 8)) |
| 358 | !is_exec(de->ext)) | ||
| 359 | ? S_IRUGO|S_IWUGO : S_IRWXUGO) | 359 | ? S_IRUGO|S_IWUGO : S_IRWXUGO) |
| 360 | & ~sbi->options.fs_fmask) | S_IFREG; | 360 | & ~sbi->options.fs_fmask) | S_IFREG; |
| 361 | MSDOS_I(inode)->i_start = le16_to_cpu(de->start); | 361 | MSDOS_I(inode)->i_start = le16_to_cpu(de->start); |
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h index 8a4dfef1ddad..3c96d6e63978 100644 --- a/fs/freevxfs/vxfs_dir.h +++ b/fs/freevxfs/vxfs_dir.h | |||
| @@ -80,7 +80,7 @@ struct vxfs_direct { | |||
| 80 | * a d_name with size len. | 80 | * a d_name with size len. |
| 81 | */ | 81 | */ |
| 82 | #define VXFS_DIRPAD 4 | 82 | #define VXFS_DIRPAD 4 |
| 83 | #define VXFS_NAMEMIN ((int)((struct vxfs_direct *)0)->d_name) | 83 | #define VXFS_NAMEMIN offsetof(struct vxfs_direct, d_name) |
| 84 | #define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1)) | 84 | #define VXFS_DIRROUND(len) ((VXFS_DIRPAD + (len) - 1) & ~(VXFS_DIRPAD -1)) |
| 85 | #define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len))) | 85 | #define VXFS_DIRLEN(len) (VXFS_DIRROUND(VXFS_NAMEMIN + (len))) |
| 86 | 86 | ||
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index c1f44009853f..1ab3e9d73886 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
| 12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
| 14 | #include <linux/capability.h> | ||
| 14 | #include <linux/xattr.h> | 15 | #include <linux/xattr.h> |
| 15 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
| 16 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 99ea5659bc2c..b8312edee0e4 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
| 12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
| 14 | #include <linux/exportfs.h> | ||
| 14 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
| 15 | #include <linux/crc32.h> | 16 | #include <linux/crc32.h> |
| 16 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 90ebab753d30..050d29c0a5b5 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
| @@ -62,8 +62,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 62 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 62 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && |
| 63 | (head->key_type == HFSPLUS_KEY_BINARY)) | 63 | (head->key_type == HFSPLUS_KEY_BINARY)) |
| 64 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 64 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
| 65 | else | 65 | else { |
| 66 | tree->keycmp = hfsplus_cat_case_cmp_key; | 66 | tree->keycmp = hfsplus_cat_case_cmp_key; |
| 67 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | ||
| 68 | } | ||
| 67 | } else { | 69 | } else { |
| 68 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 70 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
| 69 | goto fail_page; | 71 | goto fail_page; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 80b5682a2273..1955ee61251c 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
| @@ -36,6 +36,8 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
| 36 | u16 type; | 36 | u16 type; |
| 37 | 37 | ||
| 38 | sb = dir->i_sb; | 38 | sb = dir->i_sb; |
| 39 | |||
| 40 | dentry->d_op = &hfsplus_dentry_operations; | ||
| 39 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
| 40 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); |
| 41 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 3915635b4470..d9f5eda6d039 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
| @@ -150,6 +150,7 @@ struct hfsplus_sb_info { | |||
| 150 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 150 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 |
| 151 | #define HFSPLUS_SB_FORCE 0x0004 | 151 | #define HFSPLUS_SB_FORCE 0x0004 |
| 152 | #define HFSPLUS_SB_HFSX 0x0008 | 152 | #define HFSPLUS_SB_HFSX 0x0008 |
| 153 | #define HFSPLUS_SB_CASEFOLD 0x0010 | ||
| 153 | 154 | ||
| 154 | 155 | ||
| 155 | struct hfsplus_inode_info { | 156 | struct hfsplus_inode_info { |
| @@ -321,6 +322,7 @@ void hfsplus_file_truncate(struct inode *); | |||
| 321 | /* inode.c */ | 322 | /* inode.c */ |
| 322 | extern const struct address_space_operations hfsplus_aops; | 323 | extern const struct address_space_operations hfsplus_aops; |
| 323 | extern const struct address_space_operations hfsplus_btree_aops; | 324 | extern const struct address_space_operations hfsplus_btree_aops; |
| 325 | extern struct dentry_operations hfsplus_dentry_operations; | ||
| 324 | 326 | ||
| 325 | void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); | 327 | void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); |
| 326 | void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); | 328 | void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); |
| @@ -353,6 +355,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist | |||
| 353 | int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); | 355 | int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); |
| 354 | int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); | 356 | int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); |
| 355 | int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); | 357 | int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); |
| 358 | int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); | ||
| 359 | int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); | ||
| 356 | 360 | ||
| 357 | /* wrapper.c */ | 361 | /* wrapper.c */ |
| 358 | int hfsplus_read_wrapper(struct super_block *); | 362 | int hfsplus_read_wrapper(struct super_block *); |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 409ce5429c91..6f7c662174db 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -131,6 +131,11 @@ const struct address_space_operations hfsplus_aops = { | |||
| 131 | .writepages = hfsplus_writepages, | 131 | .writepages = hfsplus_writepages, |
| 132 | }; | 132 | }; |
| 133 | 133 | ||
| 134 | struct dentry_operations hfsplus_dentry_operations = { | ||
| 135 | .d_hash = hfsplus_hash_dentry, | ||
| 136 | .d_compare = hfsplus_compare_dentry, | ||
| 137 | }; | ||
| 138 | |||
| 134 | static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, | 139 | static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, |
| 135 | struct nameidata *nd) | 140 | struct nameidata *nd) |
| 136 | { | 141 | { |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ebd1b380cbbc..6d87a2a9534d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
| @@ -283,11 +283,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 283 | struct nls_table *nls = NULL; | 283 | struct nls_table *nls = NULL; |
| 284 | int err = -EINVAL; | 284 | int err = -EINVAL; |
| 285 | 285 | ||
| 286 | sbi = kmalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); | 286 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
| 287 | if (!sbi) | 287 | if (!sbi) |
| 288 | return -ENOMEM; | 288 | return -ENOMEM; |
| 289 | 289 | ||
| 290 | memset(sbi, 0, sizeof(HFSPLUS_SB(sb))); | ||
| 291 | sb->s_fs_info = sbi; | 290 | sb->s_fs_info = sbi; |
| 292 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 291 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); |
| 293 | hfsplus_fill_defaults(sbi); | 292 | hfsplus_fill_defaults(sbi); |
| @@ -381,6 +380,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 381 | iput(root); | 380 | iput(root); |
| 382 | goto cleanup; | 381 | goto cleanup; |
| 383 | } | 382 | } |
| 383 | sb->s_root->d_op = &hfsplus_dentry_operations; | ||
| 384 | 384 | ||
| 385 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 385 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
| 386 | str.name = HFSP_HIDDENDIR_NAME; | 386 | str.name = HFSP_HIDDENDIR_NAME; |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 689c8bd721fb..9e10f9444b64 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
| @@ -239,61 +239,201 @@ out: | |||
| 239 | return res; | 239 | return res; |
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len) | 242 | /* |
| 243 | * Convert one or more ASCII characters into a single unicode character. | ||
| 244 | * Returns the number of ASCII characters corresponding to the unicode char. | ||
| 245 | */ | ||
| 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | ||
| 247 | wchar_t *uc) | ||
| 243 | { | 248 | { |
| 244 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); |
| 245 | int size, off, decompose; | 250 | if (size <= 0) { |
| 251 | *uc = '?'; | ||
| 252 | size = 1; | ||
| 253 | } | ||
| 254 | switch (*uc) { | ||
| 255 | case 0x2400: | ||
| 256 | *uc = 0; | ||
| 257 | break; | ||
| 258 | case ':': | ||
| 259 | *uc = '/'; | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | return size; | ||
| 263 | } | ||
| 264 | |||
| 265 | /* Decomposes a single unicode character. */ | ||
| 266 | static inline u16 *decompose_unichar(wchar_t uc, int *size) | ||
| 267 | { | ||
| 268 | int off; | ||
| 269 | |||
| 270 | off = hfsplus_decompose_table[(uc >> 12) & 0xf]; | ||
| 271 | if (off == 0 || off == 0xffff) | ||
| 272 | return NULL; | ||
| 273 | |||
| 274 | off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; | ||
| 275 | if (!off) | ||
| 276 | return NULL; | ||
| 277 | |||
| 278 | off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; | ||
| 279 | if (!off) | ||
| 280 | return NULL; | ||
| 281 | |||
| 282 | off = hfsplus_decompose_table[off + (uc & 0xf)]; | ||
| 283 | *size = off & 3; | ||
| 284 | if (*size == 0) | ||
| 285 | return NULL; | ||
| 286 | return hfsplus_decompose_table + (off / 4); | ||
| 287 | } | ||
| 288 | |||
| 289 | int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | ||
| 290 | const char *astr, int len) | ||
| 291 | { | ||
| 292 | int size, dsize, decompose; | ||
| 293 | u16 *dstr, outlen = 0; | ||
| 246 | wchar_t c; | 294 | wchar_t c; |
| 247 | u16 outlen = 0; | ||
| 248 | 295 | ||
| 249 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); |
| 250 | |||
| 251 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
| 252 | size = nls->char2uni(astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
| 253 | if (size <= 0) { | 299 | |
| 254 | c = '?'; | 300 | if (decompose && (dstr = decompose_unichar(c, &dsize))) { |
| 255 | size = 1; | 301 | if (outlen + dsize > HFSPLUS_MAX_STRLEN) |
| 256 | } | ||
| 257 | astr += size; | ||
| 258 | len -= size; | ||
| 259 | switch (c) { | ||
| 260 | case 0x2400: | ||
| 261 | c = 0; | ||
| 262 | break; | ||
| 263 | case ':': | ||
| 264 | c = '/'; | ||
| 265 | break; | ||
| 266 | } | ||
| 267 | if (c >= 0xc0 && decompose) { | ||
| 268 | off = hfsplus_decompose_table[(c >> 12) & 0xf]; | ||
| 269 | if (!off) | ||
| 270 | goto done; | ||
| 271 | if (off == 0xffff) { | ||
| 272 | goto done; | ||
| 273 | } | ||
| 274 | off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)]; | ||
| 275 | if (!off) | ||
| 276 | goto done; | ||
| 277 | off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)]; | ||
| 278 | if (!off) | ||
| 279 | goto done; | ||
| 280 | off = hfsplus_decompose_table[off + (c & 0xf)]; | ||
| 281 | size = off & 3; | ||
| 282 | if (!size) | ||
| 283 | goto done; | ||
| 284 | off /= 4; | ||
| 285 | if (outlen + size > HFSPLUS_MAX_STRLEN) | ||
| 286 | break; | 302 | break; |
| 287 | do { | 303 | do { |
| 288 | ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]); | 304 | ustr->unicode[outlen++] = cpu_to_be16(*dstr++); |
| 289 | } while (--size > 0); | 305 | } while (--dsize > 0); |
| 290 | continue; | 306 | } else |
| 291 | } | 307 | ustr->unicode[outlen++] = cpu_to_be16(c); |
| 292 | done: | 308 | |
| 293 | ustr->unicode[outlen++] = cpu_to_be16(c); | 309 | astr += size; |
| 310 | len -= size; | ||
| 294 | } | 311 | } |
| 295 | ustr->length = cpu_to_be16(outlen); | 312 | ustr->length = cpu_to_be16(outlen); |
| 296 | if (len > 0) | 313 | if (len > 0) |
| 297 | return -ENAMETOOLONG; | 314 | return -ENAMETOOLONG; |
| 298 | return 0; | 315 | return 0; |
| 299 | } | 316 | } |
| 317 | |||
| 318 | /* | ||
| 319 | * Hash a string to an integer as appropriate for the HFS+ filesystem. | ||
| 320 | * Composed unicode characters are decomposed and case-folding is performed | ||
| 321 | * if the appropriate bits are (un)set on the superblock. | ||
| 322 | */ | ||
| 323 | int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | ||
| 324 | { | ||
| 325 | struct super_block *sb = dentry->d_sb; | ||
| 326 | const char *astr; | ||
| 327 | const u16 *dstr; | ||
| 328 | int casefold, decompose, size, dsize, len; | ||
| 329 | unsigned long hash; | ||
| 330 | wchar_t c; | ||
| 331 | u16 c2; | ||
| 332 | |||
| 333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | ||
| 334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | ||
| 335 | hash = init_name_hash(); | ||
| 336 | astr = str->name; | ||
| 337 | len = str->len; | ||
| 338 | while (len > 0) { | ||
| 339 | size = asc2unichar(sb, astr, len, &c); | ||
| 340 | astr += size; | ||
| 341 | len -= size; | ||
| 342 | |||
| 343 | if (decompose && (dstr = decompose_unichar(c, &dsize))) { | ||
| 344 | do { | ||
| 345 | c2 = *dstr++; | ||
| 346 | if (!casefold || (c2 = case_fold(c2))) | ||
| 347 | hash = partial_name_hash(c2, hash); | ||
| 348 | } while (--dsize > 0); | ||
| 349 | } else { | ||
| 350 | c2 = c; | ||
| 351 | if (!casefold || (c2 = case_fold(c2))) | ||
| 352 | hash = partial_name_hash(c2, hash); | ||
| 353 | } | ||
| 354 | } | ||
| 355 | str->hash = end_name_hash(hash); | ||
| 356 | |||
| 357 | return 0; | ||
| 358 | } | ||
| 359 | |||
| 360 | /* | ||
| 361 | * Compare strings with HFS+ filename ordering. | ||
| 362 | * Composed unicode characters are decomposed and case-folding is performed | ||
| 363 | * if the appropriate bits are (un)set on the superblock. | ||
| 364 | */ | ||
| 365 | int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) | ||
| 366 | { | ||
| 367 | struct super_block *sb = dentry->d_sb; | ||
| 368 | int casefold, decompose, size; | ||
| 369 | int dsize1, dsize2, len1, len2; | ||
| 370 | const u16 *dstr1, *dstr2; | ||
| 371 | const char *astr1, *astr2; | ||
| 372 | u16 c1, c2; | ||
| 373 | wchar_t c; | ||
| 374 | |||
| 375 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | ||
| 376 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | ||
| 377 | astr1 = s1->name; | ||
| 378 | len1 = s1->len; | ||
| 379 | astr2 = s2->name; | ||
| 380 | len2 = s2->len; | ||
| 381 | dsize1 = dsize2 = 0; | ||
| 382 | dstr1 = dstr2 = NULL; | ||
| 383 | |||
| 384 | while (len1 > 0 && len2 > 0) { | ||
| 385 | if (!dsize1) { | ||
| 386 | size = asc2unichar(sb, astr1, len1, &c); | ||
| 387 | astr1 += size; | ||
| 388 | len1 -= size; | ||
| 389 | |||
| 390 | if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) { | ||
| 391 | c1 = c; | ||
| 392 | dstr1 = &c1; | ||
| 393 | dsize1 = 1; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 397 | if (!dsize2) { | ||
| 398 | size = asc2unichar(sb, astr2, len2, &c); | ||
| 399 | astr2 += size; | ||
| 400 | len2 -= size; | ||
| 401 | |||
| 402 | if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) { | ||
| 403 | c2 = c; | ||
| 404 | dstr2 = &c2; | ||
| 405 | dsize2 = 1; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | |||
| 409 | c1 = *dstr1; | ||
| 410 | c2 = *dstr2; | ||
| 411 | if (casefold) { | ||
| 412 | if (!(c1 = case_fold(c1))) { | ||
| 413 | dstr1++; | ||
| 414 | dsize1--; | ||
| 415 | continue; | ||
| 416 | } | ||
| 417 | if (!(c2 = case_fold(c2))) { | ||
| 418 | dstr2++; | ||
| 419 | dsize2--; | ||
| 420 | continue; | ||
| 421 | } | ||
| 422 | } | ||
| 423 | if (c1 < c2) | ||
| 424 | return -1; | ||
| 425 | else if (c1 > c2) | ||
| 426 | return 1; | ||
| 427 | |||
| 428 | dstr1++; | ||
| 429 | dsize1--; | ||
| 430 | dstr2++; | ||
| 431 | dsize2--; | ||
| 432 | } | ||
| 433 | |||
| 434 | if (len1 < len2) | ||
| 435 | return -1; | ||
| 436 | if (len1 > len2) | ||
| 437 | return 1; | ||
| 438 | return 0; | ||
| 439 | } | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e6b46b3ac2fe..d145cb79c30a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -13,15 +13,18 @@ | |||
| 13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 14 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
| 15 | #include <linux/file.h> | 15 | #include <linux/file.h> |
| 16 | #include <linux/kernel.h> | ||
| 16 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
| 17 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
| 18 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
| 19 | #include <linux/init.h> | 20 | #include <linux/init.h> |
| 20 | #include <linux/string.h> | 21 | #include <linux/string.h> |
| 21 | #include <linux/capability.h> | 22 | #include <linux/capability.h> |
| 23 | #include <linux/ctype.h> | ||
| 22 | #include <linux/backing-dev.h> | 24 | #include <linux/backing-dev.h> |
| 23 | #include <linux/hugetlb.h> | 25 | #include <linux/hugetlb.h> |
| 24 | #include <linux/pagevec.h> | 26 | #include <linux/pagevec.h> |
| 27 | #include <linux/parser.h> | ||
| 25 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
| 26 | #include <linux/quotaops.h> | 29 | #include <linux/quotaops.h> |
| 27 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| @@ -47,6 +50,21 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = { | |||
| 47 | 50 | ||
| 48 | int sysctl_hugetlb_shm_group; | 51 | int sysctl_hugetlb_shm_group; |
| 49 | 52 | ||
| 53 | enum { | ||
| 54 | Opt_size, Opt_nr_inodes, | ||
| 55 | Opt_mode, Opt_uid, Opt_gid, | ||
| 56 | Opt_err, | ||
| 57 | }; | ||
| 58 | |||
| 59 | static match_table_t tokens = { | ||
| 60 | {Opt_size, "size=%s"}, | ||
| 61 | {Opt_nr_inodes, "nr_inodes=%s"}, | ||
| 62 | {Opt_mode, "mode=%o"}, | ||
| 63 | {Opt_uid, "uid=%u"}, | ||
| 64 | {Opt_gid, "gid=%u"}, | ||
| 65 | {Opt_err, NULL}, | ||
| 66 | }; | ||
| 67 | |||
| 50 | static void huge_pagevec_release(struct pagevec *pvec) | 68 | static void huge_pagevec_release(struct pagevec *pvec) |
| 51 | { | 69 | { |
| 52 | int i; | 70 | int i; |
| @@ -594,46 +612,73 @@ static const struct super_operations hugetlbfs_ops = { | |||
| 594 | static int | 612 | static int |
| 595 | hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | 613 | hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) |
| 596 | { | 614 | { |
| 597 | char *opt, *value, *rest; | 615 | char *p, *rest; |
| 616 | substring_t args[MAX_OPT_ARGS]; | ||
| 617 | int option; | ||
| 598 | 618 | ||
| 599 | if (!options) | 619 | if (!options) |
| 600 | return 0; | 620 | return 0; |
| 601 | while ((opt = strsep(&options, ",")) != NULL) { | 621 | |
| 602 | if (!*opt) | 622 | while ((p = strsep(&options, ",")) != NULL) { |
| 623 | int token; | ||
| 624 | if (!*p) | ||
| 603 | continue; | 625 | continue; |
| 604 | 626 | ||
| 605 | value = strchr(opt, '='); | 627 | token = match_token(p, tokens, args); |
| 606 | if (!value || !*value) | 628 | switch (token) { |
| 607 | return -EINVAL; | 629 | case Opt_uid: |
| 608 | else | 630 | if (match_int(&args[0], &option)) |
| 609 | *value++ = '\0'; | 631 | goto bad_val; |
| 610 | 632 | pconfig->uid = option; | |
| 611 | if (!strcmp(opt, "uid")) | 633 | break; |
| 612 | pconfig->uid = simple_strtoul(value, &value, 0); | 634 | |
| 613 | else if (!strcmp(opt, "gid")) | 635 | case Opt_gid: |
| 614 | pconfig->gid = simple_strtoul(value, &value, 0); | 636 | if (match_int(&args[0], &option)) |
| 615 | else if (!strcmp(opt, "mode")) | 637 | goto bad_val; |
| 616 | pconfig->mode = simple_strtoul(value,&value,0) & 0777U; | 638 | pconfig->gid = option; |
| 617 | else if (!strcmp(opt, "size")) { | 639 | break; |
| 618 | unsigned long long size = memparse(value, &rest); | 640 | |
| 641 | case Opt_mode: | ||
| 642 | if (match_octal(&args[0], &option)) | ||
| 643 | goto bad_val; | ||
| 644 | pconfig->mode = option & 0777U; | ||
| 645 | break; | ||
| 646 | |||
| 647 | case Opt_size: { | ||
| 648 | unsigned long long size; | ||
| 649 | /* memparse() will accept a K/M/G without a digit */ | ||
| 650 | if (!isdigit(*args[0].from)) | ||
| 651 | goto bad_val; | ||
| 652 | size = memparse(args[0].from, &rest); | ||
| 619 | if (*rest == '%') { | 653 | if (*rest == '%') { |
| 620 | size <<= HPAGE_SHIFT; | 654 | size <<= HPAGE_SHIFT; |
| 621 | size *= max_huge_pages; | 655 | size *= max_huge_pages; |
| 622 | do_div(size, 100); | 656 | do_div(size, 100); |
| 623 | rest++; | ||
| 624 | } | 657 | } |
| 625 | pconfig->nr_blocks = (size >> HPAGE_SHIFT); | 658 | pconfig->nr_blocks = (size >> HPAGE_SHIFT); |
| 626 | value = rest; | 659 | break; |
| 627 | } else if (!strcmp(opt,"nr_inodes")) { | 660 | } |
| 628 | pconfig->nr_inodes = memparse(value, &rest); | 661 | |
| 629 | value = rest; | 662 | case Opt_nr_inodes: |
| 630 | } else | 663 | /* memparse() will accept a K/M/G without a digit */ |
| 631 | return -EINVAL; | 664 | if (!isdigit(*args[0].from)) |
| 665 | goto bad_val; | ||
| 666 | pconfig->nr_inodes = memparse(args[0].from, &rest); | ||
| 667 | break; | ||
| 632 | 668 | ||
| 633 | if (*value) | 669 | default: |
| 670 | printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", | ||
| 671 | p); | ||
| 634 | return -EINVAL; | 672 | return -EINVAL; |
| 673 | break; | ||
| 674 | } | ||
| 635 | } | 675 | } |
| 636 | return 0; | 676 | return 0; |
| 677 | |||
| 678 | bad_val: | ||
| 679 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", | ||
| 680 | args[0].from, p); | ||
| 681 | return 1; | ||
| 637 | } | 682 | } |
| 638 | 683 | ||
| 639 | static int | 684 | static int |
| @@ -651,7 +696,6 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 651 | config.gid = current->fsgid; | 696 | config.gid = current->fsgid; |
| 652 | config.mode = 0755; | 697 | config.mode = 0755; |
| 653 | ret = hugetlbfs_parse_options(data, &config); | 698 | ret = hugetlbfs_parse_options(data, &config); |
| 654 | |||
| 655 | if (ret) | 699 | if (ret) |
| 656 | return ret; | 700 | return ret; |
| 657 | 701 | ||
diff --git a/fs/inode.c b/fs/inode.c index 9a012cc5b6cd..320e088d0b28 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -145,7 +145,7 @@ static struct inode *alloc_inode(struct super_block *sb) | |||
| 145 | mapping->a_ops = &empty_aops; | 145 | mapping->a_ops = &empty_aops; |
| 146 | mapping->host = inode; | 146 | mapping->host = inode; |
| 147 | mapping->flags = 0; | 147 | mapping->flags = 0; |
| 148 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER); | 148 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); |
| 149 | mapping->assoc_mapping = NULL; | 149 | mapping->assoc_mapping = NULL; |
| 150 | mapping->backing_dev_info = &default_backing_dev_info; | 150 | mapping->backing_dev_info = &default_backing_dev_info; |
| 151 | 151 | ||
| @@ -462,6 +462,11 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) | |||
| 462 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 462 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; |
| 463 | } | 463 | } |
| 464 | 464 | ||
| 465 | static struct shrinker icache_shrinker = { | ||
| 466 | .shrink = shrink_icache_memory, | ||
| 467 | .seeks = DEFAULT_SEEKS, | ||
| 468 | }; | ||
| 469 | |||
| 465 | static void __wait_on_freeing_inode(struct inode *inode); | 470 | static void __wait_on_freeing_inode(struct inode *inode); |
| 466 | /* | 471 | /* |
| 467 | * Called with the inode lock held. | 472 | * Called with the inode lock held. |
| @@ -519,7 +524,13 @@ repeat: | |||
| 519 | * new_inode - obtain an inode | 524 | * new_inode - obtain an inode |
| 520 | * @sb: superblock | 525 | * @sb: superblock |
| 521 | * | 526 | * |
| 522 | * Allocates a new inode for given superblock. | 527 | * Allocates a new inode for given superblock. The default gfp_mask |
| 528 | * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. | ||
| 529 | * If HIGHMEM pages are unsuitable or it is known that pages allocated | ||
| 530 | * for the page cache are not reclaimable or migratable, | ||
| 531 | * mapping_set_gfp_mask() must be called with suitable flags on the | ||
| 532 | * newly created inode's mapping | ||
| 533 | * | ||
| 523 | */ | 534 | */ |
| 524 | struct inode *new_inode(struct super_block *sb) | 535 | struct inode *new_inode(struct super_block *sb) |
| 525 | { | 536 | { |
| @@ -1379,7 +1390,7 @@ void __init inode_init(unsigned long mempages) | |||
| 1379 | SLAB_MEM_SPREAD), | 1390 | SLAB_MEM_SPREAD), |
| 1380 | init_once, | 1391 | init_once, |
| 1381 | NULL); | 1392 | NULL); |
| 1382 | set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); | 1393 | register_shrinker(&icache_shrinker); |
| 1383 | 1394 | ||
| 1384 | /* Hash may have been set up in inode_init_early */ | 1395 | /* Hash may have been set up in inode_init_early */ |
| 1385 | if (!hashdist) | 1396 | if (!hashdist) |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 8c90cbc903fa..c2a773e8620b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
| 13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/kallsyms.h> | ||
| 16 | 15 | ||
| 17 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
| 18 | #include <asm/ioctls.h> | 17 | #include <asm/ioctls.h> |
| @@ -21,7 +20,6 @@ static long do_ioctl(struct file *filp, unsigned int cmd, | |||
| 21 | unsigned long arg) | 20 | unsigned long arg) |
| 22 | { | 21 | { |
| 23 | int error = -ENOTTY; | 22 | int error = -ENOTTY; |
| 24 | void *f; | ||
| 25 | 23 | ||
| 26 | if (!filp->f_op) | 24 | if (!filp->f_op) |
| 27 | goto out; | 25 | goto out; |
| @@ -31,16 +29,10 @@ static long do_ioctl(struct file *filp, unsigned int cmd, | |||
| 31 | if (error == -ENOIOCTLCMD) | 29 | if (error == -ENOIOCTLCMD) |
| 32 | error = -EINVAL; | 30 | error = -EINVAL; |
| 33 | goto out; | 31 | goto out; |
| 34 | } else if ((f = filp->f_op->ioctl)) { | 32 | } else if (filp->f_op->ioctl) { |
| 35 | lock_kernel(); | 33 | lock_kernel(); |
| 36 | if (!filp->f_op->ioctl) { | 34 | error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, |
| 37 | printk("%s: ioctl %p disappeared\n", __FUNCTION__, f); | 35 | filp, cmd, arg); |
| 38 | print_symbol("symbol: %s\n", (unsigned long)f); | ||
| 39 | dump_stack(); | ||
| 40 | } else { | ||
| 41 | error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, | ||
| 42 | filp, cmd, arg); | ||
| 43 | } | ||
| 44 | unlock_kernel(); | 36 | unlock_kernel(); |
| 45 | } | 37 | } |
| 46 | 38 | ||
| @@ -182,11 +174,3 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) | |||
| 182 | out: | 174 | out: |
| 183 | return error; | 175 | return error; |
| 184 | } | 176 | } |
| 185 | |||
| 186 | /* | ||
| 187 | * Platforms implementing 32 bit compatibility ioctl handlers in | ||
| 188 | * modules need this exported | ||
| 189 | */ | ||
| 190 | #ifdef CONFIG_COMPAT | ||
| 191 | EXPORT_SYMBOL(sys_ioctl); | ||
| 192 | #endif | ||
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 0e94c31cad9b..1ba407c64df1 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
| @@ -7,34 +7,18 @@ | |||
| 7 | * | 7 | * |
| 8 | * Steve Beynon : Missing last directory entries fixed | 8 | * Steve Beynon : Missing last directory entries fixed |
| 9 | * (stephen@askone.demon.co.uk) : 21st June 1996 | 9 | * (stephen@askone.demon.co.uk) : 21st June 1996 |
| 10 | * | 10 | * |
| 11 | * isofs directory handling functions | 11 | * isofs directory handling functions |
| 12 | */ | 12 | */ |
| 13 | #include <linux/smp_lock.h> | 13 | #include <linux/smp_lock.h> |
| 14 | #include "isofs.h" | 14 | #include "isofs.h" |
| 15 | 15 | ||
| 16 | static int isofs_readdir(struct file *, void *, filldir_t); | ||
| 17 | |||
| 18 | const struct file_operations isofs_dir_operations = | ||
| 19 | { | ||
| 20 | .read = generic_read_dir, | ||
| 21 | .readdir = isofs_readdir, | ||
| 22 | }; | ||
| 23 | |||
| 24 | /* | ||
| 25 | * directories can handle most operations... | ||
| 26 | */ | ||
| 27 | const struct inode_operations isofs_dir_inode_operations = | ||
| 28 | { | ||
| 29 | .lookup = isofs_lookup, | ||
| 30 | }; | ||
| 31 | |||
| 32 | int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) | 16 | int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) |
| 33 | { | 17 | { |
| 34 | char * old = de->name; | 18 | char * old = de->name; |
| 35 | int len = de->name_len[0]; | 19 | int len = de->name_len[0]; |
| 36 | int i; | 20 | int i; |
| 37 | 21 | ||
| 38 | for (i = 0; i < len; i++) { | 22 | for (i = 0; i < len; i++) { |
| 39 | unsigned char c = old[i]; | 23 | unsigned char c = old[i]; |
| 40 | if (!c) | 24 | if (!c) |
| @@ -62,22 +46,27 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod | |||
| 62 | } | 46 | } |
| 63 | 47 | ||
| 64 | /* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */ | 48 | /* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */ |
| 65 | int get_acorn_filename(struct iso_directory_record * de, | 49 | int get_acorn_filename(struct iso_directory_record *de, |
| 66 | char * retname, struct inode * inode) | 50 | char *retname, struct inode *inode) |
| 67 | { | 51 | { |
| 68 | int std; | 52 | int std; |
| 69 | unsigned char * chr; | 53 | unsigned char *chr; |
| 70 | int retnamlen = isofs_name_translate(de, retname, inode); | 54 | int retnamlen = isofs_name_translate(de, retname, inode); |
| 71 | if (retnamlen == 0) return 0; | 55 | |
| 56 | if (retnamlen == 0) | ||
| 57 | return 0; | ||
| 72 | std = sizeof(struct iso_directory_record) + de->name_len[0]; | 58 | std = sizeof(struct iso_directory_record) + de->name_len[0]; |
| 73 | if (std & 1) std++; | 59 | if (std & 1) |
| 74 | if ((*((unsigned char *) de) - std) != 32) return retnamlen; | 60 | std++; |
| 61 | if ((*((unsigned char *) de) - std) != 32) | ||
| 62 | return retnamlen; | ||
| 75 | chr = ((unsigned char *) de) + std; | 63 | chr = ((unsigned char *) de) + std; |
| 76 | if (strncmp(chr, "ARCHIMEDES", 10)) return retnamlen; | 64 | if (strncmp(chr, "ARCHIMEDES", 10)) |
| 77 | if ((*retname == '_') && ((chr[19] & 1) == 1)) *retname = '!'; | 65 | return retnamlen; |
| 66 | if ((*retname == '_') && ((chr[19] & 1) == 1)) | ||
| 67 | *retname = '!'; | ||
| 78 | if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) | 68 | if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) |
| 79 | && ((chr[12] & 0xf0) == 0xf0)) | 69 | && ((chr[12] & 0xf0) == 0xf0)) { |
| 80 | { | ||
| 81 | retname[retnamlen] = ','; | 70 | retname[retnamlen] = ','; |
| 82 | sprintf(retname+retnamlen+1, "%3.3x", | 71 | sprintf(retname+retnamlen+1, "%3.3x", |
| 83 | ((chr[12] & 0xf) << 8) | chr[11]); | 72 | ((chr[12] & 0xf) << 8) | chr[11]); |
| @@ -91,7 +80,7 @@ int get_acorn_filename(struct iso_directory_record * de, | |||
| 91 | */ | 80 | */ |
| 92 | static int do_isofs_readdir(struct inode *inode, struct file *filp, | 81 | static int do_isofs_readdir(struct inode *inode, struct file *filp, |
| 93 | void *dirent, filldir_t filldir, | 82 | void *dirent, filldir_t filldir, |
| 94 | char * tmpname, struct iso_directory_record * tmpde) | 83 | char *tmpname, struct iso_directory_record *tmpde) |
| 95 | { | 84 | { |
| 96 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
| 97 | unsigned char bufbits = ISOFS_BUFFER_BITS(inode); | 86 | unsigned char bufbits = ISOFS_BUFFER_BITS(inode); |
| @@ -121,9 +110,11 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 121 | 110 | ||
| 122 | de_len = *(unsigned char *) de; | 111 | de_len = *(unsigned char *) de; |
| 123 | 112 | ||
| 124 | /* If the length byte is zero, we should move on to the next | 113 | /* |
| 125 | CDROM sector. If we are at the end of the directory, we | 114 | * If the length byte is zero, we should move on to the next |
| 126 | kick out of the while loop. */ | 115 | * CDROM sector. If we are at the end of the directory, we |
| 116 | * kick out of the while loop. | ||
| 117 | */ | ||
| 127 | 118 | ||
| 128 | if (de_len == 0) { | 119 | if (de_len == 0) { |
| 129 | brelse(bh); | 120 | brelse(bh); |
| @@ -157,11 +148,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 157 | 148 | ||
| 158 | if (first_de) { | 149 | if (first_de) { |
| 159 | isofs_normalize_block_and_offset(de, | 150 | isofs_normalize_block_and_offset(de, |
| 160 | &block_saved, | 151 | &block_saved, |
| 161 | &offset_saved); | 152 | &offset_saved); |
| 162 | inode_number = isofs_get_ino(block_saved, | 153 | inode_number = isofs_get_ino(block_saved, |
| 163 | offset_saved, | 154 | offset_saved, bufbits); |
| 164 | bufbits); | ||
| 165 | } | 155 | } |
| 166 | 156 | ||
| 167 | if (de->flags[-sbi->s_high_sierra] & 0x80) { | 157 | if (de->flags[-sbi->s_high_sierra] & 0x80) { |
| @@ -199,7 +189,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 199 | */ | 189 | */ |
| 200 | if ((sbi->s_hide == 'y' && | 190 | if ((sbi->s_hide == 'y' && |
| 201 | (de->flags[-sbi->s_high_sierra] & 1)) || | 191 | (de->flags[-sbi->s_high_sierra] & 1)) || |
| 202 | (sbi->s_showassoc =='n' && | 192 | (sbi->s_showassoc =='n' && |
| 203 | (de->flags[-sbi->s_high_sierra] & 4))) { | 193 | (de->flags[-sbi->s_high_sierra] & 4))) { |
| 204 | filp->f_pos += de_len; | 194 | filp->f_pos += de_len; |
| 205 | continue; | 195 | continue; |
| @@ -240,7 +230,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 240 | 230 | ||
| 241 | continue; | 231 | continue; |
| 242 | } | 232 | } |
| 243 | if (bh) brelse(bh); | 233 | if (bh) |
| 234 | brelse(bh); | ||
| 244 | return 0; | 235 | return 0; |
| 245 | } | 236 | } |
| 246 | 237 | ||
| @@ -253,8 +244,8 @@ static int isofs_readdir(struct file *filp, | |||
| 253 | void *dirent, filldir_t filldir) | 244 | void *dirent, filldir_t filldir) |
| 254 | { | 245 | { |
| 255 | int result; | 246 | int result; |
| 256 | char * tmpname; | 247 | char *tmpname; |
| 257 | struct iso_directory_record * tmpde; | 248 | struct iso_directory_record *tmpde; |
| 258 | struct inode *inode = filp->f_path.dentry->d_inode; | 249 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 259 | 250 | ||
| 260 | tmpname = (char *)__get_free_page(GFP_KERNEL); | 251 | tmpname = (char *)__get_free_page(GFP_KERNEL); |
| @@ -270,3 +261,19 @@ static int isofs_readdir(struct file *filp, | |||
| 270 | unlock_kernel(); | 261 | unlock_kernel(); |
| 271 | return result; | 262 | return result; |
| 272 | } | 263 | } |
| 264 | |||
| 265 | const struct file_operations isofs_dir_operations = | ||
| 266 | { | ||
| 267 | .read = generic_read_dir, | ||
| 268 | .readdir = isofs_readdir, | ||
| 269 | }; | ||
| 270 | |||
| 271 | /* | ||
| 272 | * directories can handle most operations... | ||
| 273 | */ | ||
| 274 | const struct inode_operations isofs_dir_inode_operations = | ||
| 275 | { | ||
| 276 | .lookup = isofs_lookup, | ||
| 277 | }; | ||
| 278 | |||
| 279 | |||
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 5c3eecf7542e..4f5418be0590 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -73,20 +73,20 @@ static void isofs_destroy_inode(struct inode *inode) | |||
| 73 | kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); | 73 | kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) | 76 | static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) |
| 77 | { | 77 | { |
| 78 | struct iso_inode_info *ei = foo; | 78 | struct iso_inode_info *ei = foo; |
| 79 | 79 | ||
| 80 | inode_init_once(&ei->vfs_inode); | 80 | inode_init_once(&ei->vfs_inode); |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | static int init_inodecache(void) | 83 | static int init_inodecache(void) |
| 84 | { | 84 | { |
| 85 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", | 85 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", |
| 86 | sizeof(struct iso_inode_info), | 86 | sizeof(struct iso_inode_info), |
| 87 | 0, (SLAB_RECLAIM_ACCOUNT| | 87 | 0, (SLAB_RECLAIM_ACCOUNT| |
| 88 | SLAB_MEM_SPREAD), | 88 | SLAB_MEM_SPREAD), |
| 89 | init_once, NULL); | 89 | init_once, NULL); |
| 90 | if (isofs_inode_cachep == NULL) | 90 | if (isofs_inode_cachep == NULL) |
| 91 | return -ENOMEM; | 91 | return -ENOMEM; |
| 92 | return 0; | 92 | return 0; |
| @@ -150,9 +150,9 @@ struct iso9660_options{ | |||
| 150 | uid_t uid; | 150 | uid_t uid; |
| 151 | char *iocharset; | 151 | char *iocharset; |
| 152 | unsigned char utf8; | 152 | unsigned char utf8; |
| 153 | /* LVE */ | 153 | /* LVE */ |
| 154 | s32 session; | 154 | s32 session; |
| 155 | s32 sbsector; | 155 | s32 sbsector; |
| 156 | }; | 156 | }; |
| 157 | 157 | ||
| 158 | /* | 158 | /* |
| @@ -197,7 +197,7 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) | |||
| 197 | hash = init_name_hash(); | 197 | hash = init_name_hash(); |
| 198 | while (len--) { | 198 | while (len--) { |
| 199 | c = tolower(*name++); | 199 | c = tolower(*name++); |
| 200 | hash = partial_name_hash(tolower(c), hash); | 200 | hash = partial_name_hash(c, hash); |
| 201 | } | 201 | } |
| 202 | qstr->hash = end_name_hash(hash); | 202 | qstr->hash = end_name_hash(hash); |
| 203 | 203 | ||
| @@ -360,10 +360,12 @@ static int parse_options(char *options, struct iso9660_options *popt) | |||
| 360 | popt->check = 'u'; /* unset */ | 360 | popt->check = 'u'; /* unset */ |
| 361 | popt->nocompress = 0; | 361 | popt->nocompress = 0; |
| 362 | popt->blocksize = 1024; | 362 | popt->blocksize = 1024; |
| 363 | popt->mode = S_IRUGO | S_IXUGO; /* r-x for all. The disc could | 363 | popt->mode = S_IRUGO | S_IXUGO; /* |
| 364 | be shared with DOS machines so | 364 | * r-x for all. The disc could |
| 365 | virtually anything could be | 365 | * be shared with DOS machines so |
| 366 | a valid executable. */ | 366 | * virtually anything could be |
| 367 | * a valid executable. | ||
| 368 | */ | ||
| 367 | popt->gid = 0; | 369 | popt->gid = 0; |
| 368 | popt->uid = 0; | 370 | popt->uid = 0; |
| 369 | popt->iocharset = NULL; | 371 | popt->iocharset = NULL; |
| @@ -503,30 +505,30 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) | |||
| 503 | Te.cdte_format=CDROM_LBA; | 505 | Te.cdte_format=CDROM_LBA; |
| 504 | i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); | 506 | i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te); |
| 505 | if (!i) { | 507 | if (!i) { |
| 506 | printk(KERN_DEBUG "Session %d start %d type %d\n", | 508 | printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n", |
| 507 | session, Te.cdte_addr.lba, | 509 | session, Te.cdte_addr.lba, |
| 508 | Te.cdte_ctrl&CDROM_DATA_TRACK); | 510 | Te.cdte_ctrl&CDROM_DATA_TRACK); |
| 509 | if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) | 511 | if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4) |
| 510 | return Te.cdte_addr.lba; | 512 | return Te.cdte_addr.lba; |
| 511 | } | 513 | } |
| 512 | 514 | ||
| 513 | printk(KERN_ERR "Invalid session number or type of track\n"); | 515 | printk(KERN_ERR "ISOFS: Invalid session number or type of track\n"); |
| 514 | } | 516 | } |
| 515 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); | 517 | i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); |
| 516 | if (session > 0) | 518 | if (session > 0) |
| 517 | printk(KERN_ERR "Invalid session number\n"); | 519 | printk(KERN_ERR "ISOFS: Invalid session number\n"); |
| 518 | #if 0 | 520 | #if 0 |
| 519 | printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); | 521 | printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i); |
| 520 | if (i==0) { | 522 | if (i==0) { |
| 521 | printk("isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); | 523 | printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no"); |
| 522 | printk("isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); | 524 | printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba); |
| 523 | } | 525 | } |
| 524 | #endif | 526 | #endif |
| 525 | if (i==0) | 527 | if (i==0) |
| 526 | #if WE_OBEY_THE_WRITTEN_STANDARDS | 528 | #if WE_OBEY_THE_WRITTEN_STANDARDS |
| 527 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ | 529 | if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ |
| 528 | #endif | 530 | #endif |
| 529 | vol_desc_start=ms_info.addr.lba; | 531 | vol_desc_start=ms_info.addr.lba; |
| 530 | return vol_desc_start; | 532 | return vol_desc_start; |
| 531 | } | 533 | } |
| 532 | 534 | ||
| @@ -538,20 +540,20 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) | |||
| 538 | */ | 540 | */ |
| 539 | static int isofs_fill_super(struct super_block *s, void *data, int silent) | 541 | static int isofs_fill_super(struct super_block *s, void *data, int silent) |
| 540 | { | 542 | { |
| 541 | struct buffer_head * bh = NULL, *pri_bh = NULL; | 543 | struct buffer_head *bh = NULL, *pri_bh = NULL; |
| 542 | struct hs_primary_descriptor * h_pri = NULL; | 544 | struct hs_primary_descriptor *h_pri = NULL; |
| 543 | struct iso_primary_descriptor * pri = NULL; | 545 | struct iso_primary_descriptor *pri = NULL; |
| 544 | struct iso_supplementary_descriptor *sec = NULL; | 546 | struct iso_supplementary_descriptor *sec = NULL; |
| 545 | struct iso_directory_record * rootp; | 547 | struct iso_directory_record *rootp; |
| 546 | int joliet_level = 0; | 548 | struct inode *inode; |
| 547 | int iso_blknum, block; | 549 | struct iso9660_options opt; |
| 548 | int orig_zonesize; | 550 | struct isofs_sb_info *sbi; |
| 549 | int table; | 551 | unsigned long first_data_zone; |
| 550 | unsigned int vol_desc_start; | 552 | int joliet_level = 0; |
| 551 | unsigned long first_data_zone; | 553 | int iso_blknum, block; |
| 552 | struct inode * inode; | 554 | int orig_zonesize; |
| 553 | struct iso9660_options opt; | 555 | int table; |
| 554 | struct isofs_sb_info * sbi; | 556 | unsigned int vol_desc_start; |
| 555 | 557 | ||
| 556 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 558 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
| 557 | if (!sbi) | 559 | if (!sbi) |
| @@ -577,72 +579,73 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) | |||
| 577 | vol_desc_start = (opt.sbsector != -1) ? | 579 | vol_desc_start = (opt.sbsector != -1) ? |
| 578 | opt.sbsector : isofs_get_last_session(s,opt.session); | 580 | opt.sbsector : isofs_get_last_session(s,opt.session); |
| 579 | 581 | ||
| 580 | for (iso_blknum = vol_desc_start+16; | 582 | for (iso_blknum = vol_desc_start+16; |
| 581 | iso_blknum < vol_desc_start+100; iso_blknum++) | 583 | iso_blknum < vol_desc_start+100; iso_blknum++) { |
| 582 | { | 584 | struct hs_volume_descriptor *hdp; |
| 583 | struct hs_volume_descriptor * hdp; | 585 | struct iso_volume_descriptor *vdp; |
| 584 | struct iso_volume_descriptor * vdp; | 586 | |
| 585 | 587 | block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); | |
| 586 | block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits); | 588 | if (!(bh = sb_bread(s, block))) |
| 587 | if (!(bh = sb_bread(s, block))) | 589 | goto out_no_read; |
| 588 | goto out_no_read; | 590 | |
| 589 | 591 | vdp = (struct iso_volume_descriptor *)bh->b_data; | |
| 590 | vdp = (struct iso_volume_descriptor *)bh->b_data; | 592 | hdp = (struct hs_volume_descriptor *)bh->b_data; |
| 591 | hdp = (struct hs_volume_descriptor *)bh->b_data; | 593 | |
| 592 | 594 | /* | |
| 593 | /* Due to the overlapping physical location of the descriptors, | 595 | * Due to the overlapping physical location of the descriptors, |
| 594 | * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure | 596 | * ISO CDs can match hdp->id==HS_STANDARD_ID as well. To ensure |
| 595 | * proper identification in this case, we first check for ISO. | 597 | * proper identification in this case, we first check for ISO. |
| 596 | */ | 598 | */ |
| 597 | if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { | 599 | if (strncmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) == 0) { |
| 598 | if (isonum_711 (vdp->type) == ISO_VD_END) | 600 | if (isonum_711(vdp->type) == ISO_VD_END) |
| 599 | break; | 601 | break; |
| 600 | if (isonum_711 (vdp->type) == ISO_VD_PRIMARY) { | 602 | if (isonum_711(vdp->type) == ISO_VD_PRIMARY) { |
| 601 | if (pri == NULL) { | 603 | if (pri == NULL) { |
| 602 | pri = (struct iso_primary_descriptor *)vdp; | 604 | pri = (struct iso_primary_descriptor *)vdp; |
| 603 | /* Save the buffer in case we need it ... */ | 605 | /* Save the buffer in case we need it ... */ |
| 604 | pri_bh = bh; | 606 | pri_bh = bh; |
| 605 | bh = NULL; | 607 | bh = NULL; |
| 606 | } | 608 | } |
| 607 | } | 609 | } |
| 608 | #ifdef CONFIG_JOLIET | 610 | #ifdef CONFIG_JOLIET |
| 609 | else if (isonum_711 (vdp->type) == ISO_VD_SUPPLEMENTARY) { | 611 | else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { |
| 610 | sec = (struct iso_supplementary_descriptor *)vdp; | 612 | sec = (struct iso_supplementary_descriptor *)vdp; |
| 611 | if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { | 613 | if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { |
| 612 | if (opt.joliet == 'y') { | 614 | if (opt.joliet == 'y') { |
| 613 | if (sec->escape[2] == 0x40) { | 615 | if (sec->escape[2] == 0x40) |
| 614 | joliet_level = 1; | 616 | joliet_level = 1; |
| 615 | } else if (sec->escape[2] == 0x43) { | 617 | else if (sec->escape[2] == 0x43) |
| 616 | joliet_level = 2; | 618 | joliet_level = 2; |
| 617 | } else if (sec->escape[2] == 0x45) { | 619 | else if (sec->escape[2] == 0x45) |
| 618 | joliet_level = 3; | 620 | joliet_level = 3; |
| 619 | } | 621 | |
| 620 | printk(KERN_DEBUG"ISO 9660 Extensions: Microsoft Joliet Level %d\n", | 622 | printk(KERN_DEBUG "ISO 9660 Extensions: " |
| 621 | joliet_level); | 623 | "Microsoft Joliet Level %d\n", |
| 624 | joliet_level); | ||
| 625 | } | ||
| 626 | goto root_found; | ||
| 627 | } else { | ||
| 628 | /* Unknown supplementary volume descriptor */ | ||
| 629 | sec = NULL; | ||
| 630 | } | ||
| 622 | } | 631 | } |
| 623 | goto root_found; | ||
| 624 | } else { | ||
| 625 | /* Unknown supplementary volume descriptor */ | ||
| 626 | sec = NULL; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | #endif | 632 | #endif |
| 630 | } else { | 633 | } else { |
| 631 | if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { | 634 | if (strncmp (hdp->id, HS_STANDARD_ID, sizeof hdp->id) == 0) { |
| 632 | if (isonum_711 (hdp->type) != ISO_VD_PRIMARY) | 635 | if (isonum_711(hdp->type) != ISO_VD_PRIMARY) |
| 633 | goto out_freebh; | 636 | goto out_freebh; |
| 634 | 637 | ||
| 635 | sbi->s_high_sierra = 1; | 638 | sbi->s_high_sierra = 1; |
| 636 | opt.rock = 'n'; | 639 | opt.rock = 'n'; |
| 637 | h_pri = (struct hs_primary_descriptor *)vdp; | 640 | h_pri = (struct hs_primary_descriptor *)vdp; |
| 638 | goto root_found; | 641 | goto root_found; |
| 642 | } | ||
| 639 | } | 643 | } |
| 640 | } | ||
| 641 | 644 | ||
| 642 | /* Just skip any volume descriptors we don't recognize */ | 645 | /* Just skip any volume descriptors we don't recognize */ |
| 643 | 646 | ||
| 644 | brelse(bh); | 647 | brelse(bh); |
| 645 | bh = NULL; | 648 | bh = NULL; |
| 646 | } | 649 | } |
| 647 | /* | 650 | /* |
| 648 | * If we fall through, either no volume descriptor was found, | 651 | * If we fall through, either no volume descriptor was found, |
| @@ -657,24 +660,24 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) | |||
| 657 | root_found: | 660 | root_found: |
| 658 | 661 | ||
| 659 | if (joliet_level && (pri == NULL || opt.rock == 'n')) { | 662 | if (joliet_level && (pri == NULL || opt.rock == 'n')) { |
| 660 | /* This is the case of Joliet with the norock mount flag. | 663 | /* This is the case of Joliet with the norock mount flag. |
| 661 | * A disc with both Joliet and Rock Ridge is handled later | 664 | * A disc with both Joliet and Rock Ridge is handled later |
| 662 | */ | 665 | */ |
| 663 | pri = (struct iso_primary_descriptor *) sec; | 666 | pri = (struct iso_primary_descriptor *) sec; |
| 664 | } | 667 | } |
| 665 | 668 | ||
| 666 | if(sbi->s_high_sierra){ | 669 | if(sbi->s_high_sierra){ |
| 667 | rootp = (struct iso_directory_record *) h_pri->root_directory_record; | 670 | rootp = (struct iso_directory_record *) h_pri->root_directory_record; |
| 668 | sbi->s_nzones = isonum_733 (h_pri->volume_space_size); | 671 | sbi->s_nzones = isonum_733(h_pri->volume_space_size); |
| 669 | sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size); | 672 | sbi->s_log_zone_size = isonum_723(h_pri->logical_block_size); |
| 670 | sbi->s_max_size = isonum_733(h_pri->volume_space_size); | 673 | sbi->s_max_size = isonum_733(h_pri->volume_space_size); |
| 671 | } else { | 674 | } else { |
| 672 | if (!pri) | 675 | if (!pri) |
| 673 | goto out_freebh; | 676 | goto out_freebh; |
| 674 | rootp = (struct iso_directory_record *) pri->root_directory_record; | 677 | rootp = (struct iso_directory_record *) pri->root_directory_record; |
| 675 | sbi->s_nzones = isonum_733 (pri->volume_space_size); | 678 | sbi->s_nzones = isonum_733(pri->volume_space_size); |
| 676 | sbi->s_log_zone_size = isonum_723 (pri->logical_block_size); | 679 | sbi->s_log_zone_size = isonum_723(pri->logical_block_size); |
| 677 | sbi->s_max_size = isonum_733(pri->volume_space_size); | 680 | sbi->s_max_size = isonum_733(pri->volume_space_size); |
| 678 | } | 681 | } |
| 679 | 682 | ||
| 680 | sbi->s_ninodes = 0; /* No way to figure this out easily */ | 683 | sbi->s_ninodes = 0; /* No way to figure this out easily */ |
| @@ -687,42 +690,43 @@ root_found: | |||
| 687 | * blocks that were 512 bytes (which should only very rarely | 690 | * blocks that were 512 bytes (which should only very rarely |
| 688 | * happen.) | 691 | * happen.) |
| 689 | */ | 692 | */ |
| 690 | if(orig_zonesize < opt.blocksize) | 693 | if (orig_zonesize < opt.blocksize) |
| 691 | goto out_bad_size; | 694 | goto out_bad_size; |
| 692 | 695 | ||
| 693 | /* RDE: convert log zone size to bit shift */ | 696 | /* RDE: convert log zone size to bit shift */ |
| 694 | switch (sbi->s_log_zone_size) | 697 | switch (sbi->s_log_zone_size) { |
| 695 | { case 512: sbi->s_log_zone_size = 9; break; | 698 | case 512: sbi->s_log_zone_size = 9; break; |
| 696 | case 1024: sbi->s_log_zone_size = 10; break; | 699 | case 1024: sbi->s_log_zone_size = 10; break; |
| 697 | case 2048: sbi->s_log_zone_size = 11; break; | 700 | case 2048: sbi->s_log_zone_size = 11; break; |
| 698 | 701 | ||
| 699 | default: | 702 | default: |
| 700 | goto out_bad_zone_size; | 703 | goto out_bad_zone_size; |
| 701 | } | 704 | } |
| 702 | 705 | ||
| 703 | s->s_magic = ISOFS_SUPER_MAGIC; | 706 | s->s_magic = ISOFS_SUPER_MAGIC; |
| 704 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ | 707 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ |
| 705 | 708 | ||
| 706 | /* The CDROM is read-only, has no nodes (devices) on it, and since | 709 | /* |
| 707 | all of the files appear to be owned by root, we really do not want | 710 | * The CDROM is read-only, has no nodes (devices) on it, and since |
| 708 | to allow suid. (suid or devices will not show up unless we have | 711 | * all of the files appear to be owned by root, we really do not want |
| 709 | Rock Ridge extensions) */ | 712 | * to allow suid. (suid or devices will not show up unless we have |
| 713 | * Rock Ridge extensions) | ||
| 714 | */ | ||
| 710 | 715 | ||
| 711 | s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; | 716 | s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; |
| 712 | 717 | ||
| 713 | /* Set this for reference. Its not currently used except on write | 718 | /* Set this for reference. Its not currently used except on write |
| 714 | which we don't have .. */ | 719 | which we don't have .. */ |
| 715 | 720 | ||
| 716 | first_data_zone = isonum_733 (rootp->extent) + | 721 | first_data_zone = isonum_733(rootp->extent) + |
| 717 | isonum_711 (rootp->ext_attr_length); | 722 | isonum_711(rootp->ext_attr_length); |
| 718 | sbi->s_firstdatazone = first_data_zone; | 723 | sbi->s_firstdatazone = first_data_zone; |
| 719 | #ifndef BEQUIET | 724 | #ifndef BEQUIET |
| 720 | printk(KERN_DEBUG "Max size:%ld Log zone size:%ld\n", | 725 | printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n", |
| 721 | sbi->s_max_size, | 726 | sbi->s_max_size, 1UL << sbi->s_log_zone_size); |
| 722 | 1UL << sbi->s_log_zone_size); | 727 | printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone); |
| 723 | printk(KERN_DEBUG "First datazone:%ld\n", sbi->s_firstdatazone); | ||
| 724 | if(sbi->s_high_sierra) | 728 | if(sbi->s_high_sierra) |
| 725 | printk(KERN_DEBUG "Disc in High Sierra format.\n"); | 729 | printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n"); |
| 726 | #endif | 730 | #endif |
| 727 | 731 | ||
| 728 | /* | 732 | /* |
| @@ -737,8 +741,8 @@ root_found: | |||
| 737 | pri = (struct iso_primary_descriptor *) sec; | 741 | pri = (struct iso_primary_descriptor *) sec; |
| 738 | rootp = (struct iso_directory_record *) | 742 | rootp = (struct iso_directory_record *) |
| 739 | pri->root_directory_record; | 743 | pri->root_directory_record; |
| 740 | first_data_zone = isonum_733 (rootp->extent) + | 744 | first_data_zone = isonum_733(rootp->extent) + |
| 741 | isonum_711 (rootp->ext_attr_length); | 745 | isonum_711(rootp->ext_attr_length); |
| 742 | } | 746 | } |
| 743 | 747 | ||
| 744 | /* | 748 | /* |
| @@ -771,7 +775,7 @@ root_found: | |||
| 771 | 775 | ||
| 772 | #ifdef CONFIG_JOLIET | 776 | #ifdef CONFIG_JOLIET |
| 773 | if (joliet_level && opt.utf8 == 0) { | 777 | if (joliet_level && opt.utf8 == 0) { |
| 774 | char * p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; | 778 | char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; |
| 775 | sbi->s_nls_iocharset = load_nls(p); | 779 | sbi->s_nls_iocharset = load_nls(p); |
| 776 | if (! sbi->s_nls_iocharset) { | 780 | if (! sbi->s_nls_iocharset) { |
| 777 | /* Fail only if explicit charset specified */ | 781 | /* Fail only if explicit charset specified */ |
| @@ -821,7 +825,7 @@ root_found: | |||
| 821 | sbi->s_rock = 0; | 825 | sbi->s_rock = 0; |
| 822 | if (sbi->s_firstdatazone != first_data_zone) { | 826 | if (sbi->s_firstdatazone != first_data_zone) { |
| 823 | sbi->s_firstdatazone = first_data_zone; | 827 | sbi->s_firstdatazone = first_data_zone; |
| 824 | printk(KERN_DEBUG | 828 | printk(KERN_DEBUG |
| 825 | "ISOFS: changing to secondary root\n"); | 829 | "ISOFS: changing to secondary root\n"); |
| 826 | iput(inode); | 830 | iput(inode); |
| 827 | inode = isofs_iget(s, sbi->s_firstdatazone, 0); | 831 | inode = isofs_iget(s, sbi->s_firstdatazone, 0); |
| @@ -830,8 +834,10 @@ root_found: | |||
| 830 | 834 | ||
| 831 | if (opt.check == 'u') { | 835 | if (opt.check == 'u') { |
| 832 | /* Only Joliet is case insensitive by default */ | 836 | /* Only Joliet is case insensitive by default */ |
| 833 | if (joliet_level) opt.check = 'r'; | 837 | if (joliet_level) |
| 834 | else opt.check = 's'; | 838 | opt.check = 'r'; |
| 839 | else | ||
| 840 | opt.check = 's'; | ||
| 835 | } | 841 | } |
| 836 | sbi->s_joliet_level = joliet_level; | 842 | sbi->s_joliet_level = joliet_level; |
| 837 | 843 | ||
| @@ -846,8 +852,10 @@ root_found: | |||
| 846 | goto out_no_root; | 852 | goto out_no_root; |
| 847 | 853 | ||
| 848 | table = 0; | 854 | table = 0; |
| 849 | if (joliet_level) table += 2; | 855 | if (joliet_level) |
| 850 | if (opt.check == 'r') table++; | 856 | table += 2; |
| 857 | if (opt.check == 'r') | ||
| 858 | table++; | ||
| 851 | s->s_root->d_op = &isofs_dentry_ops[table]; | 859 | s->s_root->d_op = &isofs_dentry_ops[table]; |
| 852 | 860 | ||
| 853 | kfree(opt.iocharset); | 861 | kfree(opt.iocharset); |
| @@ -858,10 +866,10 @@ root_found: | |||
| 858 | * Display error messages and free resources. | 866 | * Display error messages and free resources. |
| 859 | */ | 867 | */ |
| 860 | out_bad_root: | 868 | out_bad_root: |
| 861 | printk(KERN_WARNING "isofs_fill_super: root inode not initialized\n"); | 869 | printk(KERN_WARNING "%s: root inode not initialized\n", __func__); |
| 862 | goto out_iput; | 870 | goto out_iput; |
| 863 | out_no_root: | 871 | out_no_root: |
| 864 | printk(KERN_WARNING "isofs_fill_super: get root inode failed\n"); | 872 | printk(KERN_WARNING "%s: get root inode failed\n", __func__); |
| 865 | out_iput: | 873 | out_iput: |
| 866 | iput(inode); | 874 | iput(inode); |
| 867 | #ifdef CONFIG_JOLIET | 875 | #ifdef CONFIG_JOLIET |
| @@ -870,21 +878,20 @@ out_iput: | |||
| 870 | #endif | 878 | #endif |
| 871 | goto out_freesbi; | 879 | goto out_freesbi; |
| 872 | out_no_read: | 880 | out_no_read: |
| 873 | printk(KERN_WARNING "isofs_fill_super: " | 881 | printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", |
| 874 | "bread failed, dev=%s, iso_blknum=%d, block=%d\n", | 882 | __func__, s->s_id, iso_blknum, block); |
| 875 | s->s_id, iso_blknum, block); | ||
| 876 | goto out_freesbi; | 883 | goto out_freesbi; |
| 877 | out_bad_zone_size: | 884 | out_bad_zone_size: |
| 878 | printk(KERN_WARNING "Bad logical zone size %ld\n", | 885 | printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", |
| 879 | sbi->s_log_zone_size); | 886 | sbi->s_log_zone_size); |
| 880 | goto out_freebh; | 887 | goto out_freebh; |
| 881 | out_bad_size: | 888 | out_bad_size: |
| 882 | printk(KERN_WARNING "Logical zone size(%d) < hardware blocksize(%u)\n", | 889 | printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n", |
| 883 | orig_zonesize, opt.blocksize); | 890 | orig_zonesize, opt.blocksize); |
| 884 | goto out_freebh; | 891 | goto out_freebh; |
| 885 | out_unknown_format: | 892 | out_unknown_format: |
| 886 | if (!silent) | 893 | if (!silent) |
| 887 | printk(KERN_WARNING "Unable to identify CD-ROM format.\n"); | 894 | printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n"); |
| 888 | 895 | ||
| 889 | out_freebh: | 896 | out_freebh: |
| 890 | brelse(bh); | 897 | brelse(bh); |
| @@ -902,7 +909,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) | |||
| 902 | buf->f_type = ISOFS_SUPER_MAGIC; | 909 | buf->f_type = ISOFS_SUPER_MAGIC; |
| 903 | buf->f_bsize = sb->s_blocksize; | 910 | buf->f_bsize = sb->s_blocksize; |
| 904 | buf->f_blocks = (ISOFS_SB(sb)->s_nzones | 911 | buf->f_blocks = (ISOFS_SB(sb)->s_nzones |
| 905 | << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); | 912 | << (ISOFS_SB(sb)->s_log_zone_size - sb->s_blocksize_bits)); |
| 906 | buf->f_bfree = 0; | 913 | buf->f_bfree = 0; |
| 907 | buf->f_bavail = 0; | 914 | buf->f_bavail = 0; |
| 908 | buf->f_files = ISOFS_SB(sb)->s_ninodes; | 915 | buf->f_files = ISOFS_SB(sb)->s_ninodes; |
| @@ -931,20 +938,20 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
| 931 | 938 | ||
| 932 | rv = 0; | 939 | rv = 0; |
| 933 | if (iblock < 0 || iblock != iblock_s) { | 940 | if (iblock < 0 || iblock != iblock_s) { |
| 934 | printk("isofs_get_blocks: block number too large\n"); | 941 | printk(KERN_DEBUG "%s: block number too large\n", __func__); |
| 935 | goto abort; | 942 | goto abort; |
| 936 | } | 943 | } |
| 937 | 944 | ||
| 938 | b_off = iblock; | 945 | b_off = iblock; |
| 939 | 946 | ||
| 940 | offset = 0; | 947 | offset = 0; |
| 941 | firstext = ei->i_first_extent; | 948 | firstext = ei->i_first_extent; |
| 942 | sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); | 949 | sect_size = ei->i_section_size >> ISOFS_BUFFER_BITS(inode); |
| 943 | nextblk = ei->i_next_section_block; | 950 | nextblk = ei->i_next_section_block; |
| 944 | nextoff = ei->i_next_section_offset; | 951 | nextoff = ei->i_next_section_offset; |
| 945 | section = 0; | 952 | section = 0; |
| 946 | 953 | ||
| 947 | while ( nblocks ) { | 954 | while (nblocks) { |
| 948 | /* If we are *way* beyond the end of the file, print a message. | 955 | /* If we are *way* beyond the end of the file, print a message. |
| 949 | * Access beyond the end of the file up to the next page boundary | 956 | * Access beyond the end of the file up to the next page boundary |
| 950 | * is normal, however because of the way the page cache works. | 957 | * is normal, however because of the way the page cache works. |
| @@ -953,11 +960,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
| 953 | * I/O errors. | 960 | * I/O errors. |
| 954 | */ | 961 | */ |
| 955 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { | 962 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { |
| 956 | printk("isofs_get_blocks: block >= EOF (%ld, %ld)\n", | 963 | printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n", |
| 957 | iblock, (unsigned long) inode->i_size); | 964 | __func__, iblock, (unsigned long) inode->i_size); |
| 958 | goto abort; | 965 | goto abort; |
| 959 | } | 966 | } |
| 960 | 967 | ||
| 961 | /* On the last section, nextblk == 0, section size is likely to | 968 | /* On the last section, nextblk == 0, section size is likely to |
| 962 | * exceed sect_size by a partial block, and access beyond the | 969 | * exceed sect_size by a partial block, and access beyond the |
| 963 | * end of the file will reach beyond the section size, too. | 970 | * end of the file will reach beyond the section size, too. |
| @@ -976,20 +983,21 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
| 976 | iput(ninode); | 983 | iput(ninode); |
| 977 | 984 | ||
| 978 | if (++section > 100) { | 985 | if (++section > 100) { |
| 979 | printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); | 986 | printk(KERN_DEBUG "%s: More than 100 file sections ?!?" |
| 980 | printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " | 987 | " aborting...\n", __func__); |
| 981 | "nextblk=%lu nextoff=%lu\n", | 988 | printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u " |
| 982 | iblock, firstext, (unsigned) sect_size, | 989 | "nextblk=%lu nextoff=%lu\n", __func__, |
| 983 | nextblk, nextoff); | 990 | iblock, firstext, (unsigned) sect_size, |
| 991 | nextblk, nextoff); | ||
| 984 | goto abort; | 992 | goto abort; |
| 985 | } | 993 | } |
| 986 | } | 994 | } |
| 987 | 995 | ||
| 988 | if ( *bh ) { | 996 | if (*bh) { |
| 989 | map_bh(*bh, inode->i_sb, firstext + b_off - offset); | 997 | map_bh(*bh, inode->i_sb, firstext + b_off - offset); |
| 990 | } else { | 998 | } else { |
| 991 | *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); | 999 | *bh = sb_getblk(inode->i_sb, firstext+b_off-offset); |
| 992 | if ( !*bh ) | 1000 | if (!*bh) |
| 993 | goto abort; | 1001 | goto abort; |
| 994 | } | 1002 | } |
| 995 | bh++; /* Next buffer head */ | 1003 | bh++; /* Next buffer head */ |
| @@ -1010,7 +1018,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock, | |||
| 1010 | struct buffer_head *bh_result, int create) | 1018 | struct buffer_head *bh_result, int create) |
| 1011 | { | 1019 | { |
| 1012 | if (create) { | 1020 | if (create) { |
| 1013 | printk("isofs_get_block: Kernel tries to allocate a block\n"); | 1021 | printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__); |
| 1014 | return -EROFS; | 1022 | return -EROFS; |
| 1015 | } | 1023 | } |
| 1016 | 1024 | ||
| @@ -1070,11 +1078,11 @@ static int isofs_read_level3_size(struct inode *inode) | |||
| 1070 | { | 1078 | { |
| 1071 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 1079 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
| 1072 | int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; | 1080 | int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; |
| 1073 | struct buffer_head * bh = NULL; | 1081 | struct buffer_head *bh = NULL; |
| 1074 | unsigned long block, offset, block_saved, offset_saved; | 1082 | unsigned long block, offset, block_saved, offset_saved; |
| 1075 | int i = 0; | 1083 | int i = 0; |
| 1076 | int more_entries = 0; | 1084 | int more_entries = 0; |
| 1077 | struct iso_directory_record * tmpde = NULL; | 1085 | struct iso_directory_record *tmpde = NULL; |
| 1078 | struct iso_inode_info *ei = ISOFS_I(inode); | 1086 | struct iso_inode_info *ei = ISOFS_I(inode); |
| 1079 | 1087 | ||
| 1080 | inode->i_size = 0; | 1088 | inode->i_size = 0; |
| @@ -1089,7 +1097,7 @@ static int isofs_read_level3_size(struct inode *inode) | |||
| 1089 | offset = ei->i_iget5_offset; | 1097 | offset = ei->i_iget5_offset; |
| 1090 | 1098 | ||
| 1091 | do { | 1099 | do { |
| 1092 | struct iso_directory_record * de; | 1100 | struct iso_directory_record *de; |
| 1093 | unsigned int de_len; | 1101 | unsigned int de_len; |
| 1094 | 1102 | ||
| 1095 | if (!bh) { | 1103 | if (!bh) { |
| @@ -1163,10 +1171,9 @@ out_noread: | |||
| 1163 | return -EIO; | 1171 | return -EIO; |
| 1164 | 1172 | ||
| 1165 | out_toomany: | 1173 | out_toomany: |
| 1166 | printk(KERN_INFO "isofs_read_level3_size: " | 1174 | printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n" |
| 1167 | "More than 100 file sections ?!?, aborting...\n" | 1175 | "isofs_read_level3_size: inode=%lu\n", |
| 1168 | "isofs_read_level3_size: inode=%lu\n", | 1176 | __func__, inode->i_ino); |
| 1169 | inode->i_ino); | ||
| 1170 | goto out; | 1177 | goto out; |
| 1171 | } | 1178 | } |
| 1172 | 1179 | ||
| @@ -1177,9 +1184,9 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1177 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 1184 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
| 1178 | unsigned long block; | 1185 | unsigned long block; |
| 1179 | int high_sierra = sbi->s_high_sierra; | 1186 | int high_sierra = sbi->s_high_sierra; |
| 1180 | struct buffer_head * bh = NULL; | 1187 | struct buffer_head *bh = NULL; |
| 1181 | struct iso_directory_record * de; | 1188 | struct iso_directory_record *de; |
| 1182 | struct iso_directory_record * tmpde = NULL; | 1189 | struct iso_directory_record *tmpde = NULL; |
| 1183 | unsigned int de_len; | 1190 | unsigned int de_len; |
| 1184 | unsigned long offset; | 1191 | unsigned long offset; |
| 1185 | struct iso_inode_info *ei = ISOFS_I(inode); | 1192 | struct iso_inode_info *ei = ISOFS_I(inode); |
| @@ -1199,7 +1206,7 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1199 | 1206 | ||
| 1200 | tmpde = kmalloc(de_len, GFP_KERNEL); | 1207 | tmpde = kmalloc(de_len, GFP_KERNEL); |
| 1201 | if (tmpde == NULL) { | 1208 | if (tmpde == NULL) { |
| 1202 | printk(KERN_INFO "isofs_read_inode: out of memory\n"); | 1209 | printk(KERN_INFO "%s: out of memory\n", __func__); |
| 1203 | goto fail; | 1210 | goto fail; |
| 1204 | } | 1211 | } |
| 1205 | memcpy(tmpde, bh->b_data + offset, frag1); | 1212 | memcpy(tmpde, bh->b_data + offset, frag1); |
| @@ -1212,24 +1219,26 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1212 | } | 1219 | } |
| 1213 | 1220 | ||
| 1214 | inode->i_ino = isofs_get_ino(ei->i_iget5_block, | 1221 | inode->i_ino = isofs_get_ino(ei->i_iget5_block, |
| 1215 | ei->i_iget5_offset, | 1222 | ei->i_iget5_offset, |
| 1216 | ISOFS_BUFFER_BITS(inode)); | 1223 | ISOFS_BUFFER_BITS(inode)); |
| 1217 | 1224 | ||
| 1218 | /* Assume it is a normal-format file unless told otherwise */ | 1225 | /* Assume it is a normal-format file unless told otherwise */ |
| 1219 | ei->i_file_format = isofs_file_normal; | 1226 | ei->i_file_format = isofs_file_normal; |
| 1220 | 1227 | ||
| 1221 | if (de->flags[-high_sierra] & 2) { | 1228 | if (de->flags[-high_sierra] & 2) { |
| 1222 | inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; | 1229 | inode->i_mode = S_IRUGO | S_IXUGO | S_IFDIR; |
| 1223 | inode->i_nlink = 1; /* Set to 1. We know there are 2, but | 1230 | inode->i_nlink = 1; /* |
| 1224 | the find utility tries to optimize | 1231 | * Set to 1. We know there are 2, but |
| 1225 | if it is 2, and it screws up. It is | 1232 | * the find utility tries to optimize |
| 1226 | easier to give 1 which tells find to | 1233 | * if it is 2, and it screws up. It is |
| 1227 | do it the hard way. */ | 1234 | * easier to give 1 which tells find to |
| 1235 | * do it the hard way. | ||
| 1236 | */ | ||
| 1228 | } else { | 1237 | } else { |
| 1229 | /* Everybody gets to read the file. */ | 1238 | /* Everybody gets to read the file. */ |
| 1230 | inode->i_mode = sbi->s_mode; | 1239 | inode->i_mode = sbi->s_mode; |
| 1231 | inode->i_nlink = 1; | 1240 | inode->i_nlink = 1; |
| 1232 | inode->i_mode |= S_IFREG; | 1241 | inode->i_mode |= S_IFREG; |
| 1233 | } | 1242 | } |
| 1234 | inode->i_uid = sbi->s_uid; | 1243 | inode->i_uid = sbi->s_uid; |
| 1235 | inode->i_gid = sbi->s_gid; | 1244 | inode->i_gid = sbi->s_gid; |
| @@ -1239,13 +1248,14 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1239 | ei->i_format_parm[1] = 0; | 1248 | ei->i_format_parm[1] = 0; |
| 1240 | ei->i_format_parm[2] = 0; | 1249 | ei->i_format_parm[2] = 0; |
| 1241 | 1250 | ||
| 1242 | ei->i_section_size = isonum_733 (de->size); | 1251 | ei->i_section_size = isonum_733(de->size); |
| 1243 | if (de->flags[-high_sierra] & 0x80) { | 1252 | if (de->flags[-high_sierra] & 0x80) { |
| 1244 | if(isofs_read_level3_size(inode)) goto fail; | 1253 | if(isofs_read_level3_size(inode)) |
| 1254 | goto fail; | ||
| 1245 | } else { | 1255 | } else { |
| 1246 | ei->i_next_section_block = 0; | 1256 | ei->i_next_section_block = 0; |
| 1247 | ei->i_next_section_offset = 0; | 1257 | ei->i_next_section_offset = 0; |
| 1248 | inode->i_size = isonum_733 (de->size); | 1258 | inode->i_size = isonum_733(de->size); |
| 1249 | } | 1259 | } |
| 1250 | 1260 | ||
| 1251 | /* | 1261 | /* |
| @@ -1258,23 +1268,24 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1258 | inode->i_size &= 0x00ffffff; | 1268 | inode->i_size &= 0x00ffffff; |
| 1259 | 1269 | ||
| 1260 | if (de->interleave[0]) { | 1270 | if (de->interleave[0]) { |
| 1261 | printk("Interleaved files not (yet) supported.\n"); | 1271 | printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n"); |
| 1262 | inode->i_size = 0; | 1272 | inode->i_size = 0; |
| 1263 | } | 1273 | } |
| 1264 | 1274 | ||
| 1265 | /* I have no idea what file_unit_size is used for, so | 1275 | /* I have no idea what file_unit_size is used for, so |
| 1266 | we will flag it for now */ | 1276 | we will flag it for now */ |
| 1267 | if (de->file_unit_size[0] != 0) { | 1277 | if (de->file_unit_size[0] != 0) { |
| 1268 | printk("File unit size != 0 for ISO file (%ld).\n", | 1278 | printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n", |
| 1269 | inode->i_ino); | 1279 | inode->i_ino); |
| 1270 | } | 1280 | } |
| 1271 | 1281 | ||
| 1272 | /* I have no idea what other flag bits are used for, so | 1282 | /* I have no idea what other flag bits are used for, so |
| 1273 | we will flag it for now */ | 1283 | we will flag it for now */ |
| 1274 | #ifdef DEBUG | 1284 | #ifdef DEBUG |
| 1275 | if((de->flags[-high_sierra] & ~2)!= 0){ | 1285 | if((de->flags[-high_sierra] & ~2)!= 0){ |
| 1276 | printk("Unusual flag settings for ISO file (%ld %x).\n", | 1286 | printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file " |
| 1277 | inode->i_ino, de->flags[-high_sierra]); | 1287 | "(%ld %x).\n", |
| 1288 | inode->i_ino, de->flags[-high_sierra]); | ||
| 1278 | } | 1289 | } |
| 1279 | #endif | 1290 | #endif |
| 1280 | 1291 | ||
| @@ -1285,11 +1296,11 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1285 | inode->i_atime.tv_nsec = | 1296 | inode->i_atime.tv_nsec = |
| 1286 | inode->i_ctime.tv_nsec = 0; | 1297 | inode->i_ctime.tv_nsec = 0; |
| 1287 | 1298 | ||
| 1288 | ei->i_first_extent = (isonum_733 (de->extent) + | 1299 | ei->i_first_extent = (isonum_733(de->extent) + |
| 1289 | isonum_711 (de->ext_attr_length)); | 1300 | isonum_711(de->ext_attr_length)); |
| 1290 | 1301 | ||
| 1291 | /* Set the number of blocks for stat() - should be done before RR */ | 1302 | /* Set the number of blocks for stat() - should be done before RR */ |
| 1292 | inode->i_blocks = (inode->i_size + 511) >> 9; | 1303 | inode->i_blocks = (inode->i_size + 511) >> 9; |
| 1293 | 1304 | ||
| 1294 | /* | 1305 | /* |
| 1295 | * Now test for possible Rock Ridge extensions which will override | 1306 | * Now test for possible Rock Ridge extensions which will override |
| @@ -1306,7 +1317,7 @@ static void isofs_read_inode(struct inode *inode) | |||
| 1306 | /* Install the inode operations vector */ | 1317 | /* Install the inode operations vector */ |
| 1307 | if (S_ISREG(inode->i_mode)) { | 1318 | if (S_ISREG(inode->i_mode)) { |
| 1308 | inode->i_fop = &generic_ro_fops; | 1319 | inode->i_fop = &generic_ro_fops; |
| 1309 | switch ( ei->i_file_format ) { | 1320 | switch (ei->i_file_format) { |
| 1310 | #ifdef CONFIG_ZISOFS | 1321 | #ifdef CONFIG_ZISOFS |
| 1311 | case isofs_file_compressed: | 1322 | case isofs_file_compressed: |
| 1312 | inode->i_data.a_ops = &zisofs_aops; | 1323 | inode->i_data.a_ops = &zisofs_aops; |
| @@ -1350,7 +1361,7 @@ static int isofs_iget5_test(struct inode *ino, void *data) | |||
| 1350 | struct isofs_iget5_callback_data *d = | 1361 | struct isofs_iget5_callback_data *d = |
| 1351 | (struct isofs_iget5_callback_data*)data; | 1362 | (struct isofs_iget5_callback_data*)data; |
| 1352 | return (i->i_iget5_block == d->block) | 1363 | return (i->i_iget5_block == d->block) |
| 1353 | && (i->i_iget5_offset == d->offset); | 1364 | && (i->i_iget5_offset == d->offset); |
| 1354 | } | 1365 | } |
| 1355 | 1366 | ||
| 1356 | static int isofs_iget5_set(struct inode *ino, void *data) | 1367 | static int isofs_iget5_set(struct inode *ino, void *data) |
| @@ -1384,7 +1395,7 @@ struct inode *isofs_iget(struct super_block *sb, | |||
| 1384 | hashval = (block << sb->s_blocksize_bits) | offset; | 1395 | hashval = (block << sb->s_blocksize_bits) | offset; |
| 1385 | 1396 | ||
| 1386 | inode = iget5_locked(sb, hashval, &isofs_iget5_test, | 1397 | inode = iget5_locked(sb, hashval, &isofs_iget5_test, |
| 1387 | &isofs_iget5_set, &data); | 1398 | &isofs_iget5_set, &data); |
| 1388 | 1399 | ||
| 1389 | if (inode && (inode->i_state & I_NEW)) { | 1400 | if (inode && (inode->i_state & I_NEW)) { |
| 1390 | sb->s_op->read_inode(inode); | 1401 | sb->s_op->read_inode(inode); |
| @@ -1398,7 +1409,7 @@ static int isofs_get_sb(struct file_system_type *fs_type, | |||
| 1398 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 1409 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
| 1399 | { | 1410 | { |
| 1400 | return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, | 1411 | return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, |
| 1401 | mnt); | 1412 | mnt); |
| 1402 | } | 1413 | } |
| 1403 | 1414 | ||
| 1404 | static struct file_system_type iso9660_fs_type = { | 1415 | static struct file_system_type iso9660_fs_type = { |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index efe2872cd4e3..a07e67b1ea7f 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | #include <linux/fs.h> | 1 | #include <linux/fs.h> |
| 2 | #include <linux/buffer_head.h> | 2 | #include <linux/buffer_head.h> |
| 3 | #include <linux/exportfs.h> | ||
| 3 | #include <linux/iso_fs.h> | 4 | #include <linux/iso_fs.h> |
| 4 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
| 5 | 6 | ||
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index fb8fe7a9ddc6..92c14b850e9c 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c | |||
| @@ -80,22 +80,20 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st | |||
| 80 | 80 | ||
| 81 | if (utf8) { | 81 | if (utf8) { |
| 82 | len = wcsntombs_be(outname, de->name, | 82 | len = wcsntombs_be(outname, de->name, |
| 83 | de->name_len[0] >> 1, PAGE_SIZE); | 83 | de->name_len[0] >> 1, PAGE_SIZE); |
| 84 | } else { | 84 | } else { |
| 85 | len = uni16_to_x8(outname, (__be16 *) de->name, | 85 | len = uni16_to_x8(outname, (__be16 *) de->name, |
| 86 | de->name_len[0] >> 1, nls); | 86 | de->name_len[0] >> 1, nls); |
| 87 | } | 87 | } |
| 88 | if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) { | 88 | if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) |
| 89 | len -= 2; | 89 | len -= 2; |
| 90 | } | ||
| 91 | 90 | ||
| 92 | /* | 91 | /* |
| 93 | * Windows doesn't like periods at the end of a name, | 92 | * Windows doesn't like periods at the end of a name, |
| 94 | * so neither do we | 93 | * so neither do we |
| 95 | */ | 94 | */ |
| 96 | while (len >= 2 && (outname[len-1] == '.')) { | 95 | while (len >= 2 && (outname[len-1] == '.')) |
| 97 | len--; | 96 | len--; |
| 98 | } | ||
| 99 | 97 | ||
| 100 | return len; | 98 | return len; |
| 101 | } | 99 | } |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index c04b3a14a3e9..c8c7e5138a01 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | * some sanity tests. | 15 | * some sanity tests. |
| 16 | */ | 16 | */ |
| 17 | static int | 17 | static int |
| 18 | isofs_cmp(struct dentry * dentry, const char * compare, int dlen) | 18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) |
| 19 | { | 19 | { |
| 20 | struct qstr qstr; | 20 | struct qstr qstr; |
| 21 | 21 | ||
| @@ -48,24 +48,24 @@ isofs_cmp(struct dentry * dentry, const char * compare, int dlen) | |||
| 48 | */ | 48 | */ |
| 49 | static unsigned long | 49 | static unsigned long |
| 50 | isofs_find_entry(struct inode *dir, struct dentry *dentry, | 50 | isofs_find_entry(struct inode *dir, struct dentry *dentry, |
| 51 | unsigned long *block_rv, unsigned long* offset_rv, | 51 | unsigned long *block_rv, unsigned long *offset_rv, |
| 52 | char * tmpname, struct iso_directory_record * tmpde) | 52 | char *tmpname, struct iso_directory_record *tmpde) |
| 53 | { | 53 | { |
| 54 | unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); | 54 | unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); |
| 55 | unsigned char bufbits = ISOFS_BUFFER_BITS(dir); | 55 | unsigned char bufbits = ISOFS_BUFFER_BITS(dir); |
| 56 | unsigned long block, f_pos, offset, block_saved, offset_saved; | 56 | unsigned long block, f_pos, offset, block_saved, offset_saved; |
| 57 | struct buffer_head * bh = NULL; | 57 | struct buffer_head *bh = NULL; |
| 58 | struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); | 58 | struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); |
| 59 | 59 | ||
| 60 | if (!ISOFS_I(dir)->i_first_extent) | 60 | if (!ISOFS_I(dir)->i_first_extent) |
| 61 | return 0; | 61 | return 0; |
| 62 | 62 | ||
| 63 | f_pos = 0; | 63 | f_pos = 0; |
| 64 | offset = 0; | 64 | offset = 0; |
| 65 | block = 0; | 65 | block = 0; |
| 66 | 66 | ||
| 67 | while (f_pos < dir->i_size) { | 67 | while (f_pos < dir->i_size) { |
| 68 | struct iso_directory_record * de; | 68 | struct iso_directory_record *de; |
| 69 | int de_len, match, i, dlen; | 69 | int de_len, match, i, dlen; |
| 70 | char *dpnt; | 70 | char *dpnt; |
| 71 | 71 | ||
| @@ -114,7 +114,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
| 114 | 114 | ||
| 115 | if (sbi->s_rock && | 115 | if (sbi->s_rock && |
| 116 | ((i = get_rock_ridge_filename(de, tmpname, dir)))) { | 116 | ((i = get_rock_ridge_filename(de, tmpname, dir)))) { |
| 117 | dlen = i; /* possibly -1 */ | 117 | dlen = i; /* possibly -1 */ |
| 118 | dpnt = tmpname; | 118 | dpnt = tmpname; |
| 119 | #ifdef CONFIG_JOLIET | 119 | #ifdef CONFIG_JOLIET |
| 120 | } else if (sbi->s_joliet_level) { | 120 | } else if (sbi->s_joliet_level) { |
| @@ -145,8 +145,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
| 145 | isofs_normalize_block_and_offset(de, | 145 | isofs_normalize_block_and_offset(de, |
| 146 | &block_saved, | 146 | &block_saved, |
| 147 | &offset_saved); | 147 | &offset_saved); |
| 148 | *block_rv = block_saved; | 148 | *block_rv = block_saved; |
| 149 | *offset_rv = offset_saved; | 149 | *offset_rv = offset_saved; |
| 150 | brelse(bh); | 150 | brelse(bh); |
| 151 | return 1; | 151 | return 1; |
| 152 | } | 152 | } |
| @@ -155,7 +155,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
| 155 | return 0; | 155 | return 0; |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 158 | struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
| 159 | { | 159 | { |
| 160 | int found; | 160 | int found; |
| 161 | unsigned long block, offset; | 161 | unsigned long block, offset; |
| @@ -170,9 +170,9 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n | |||
| 170 | 170 | ||
| 171 | lock_kernel(); | 171 | lock_kernel(); |
| 172 | found = isofs_find_entry(dir, dentry, | 172 | found = isofs_find_entry(dir, dentry, |
| 173 | &block, &offset, | 173 | &block, &offset, |
| 174 | page_address(page), | 174 | page_address(page), |
| 175 | 1024 + page_address(page)); | 175 | 1024 + page_address(page)); |
| 176 | __free_page(page); | 176 | __free_page(page); |
| 177 | 177 | ||
| 178 | inode = NULL; | 178 | inode = NULL; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 1facfaff97cb..a003d50edcdb 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
| @@ -887,7 +887,8 @@ restart_loop: | |||
| 887 | journal->j_committing_transaction = NULL; | 887 | journal->j_committing_transaction = NULL; |
| 888 | spin_unlock(&journal->j_state_lock); | 888 | spin_unlock(&journal->j_state_lock); |
| 889 | 889 | ||
| 890 | if (commit_transaction->t_checkpoint_list == NULL) { | 890 | if (commit_transaction->t_checkpoint_list == NULL && |
| 891 | commit_transaction->t_checkpoint_io_list == NULL) { | ||
| 891 | __journal_drop_transaction(journal, commit_transaction); | 892 | __journal_drop_transaction(journal, commit_transaction); |
| 892 | } else { | 893 | } else { |
| 893 | if (journal->j_checkpoint_transactions == NULL) { | 894 | if (journal->j_checkpoint_transactions == NULL) { |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 824e3b7d4ec1..8db2fa25170b 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
| @@ -68,6 +68,7 @@ | |||
| 68 | #include <linux/list.h> | 68 | #include <linux/list.h> |
| 69 | #include <linux/init.h> | 69 | #include <linux/init.h> |
| 70 | #endif | 70 | #endif |
| 71 | #include <linux/log2.h> | ||
| 71 | 72 | ||
| 72 | static struct kmem_cache *revoke_record_cache; | 73 | static struct kmem_cache *revoke_record_cache; |
| 73 | static struct kmem_cache *revoke_table_cache; | 74 | static struct kmem_cache *revoke_table_cache; |
| @@ -211,7 +212,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) | |||
| 211 | journal->j_revoke = journal->j_revoke_table[0]; | 212 | journal->j_revoke = journal->j_revoke_table[0]; |
| 212 | 213 | ||
| 213 | /* Check that the hash_size is a power of two */ | 214 | /* Check that the hash_size is a power of two */ |
| 214 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 215 | J_ASSERT(is_power_of_2(hash_size)); |
| 215 | 216 | ||
| 216 | journal->j_revoke->hash_size = hash_size; | 217 | journal->j_revoke->hash_size = hash_size; |
| 217 | 218 | ||
| @@ -238,7 +239,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) | |||
| 238 | journal->j_revoke = journal->j_revoke_table[1]; | 239 | journal->j_revoke = journal->j_revoke_table[1]; |
| 239 | 240 | ||
| 240 | /* Check that the hash_size is a power of two */ | 241 | /* Check that the hash_size is a power of two */ |
| 241 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 242 | J_ASSERT(is_power_of_2(hash_size)); |
| 242 | 243 | ||
| 243 | journal->j_revoke->hash_size = hash_size; | 244 | journal->j_revoke->hash_size = hash_size; |
| 244 | 245 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 2856e1100a5f..c0f59d1b13dc 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -896,7 +896,8 @@ restart_loop: | |||
| 896 | journal->j_committing_transaction = NULL; | 896 | journal->j_committing_transaction = NULL; |
| 897 | spin_unlock(&journal->j_state_lock); | 897 | spin_unlock(&journal->j_state_lock); |
| 898 | 898 | ||
| 899 | if (commit_transaction->t_checkpoint_list == NULL) { | 899 | if (commit_transaction->t_checkpoint_list == NULL && |
| 900 | commit_transaction->t_checkpoint_io_list == NULL) { | ||
| 900 | __jbd2_journal_drop_transaction(journal, commit_transaction); | 901 | __jbd2_journal_drop_transaction(journal, commit_transaction); |
| 901 | } else { | 902 | } else { |
| 902 | if (journal->j_checkpoint_transactions == NULL) { | 903 | if (journal->j_checkpoint_transactions == NULL) { |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 9246e763da78..28cac049a56b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
| @@ -68,6 +68,7 @@ | |||
| 68 | #include <linux/list.h> | 68 | #include <linux/list.h> |
| 69 | #include <linux/init.h> | 69 | #include <linux/init.h> |
| 70 | #endif | 70 | #endif |
| 71 | #include <linux/log2.h> | ||
| 71 | 72 | ||
| 72 | static struct kmem_cache *jbd2_revoke_record_cache; | 73 | static struct kmem_cache *jbd2_revoke_record_cache; |
| 73 | static struct kmem_cache *jbd2_revoke_table_cache; | 74 | static struct kmem_cache *jbd2_revoke_table_cache; |
| @@ -212,7 +213,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) | |||
| 212 | journal->j_revoke = journal->j_revoke_table[0]; | 213 | journal->j_revoke = journal->j_revoke_table[0]; |
| 213 | 214 | ||
| 214 | /* Check that the hash_size is a power of two */ | 215 | /* Check that the hash_size is a power of two */ |
| 215 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 216 | J_ASSERT(is_power_of_2(hash_size)); |
| 216 | 217 | ||
| 217 | journal->j_revoke->hash_size = hash_size; | 218 | journal->j_revoke->hash_size = hash_size; |
| 218 | 219 | ||
| @@ -239,7 +240,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) | |||
| 239 | journal->j_revoke = journal->j_revoke_table[1]; | 240 | journal->j_revoke = journal->j_revoke_table[1]; |
| 240 | 241 | ||
| 241 | /* Check that the hash_size is a power of two */ | 242 | /* Check that the hash_size is a power of two */ |
| 242 | J_ASSERT ((hash_size & (hash_size-1)) == 0); | 243 | J_ASSERT(is_power_of_2(hash_size)); |
| 243 | 244 | ||
| 244 | journal->j_revoke->hash_size = hash_size; | 245 | journal->j_revoke->hash_size = hash_size; |
| 245 | 246 | ||
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 0c82dfcfd246..143c5530caf3 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c | |||
| @@ -81,6 +81,7 @@ static int jffs2_garbage_collect_thread(void *_c) | |||
| 81 | 81 | ||
| 82 | set_user_nice(current, 10); | 82 | set_user_nice(current, 10); |
| 83 | 83 | ||
| 84 | set_freezable(); | ||
| 84 | for (;;) { | 85 | for (;;) { |
| 85 | allow_signal(SIGHUP); | 86 | allow_signal(SIGHUP); |
| 86 | 87 | ||
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 2374b595f2e1..f0ec72b263f1 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
| @@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); | |||
| 32 | extern void jfs_free_zero_link(struct inode *); | 32 | extern void jfs_free_zero_link(struct inode *); |
| 33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); | 33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); |
| 34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); | 34 | extern void jfs_get_inode_flags(struct jfs_inode_info *); |
| 35 | extern struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp); | ||
| 35 | extern void jfs_set_inode_flags(struct inode *); | 36 | extern void jfs_set_inode_flags(struct inode *); |
| 36 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 37 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
| 37 | 38 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 25161c4121e4..932797ba433b 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -1477,6 +1477,38 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
| 1477 | return dentry; | 1477 | return dentry; |
| 1478 | } | 1478 | } |
| 1479 | 1479 | ||
| 1480 | struct dentry *jfs_get_dentry(struct super_block *sb, void *vobjp) | ||
| 1481 | { | ||
| 1482 | __u32 *objp = vobjp; | ||
| 1483 | unsigned long ino = objp[0]; | ||
| 1484 | __u32 generation = objp[1]; | ||
| 1485 | struct inode *inode; | ||
| 1486 | struct dentry *result; | ||
| 1487 | |||
| 1488 | if (ino == 0) | ||
| 1489 | return ERR_PTR(-ESTALE); | ||
| 1490 | inode = iget(sb, ino); | ||
| 1491 | if (inode == NULL) | ||
| 1492 | return ERR_PTR(-ENOMEM); | ||
| 1493 | |||
| 1494 | if (is_bad_inode(inode) || | ||
| 1495 | (generation && inode->i_generation != generation)) { | ||
| 1496 | result = ERR_PTR(-ESTALE); | ||
| 1497 | goto out_iput; | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | result = d_alloc_anon(inode); | ||
| 1501 | if (!result) { | ||
| 1502 | result = ERR_PTR(-ENOMEM); | ||
| 1503 | goto out_iput; | ||
| 1504 | } | ||
| 1505 | return result; | ||
| 1506 | |||
| 1507 | out_iput: | ||
| 1508 | iput(inode); | ||
| 1509 | return result; | ||
| 1510 | } | ||
| 1511 | |||
| 1480 | struct dentry *jfs_get_parent(struct dentry *dentry) | 1512 | struct dentry *jfs_get_parent(struct dentry *dentry) |
| 1481 | { | 1513 | { |
| 1482 | struct super_block *sb = dentry->d_inode->i_sb; | 1514 | struct super_block *sb = dentry->d_inode->i_sb; |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 20e4ac1c79a3..929fceca7999 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
| 28 | #include <linux/posix_acl.h> | 28 | #include <linux/posix_acl.h> |
| 29 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
| 30 | #include <linux/exportfs.h> | ||
| 30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
| 31 | #include <linux/seq_file.h> | 32 | #include <linux/seq_file.h> |
| 32 | 33 | ||
| @@ -737,6 +738,7 @@ static const struct super_operations jfs_super_operations = { | |||
| 737 | }; | 738 | }; |
| 738 | 739 | ||
| 739 | static struct export_operations jfs_export_operations = { | 740 | static struct export_operations jfs_export_operations = { |
| 741 | .get_dentry = jfs_get_dentry, | ||
| 740 | .get_parent = jfs_get_parent, | 742 | .get_parent = jfs_get_parent, |
| 741 | }; | 743 | }; |
| 742 | 744 | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 26809325469c..82e2192a0d5c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/smp.h> | 25 | #include <linux/smp.h> |
| 26 | #include <linux/smp_lock.h> | 26 | #include <linux/smp_lock.h> |
| 27 | #include <linux/mutex.h> | 27 | #include <linux/mutex.h> |
| 28 | #include <linux/freezer.h> | ||
| 28 | 29 | ||
| 29 | #include <linux/sunrpc/types.h> | 30 | #include <linux/sunrpc/types.h> |
| 30 | #include <linux/sunrpc/stats.h> | 31 | #include <linux/sunrpc/stats.h> |
| @@ -75,18 +76,31 @@ static const int nlm_port_min = 0, nlm_port_max = 65535; | |||
| 75 | 76 | ||
| 76 | static struct ctl_table_header * nlm_sysctl_table; | 77 | static struct ctl_table_header * nlm_sysctl_table; |
| 77 | 78 | ||
| 78 | static unsigned long set_grace_period(void) | 79 | static unsigned long get_lockd_grace_period(void) |
| 79 | { | 80 | { |
| 80 | unsigned long grace_period; | ||
| 81 | |||
| 82 | /* Note: nlm_timeout should always be nonzero */ | 81 | /* Note: nlm_timeout should always be nonzero */ |
| 83 | if (nlm_grace_period) | 82 | if (nlm_grace_period) |
| 84 | grace_period = ((nlm_grace_period + nlm_timeout - 1) | 83 | return roundup(nlm_grace_period, nlm_timeout) * HZ; |
| 85 | / nlm_timeout) * nlm_timeout * HZ; | ||
| 86 | else | 84 | else |
| 87 | grace_period = nlm_timeout * 5 * HZ; | 85 | return nlm_timeout * 5 * HZ; |
| 86 | } | ||
| 87 | |||
| 88 | unsigned long get_nfs_grace_period(void) | ||
| 89 | { | ||
| 90 | unsigned long lockdgrace = get_lockd_grace_period(); | ||
| 91 | unsigned long nfsdgrace = 0; | ||
| 92 | |||
| 93 | if (nlmsvc_ops) | ||
| 94 | nfsdgrace = nlmsvc_ops->get_grace_period(); | ||
| 95 | |||
| 96 | return max(lockdgrace, nfsdgrace); | ||
| 97 | } | ||
| 98 | EXPORT_SYMBOL(get_nfs_grace_period); | ||
| 99 | |||
| 100 | static unsigned long set_grace_period(void) | ||
| 101 | { | ||
| 88 | nlmsvc_grace_period = 1; | 102 | nlmsvc_grace_period = 1; |
| 89 | return grace_period + jiffies; | 103 | return get_nfs_grace_period() + jiffies; |
| 90 | } | 104 | } |
| 91 | 105 | ||
| 92 | static inline void clear_grace_period(void) | 106 | static inline void clear_grace_period(void) |
| @@ -119,6 +133,7 @@ lockd(struct svc_rqst *rqstp) | |||
| 119 | complete(&lockd_start_done); | 133 | complete(&lockd_start_done); |
| 120 | 134 | ||
| 121 | daemonize("lockd"); | 135 | daemonize("lockd"); |
| 136 | set_freezable(); | ||
| 122 | 137 | ||
| 123 | /* Process request with signals blocked, but allow SIGKILL. */ | 138 | /* Process request with signals blocked, but allow SIGKILL. */ |
| 124 | allow_signal(SIGKILL); | 139 | allow_signal(SIGKILL); |
diff --git a/fs/mbcache.c b/fs/mbcache.c index deeb9dc062d9..fbb1d02f8791 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -100,7 +100,6 @@ struct mb_cache { | |||
| 100 | static LIST_HEAD(mb_cache_list); | 100 | static LIST_HEAD(mb_cache_list); |
| 101 | static LIST_HEAD(mb_cache_lru_list); | 101 | static LIST_HEAD(mb_cache_lru_list); |
| 102 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 102 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
| 103 | static struct shrinker *mb_shrinker; | ||
| 104 | 103 | ||
| 105 | static inline int | 104 | static inline int |
| 106 | mb_cache_indexes(struct mb_cache *cache) | 105 | mb_cache_indexes(struct mb_cache *cache) |
| @@ -118,6 +117,10 @@ mb_cache_indexes(struct mb_cache *cache) | |||
| 118 | 117 | ||
| 119 | static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); | 118 | static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); |
| 120 | 119 | ||
| 120 | static struct shrinker mb_cache_shrinker = { | ||
| 121 | .shrink = mb_cache_shrink_fn, | ||
| 122 | .seeks = DEFAULT_SEEKS, | ||
| 123 | }; | ||
| 121 | 124 | ||
| 122 | static inline int | 125 | static inline int |
| 123 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 126 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) |
| @@ -662,13 +665,13 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, | |||
| 662 | 665 | ||
| 663 | static int __init init_mbcache(void) | 666 | static int __init init_mbcache(void) |
| 664 | { | 667 | { |
| 665 | mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn); | 668 | register_shrinker(&mb_cache_shrinker); |
| 666 | return 0; | 669 | return 0; |
| 667 | } | 670 | } |
| 668 | 671 | ||
| 669 | static void __exit exit_mbcache(void) | 672 | static void __exit exit_mbcache(void) |
| 670 | { | 673 | { |
| 671 | remove_shrinker(mb_shrinker); | 674 | unregister_shrinker(&mb_cache_shrinker); |
| 672 | } | 675 | } |
| 673 | 676 | ||
| 674 | module_init(init_mbcache) | 677 | module_init(init_mbcache) |
diff --git a/fs/namespace.c b/fs/namespace.c index b696e3a0d18f..4198003d7e18 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
| 29 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
| 30 | #include "pnode.h" | 30 | #include "pnode.h" |
| 31 | #include "internal.h" | ||
| 31 | 32 | ||
| 32 | /* spinlock for vfsmount related operations, inplace of dcache_lock */ | 33 | /* spinlock for vfsmount related operations, inplace of dcache_lock */ |
| 33 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | 34 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); |
| @@ -320,22 +321,16 @@ EXPORT_SYMBOL(mnt_unpin); | |||
| 320 | static void *m_start(struct seq_file *m, loff_t *pos) | 321 | static void *m_start(struct seq_file *m, loff_t *pos) |
| 321 | { | 322 | { |
| 322 | struct mnt_namespace *n = m->private; | 323 | struct mnt_namespace *n = m->private; |
| 323 | struct list_head *p; | ||
| 324 | loff_t l = *pos; | ||
| 325 | 324 | ||
| 326 | down_read(&namespace_sem); | 325 | down_read(&namespace_sem); |
| 327 | list_for_each(p, &n->list) | 326 | return seq_list_start(&n->list, *pos); |
| 328 | if (!l--) | ||
| 329 | return list_entry(p, struct vfsmount, mnt_list); | ||
| 330 | return NULL; | ||
| 331 | } | 327 | } |
| 332 | 328 | ||
| 333 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 329 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
| 334 | { | 330 | { |
| 335 | struct mnt_namespace *n = m->private; | 331 | struct mnt_namespace *n = m->private; |
| 336 | struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; | 332 | |
| 337 | (*pos)++; | 333 | return seq_list_next(v, &n->list, pos); |
| 338 | return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); | ||
| 339 | } | 334 | } |
| 340 | 335 | ||
| 341 | static void m_stop(struct seq_file *m, void *v) | 336 | static void m_stop(struct seq_file *m, void *v) |
| @@ -350,7 +345,7 @@ static inline void mangle(struct seq_file *m, const char *s) | |||
| 350 | 345 | ||
| 351 | static int show_vfsmnt(struct seq_file *m, void *v) | 346 | static int show_vfsmnt(struct seq_file *m, void *v) |
| 352 | { | 347 | { |
| 353 | struct vfsmount *mnt = v; | 348 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); |
| 354 | int err = 0; | 349 | int err = 0; |
| 355 | static struct proc_fs_info { | 350 | static struct proc_fs_info { |
| 356 | int flag; | 351 | int flag; |
| @@ -405,7 +400,7 @@ struct seq_operations mounts_op = { | |||
| 405 | 400 | ||
| 406 | static int show_vfsstat(struct seq_file *m, void *v) | 401 | static int show_vfsstat(struct seq_file *m, void *v) |
| 407 | { | 402 | { |
| 408 | struct vfsmount *mnt = v; | 403 | struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list); |
| 409 | int err = 0; | 404 | int err = 0; |
| 410 | 405 | ||
| 411 | /* device */ | 406 | /* device */ |
| @@ -1457,7 +1452,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
| 1457 | 1452 | ||
| 1458 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 1453 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
| 1459 | if (!new_ns) | 1454 | if (!new_ns) |
| 1460 | return NULL; | 1455 | return ERR_PTR(-ENOMEM); |
| 1461 | 1456 | ||
| 1462 | atomic_set(&new_ns->count, 1); | 1457 | atomic_set(&new_ns->count, 1); |
| 1463 | INIT_LIST_HEAD(&new_ns->list); | 1458 | INIT_LIST_HEAD(&new_ns->list); |
| @@ -1471,7 +1466,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
| 1471 | if (!new_ns->root) { | 1466 | if (!new_ns->root) { |
| 1472 | up_write(&namespace_sem); | 1467 | up_write(&namespace_sem); |
| 1473 | kfree(new_ns); | 1468 | kfree(new_ns); |
| 1474 | return NULL; | 1469 | return ERR_PTR(-ENOMEM);; |
| 1475 | } | 1470 | } |
| 1476 | spin_lock(&vfsmount_lock); | 1471 | spin_lock(&vfsmount_lock); |
| 1477 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); | 1472 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); |
| @@ -1515,7 +1510,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
| 1515 | return new_ns; | 1510 | return new_ns; |
| 1516 | } | 1511 | } |
| 1517 | 1512 | ||
| 1518 | struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns, | 1513 | struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, |
| 1519 | struct fs_struct *new_fs) | 1514 | struct fs_struct *new_fs) |
| 1520 | { | 1515 | { |
| 1521 | struct mnt_namespace *new_ns; | 1516 | struct mnt_namespace *new_ns; |
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index d3152f8d95c6..2b145de45b39 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
| @@ -203,7 +203,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
| 203 | 203 | ||
| 204 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { | 204 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { |
| 205 | if (pos >= MAX_NON_LFS) { | 205 | if (pos >= MAX_NON_LFS) { |
| 206 | send_sig(SIGXFSZ, current, 0); | ||
| 207 | return -EFBIG; | 206 | return -EFBIG; |
| 208 | } | 207 | } |
| 209 | if (count > MAX_NON_LFS - (u32)pos) { | 208 | if (count > MAX_NON_LFS - (u32)pos) { |
| @@ -212,7 +211,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
| 212 | } | 211 | } |
| 213 | if (pos >= inode->i_sb->s_maxbytes) { | 212 | if (pos >= inode->i_sb->s_maxbytes) { |
| 214 | if (count || pos > inode->i_sb->s_maxbytes) { | 213 | if (count || pos > inode->i_sb->s_maxbytes) { |
| 215 | send_sig(SIGXFSZ, current, 0); | ||
| 216 | return -EFBIG; | 214 | return -EFBIG; |
| 217 | } | 215 | } |
| 218 | } | 216 | } |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f309c8741a..a796be5051bf 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/sunrpc/svcsock.h> | 14 | #include <linux/sunrpc/svcsock.h> |
| 15 | #include <linux/nfs_fs.h> | 15 | #include <linux/nfs_fs.h> |
| 16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
| 17 | #include <linux/freezer.h> | ||
| 17 | 18 | ||
| 18 | #include <net/inet_sock.h> | 19 | #include <net/inet_sock.h> |
| 19 | 20 | ||
| @@ -67,6 +68,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
| 67 | daemonize("nfsv4-svc"); | 68 | daemonize("nfsv4-svc"); |
| 68 | /* Process request with signals blocked, but allow SIGKILL. */ | 69 | /* Process request with signals blocked, but allow SIGKILL. */ |
| 69 | allow_signal(SIGKILL); | 70 | allow_signal(SIGKILL); |
| 71 | set_freezable(); | ||
| 70 | 72 | ||
| 71 | complete(&nfs_callback_info.started); | 73 | complete(&nfs_callback_info.started); |
| 72 | 74 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ccb455053ee4..a49f9feff776 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
| @@ -1206,23 +1206,9 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
| 1206 | */ | 1206 | */ |
| 1207 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1207 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
| 1208 | { | 1208 | { |
| 1209 | struct list_head *_p; | ||
| 1210 | loff_t pos = *_pos; | ||
| 1211 | |||
| 1212 | /* lock the list against modification */ | 1209 | /* lock the list against modification */ |
| 1213 | spin_lock(&nfs_client_lock); | 1210 | spin_lock(&nfs_client_lock); |
| 1214 | 1211 | return seq_list_start_head(&nfs_client_list, *_pos); | |
| 1215 | /* allow for the header line */ | ||
| 1216 | if (!pos) | ||
| 1217 | return SEQ_START_TOKEN; | ||
| 1218 | pos--; | ||
| 1219 | |||
| 1220 | /* find the n'th element in the list */ | ||
| 1221 | list_for_each(_p, &nfs_client_list) | ||
| 1222 | if (!pos--) | ||
| 1223 | break; | ||
| 1224 | |||
| 1225 | return _p != &nfs_client_list ? _p : NULL; | ||
| 1226 | } | 1212 | } |
| 1227 | 1213 | ||
| 1228 | /* | 1214 | /* |
| @@ -1230,14 +1216,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | |||
| 1230 | */ | 1216 | */ |
| 1231 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | 1217 | static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) |
| 1232 | { | 1218 | { |
| 1233 | struct list_head *_p; | 1219 | return seq_list_next(v, &nfs_client_list, pos); |
| 1234 | |||
| 1235 | (*pos)++; | ||
| 1236 | |||
| 1237 | _p = v; | ||
| 1238 | _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next; | ||
| 1239 | |||
| 1240 | return _p != &nfs_client_list ? _p : NULL; | ||
| 1241 | } | 1220 | } |
| 1242 | 1221 | ||
| 1243 | /* | 1222 | /* |
| @@ -1256,7 +1235,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
| 1256 | struct nfs_client *clp; | 1235 | struct nfs_client *clp; |
| 1257 | 1236 | ||
| 1258 | /* display header on line 1 */ | 1237 | /* display header on line 1 */ |
| 1259 | if (v == SEQ_START_TOKEN) { | 1238 | if (v == &nfs_client_list) { |
| 1260 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); | 1239 | seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); |
| 1261 | return 0; | 1240 | return 0; |
| 1262 | } | 1241 | } |
| @@ -1297,23 +1276,9 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
| 1297 | */ | 1276 | */ |
| 1298 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1277 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
| 1299 | { | 1278 | { |
| 1300 | struct list_head *_p; | ||
| 1301 | loff_t pos = *_pos; | ||
| 1302 | |||
| 1303 | /* lock the list against modification */ | 1279 | /* lock the list against modification */ |
| 1304 | spin_lock(&nfs_client_lock); | 1280 | spin_lock(&nfs_client_lock); |
| 1305 | 1281 | return seq_list_start_head(&nfs_volume_list, *_pos); | |
| 1306 | /* allow for the header line */ | ||
| 1307 | if (!pos) | ||
| 1308 | return SEQ_START_TOKEN; | ||
| 1309 | pos--; | ||
| 1310 | |||
| 1311 | /* find the n'th element in the list */ | ||
| 1312 | list_for_each(_p, &nfs_volume_list) | ||
| 1313 | if (!pos--) | ||
| 1314 | break; | ||
| 1315 | |||
| 1316 | return _p != &nfs_volume_list ? _p : NULL; | ||
| 1317 | } | 1282 | } |
| 1318 | 1283 | ||
| 1319 | /* | 1284 | /* |
| @@ -1321,14 +1286,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | |||
| 1321 | */ | 1286 | */ |
| 1322 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | 1287 | static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) |
| 1323 | { | 1288 | { |
| 1324 | struct list_head *_p; | 1289 | return seq_list_next(v, &nfs_volume_list, pos); |
| 1325 | |||
| 1326 | (*pos)++; | ||
| 1327 | |||
| 1328 | _p = v; | ||
| 1329 | _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next; | ||
| 1330 | |||
| 1331 | return _p != &nfs_volume_list ? _p : NULL; | ||
| 1332 | } | 1290 | } |
| 1333 | 1291 | ||
| 1334 | /* | 1292 | /* |
| @@ -1349,7 +1307,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
| 1349 | char dev[8], fsid[17]; | 1307 | char dev[8], fsid[17]; |
| 1350 | 1308 | ||
| 1351 | /* display header on line 1 */ | 1309 | /* display header on line 1 */ |
| 1352 | if (v == SEQ_START_TOKEN) { | 1310 | if (v == &nfs_volume_list) { |
| 1353 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); | 1311 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); |
| 1354 | return 0; | 1312 | return 0; |
| 1355 | } | 1313 | } |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a2b1af89ca1a..adffe1615c51 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -300,7 +300,10 @@ static const struct super_operations nfs4_sops = { | |||
| 300 | }; | 300 | }; |
| 301 | #endif | 301 | #endif |
| 302 | 302 | ||
| 303 | static struct shrinker *acl_shrinker; | 303 | static struct shrinker acl_shrinker = { |
| 304 | .shrink = nfs_access_cache_shrinker, | ||
| 305 | .seeks = DEFAULT_SEEKS, | ||
| 306 | }; | ||
| 304 | 307 | ||
| 305 | /* | 308 | /* |
| 306 | * Register the NFS filesystems | 309 | * Register the NFS filesystems |
| @@ -321,7 +324,7 @@ int __init register_nfs_fs(void) | |||
| 321 | if (ret < 0) | 324 | if (ret < 0) |
| 322 | goto error_2; | 325 | goto error_2; |
| 323 | #endif | 326 | #endif |
| 324 | acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker); | 327 | register_shrinker(&acl_shrinker); |
| 325 | return 0; | 328 | return 0; |
| 326 | 329 | ||
| 327 | #ifdef CONFIG_NFS_V4 | 330 | #ifdef CONFIG_NFS_V4 |
| @@ -339,8 +342,7 @@ error_0: | |||
| 339 | */ | 342 | */ |
| 340 | void __exit unregister_nfs_fs(void) | 343 | void __exit unregister_nfs_fs(void) |
| 341 | { | 344 | { |
| 342 | if (acl_shrinker != NULL) | 345 | unregister_shrinker(&acl_shrinker); |
| 343 | remove_shrinker(acl_shrinker); | ||
| 344 | #ifdef CONFIG_NFS_V4 | 346 | #ifdef CONFIG_NFS_V4 |
| 345 | unregister_filesystem(&nfs4_fs_type); | 347 | unregister_filesystem(&nfs4_fs_type); |
| 346 | nfs_unregister_sysctl(); | 348 | nfs_unregister_sysctl(); |
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 6e92b0fe5323..cf61dc8ae942 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
| @@ -12,17 +12,31 @@ | |||
| 12 | 12 | ||
| 13 | #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) | 13 | #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) |
| 14 | 14 | ||
| 15 | static int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) | ||
| 16 | { | ||
| 17 | struct exp_flavor_info *f; | ||
| 18 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
| 19 | |||
| 20 | for (f = exp->ex_flavors; f < end; f++) { | ||
| 21 | if (f->pseudoflavor == rqstp->rq_flavor) | ||
| 22 | return f->flags; | ||
| 23 | } | ||
| 24 | return exp->ex_flags; | ||
| 25 | |||
| 26 | } | ||
| 27 | |||
| 15 | int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | 28 | int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) |
| 16 | { | 29 | { |
| 17 | struct svc_cred cred = rqstp->rq_cred; | 30 | struct svc_cred cred = rqstp->rq_cred; |
| 18 | int i; | 31 | int i; |
| 32 | int flags = nfsexp_flags(rqstp, exp); | ||
| 19 | int ret; | 33 | int ret; |
| 20 | 34 | ||
| 21 | if (exp->ex_flags & NFSEXP_ALLSQUASH) { | 35 | if (flags & NFSEXP_ALLSQUASH) { |
| 22 | cred.cr_uid = exp->ex_anon_uid; | 36 | cred.cr_uid = exp->ex_anon_uid; |
| 23 | cred.cr_gid = exp->ex_anon_gid; | 37 | cred.cr_gid = exp->ex_anon_gid; |
| 24 | cred.cr_group_info = groups_alloc(0); | 38 | cred.cr_group_info = groups_alloc(0); |
| 25 | } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { | 39 | } else if (flags & NFSEXP_ROOTSQUASH) { |
| 26 | struct group_info *gi; | 40 | struct group_info *gi; |
| 27 | if (!cred.cr_uid) | 41 | if (!cred.cr_uid) |
| 28 | cred.cr_uid = exp->ex_anon_uid; | 42 | cred.cr_uid = exp->ex_anon_uid; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 79bd03b8bbf8..c7bbf460b009 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
| @@ -26,12 +26,15 @@ | |||
| 26 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
| 27 | #include <linux/hash.h> | 27 | #include <linux/hash.h> |
| 28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
| 29 | #include <linux/exportfs.h> | ||
| 29 | 30 | ||
| 30 | #include <linux/sunrpc/svc.h> | 31 | #include <linux/sunrpc/svc.h> |
| 31 | #include <linux/nfsd/nfsd.h> | 32 | #include <linux/nfsd/nfsd.h> |
| 32 | #include <linux/nfsd/nfsfh.h> | 33 | #include <linux/nfsd/nfsfh.h> |
| 33 | #include <linux/nfsd/syscall.h> | 34 | #include <linux/nfsd/syscall.h> |
| 34 | #include <linux/lockd/bind.h> | 35 | #include <linux/lockd/bind.h> |
| 36 | #include <linux/sunrpc/msg_prot.h> | ||
| 37 | #include <linux/sunrpc/gss_api.h> | ||
| 35 | 38 | ||
| 36 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT | 39 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT |
| 37 | 40 | ||
| @@ -451,8 +454,48 @@ out_free_all: | |||
| 451 | return err; | 454 | return err; |
| 452 | } | 455 | } |
| 453 | 456 | ||
| 457 | static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) | ||
| 458 | { | ||
| 459 | int listsize, err; | ||
| 460 | struct exp_flavor_info *f; | ||
| 461 | |||
| 462 | err = get_int(mesg, &listsize); | ||
| 463 | if (err) | ||
| 464 | return err; | ||
| 465 | if (listsize < 0 || listsize > MAX_SECINFO_LIST) | ||
| 466 | return -EINVAL; | ||
| 467 | |||
| 468 | for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { | ||
| 469 | err = get_int(mesg, &f->pseudoflavor); | ||
| 470 | if (err) | ||
| 471 | return err; | ||
| 472 | /* | ||
| 473 | * Just a quick sanity check; we could also try to check | ||
| 474 | * whether this pseudoflavor is supported, but at worst | ||
| 475 | * an unsupported pseudoflavor on the export would just | ||
| 476 | * be a pseudoflavor that won't match the flavor of any | ||
| 477 | * authenticated request. The administrator will | ||
| 478 | * probably discover the problem when someone fails to | ||
| 479 | * authenticate. | ||
| 480 | */ | ||
| 481 | if (f->pseudoflavor < 0) | ||
| 482 | return -EINVAL; | ||
| 483 | err = get_int(mesg, &f->flags); | ||
| 484 | if (err) | ||
| 485 | return err; | ||
| 486 | /* Only some flags are allowed to differ between flavors: */ | ||
| 487 | if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp->ex_flags)) | ||
| 488 | return -EINVAL; | ||
| 489 | } | ||
| 490 | exp->ex_nflavors = listsize; | ||
| 491 | return 0; | ||
| 492 | } | ||
| 493 | |||
| 454 | #else /* CONFIG_NFSD_V4 */ | 494 | #else /* CONFIG_NFSD_V4 */ |
| 455 | static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; } | 495 | static inline int |
| 496 | fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc){return 0;} | ||
| 497 | static inline int | ||
| 498 | secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } | ||
| 456 | #endif | 499 | #endif |
| 457 | 500 | ||
| 458 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | 501 | static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) |
| @@ -476,6 +519,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
| 476 | 519 | ||
| 477 | exp.ex_uuid = NULL; | 520 | exp.ex_uuid = NULL; |
| 478 | 521 | ||
| 522 | /* secinfo */ | ||
| 523 | exp.ex_nflavors = 0; | ||
| 524 | |||
| 479 | if (mesg[mlen-1] != '\n') | 525 | if (mesg[mlen-1] != '\n') |
| 480 | return -EINVAL; | 526 | return -EINVAL; |
| 481 | mesg[mlen-1] = 0; | 527 | mesg[mlen-1] = 0; |
| @@ -553,7 +599,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
| 553 | if (exp.ex_uuid == NULL) | 599 | if (exp.ex_uuid == NULL) |
| 554 | err = -ENOMEM; | 600 | err = -ENOMEM; |
| 555 | } | 601 | } |
| 556 | } else | 602 | } else if (strcmp(buf, "secinfo") == 0) |
| 603 | err = secinfo_parse(&mesg, buf, &exp); | ||
| 604 | else | ||
| 557 | /* quietly ignore unknown words and anything | 605 | /* quietly ignore unknown words and anything |
| 558 | * following. Newer user-space can try to set | 606 | * following. Newer user-space can try to set |
| 559 | * new values, then see what the result was. | 607 | * new values, then see what the result was. |
| @@ -593,6 +641,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
| 593 | 641 | ||
| 594 | static void exp_flags(struct seq_file *m, int flag, int fsid, | 642 | static void exp_flags(struct seq_file *m, int flag, int fsid, |
| 595 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); | 643 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); |
| 644 | static void show_secinfo(struct seq_file *m, struct svc_export *exp); | ||
| 596 | 645 | ||
| 597 | static int svc_export_show(struct seq_file *m, | 646 | static int svc_export_show(struct seq_file *m, |
| 598 | struct cache_detail *cd, | 647 | struct cache_detail *cd, |
| @@ -622,6 +671,7 @@ static int svc_export_show(struct seq_file *m, | |||
| 622 | seq_printf(m, "%02x", exp->ex_uuid[i]); | 671 | seq_printf(m, "%02x", exp->ex_uuid[i]); |
| 623 | } | 672 | } |
| 624 | } | 673 | } |
| 674 | show_secinfo(m, exp); | ||
| 625 | } | 675 | } |
| 626 | seq_puts(m, ")\n"); | 676 | seq_puts(m, ")\n"); |
| 627 | return 0; | 677 | return 0; |
| @@ -654,6 +704,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
| 654 | { | 704 | { |
| 655 | struct svc_export *new = container_of(cnew, struct svc_export, h); | 705 | struct svc_export *new = container_of(cnew, struct svc_export, h); |
| 656 | struct svc_export *item = container_of(citem, struct svc_export, h); | 706 | struct svc_export *item = container_of(citem, struct svc_export, h); |
| 707 | int i; | ||
| 657 | 708 | ||
| 658 | new->ex_flags = item->ex_flags; | 709 | new->ex_flags = item->ex_flags; |
| 659 | new->ex_anon_uid = item->ex_anon_uid; | 710 | new->ex_anon_uid = item->ex_anon_uid; |
| @@ -669,6 +720,10 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
| 669 | item->ex_fslocs.locations_count = 0; | 720 | item->ex_fslocs.locations_count = 0; |
| 670 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; | 721 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; |
| 671 | item->ex_fslocs.migrated = 0; | 722 | item->ex_fslocs.migrated = 0; |
| 723 | new->ex_nflavors = item->ex_nflavors; | ||
| 724 | for (i = 0; i < MAX_SECINFO_LIST; i++) { | ||
| 725 | new->ex_flavors[i] = item->ex_flavors[i]; | ||
| 726 | } | ||
| 672 | } | 727 | } |
| 673 | 728 | ||
| 674 | static struct cache_head *svc_export_alloc(void) | 729 | static struct cache_head *svc_export_alloc(void) |
| @@ -738,16 +793,18 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) | |||
| 738 | int err; | 793 | int err; |
| 739 | 794 | ||
| 740 | if (!clp) | 795 | if (!clp) |
| 741 | return NULL; | 796 | return ERR_PTR(-ENOENT); |
| 742 | 797 | ||
| 743 | key.ek_client = clp; | 798 | key.ek_client = clp; |
| 744 | key.ek_fsidtype = fsid_type; | 799 | key.ek_fsidtype = fsid_type; |
| 745 | memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); | 800 | memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); |
| 746 | 801 | ||
| 747 | ek = svc_expkey_lookup(&key); | 802 | ek = svc_expkey_lookup(&key); |
| 748 | if (ek != NULL) | 803 | if (ek == NULL) |
| 749 | if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp))) | 804 | return ERR_PTR(-ENOMEM); |
| 750 | ek = ERR_PTR(err); | 805 | err = cache_check(&svc_expkey_cache, &ek->h, reqp); |
| 806 | if (err) | ||
| 807 | return ERR_PTR(err); | ||
| 751 | return ek; | 808 | return ek; |
| 752 | } | 809 | } |
| 753 | 810 | ||
| @@ -808,30 +865,21 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, | |||
| 808 | struct cache_req *reqp) | 865 | struct cache_req *reqp) |
| 809 | { | 866 | { |
| 810 | struct svc_export *exp, key; | 867 | struct svc_export *exp, key; |
| 868 | int err; | ||
| 811 | 869 | ||
| 812 | if (!clp) | 870 | if (!clp) |
| 813 | return NULL; | 871 | return ERR_PTR(-ENOENT); |
| 814 | 872 | ||
| 815 | key.ex_client = clp; | 873 | key.ex_client = clp; |
| 816 | key.ex_mnt = mnt; | 874 | key.ex_mnt = mnt; |
| 817 | key.ex_dentry = dentry; | 875 | key.ex_dentry = dentry; |
| 818 | 876 | ||
| 819 | exp = svc_export_lookup(&key); | 877 | exp = svc_export_lookup(&key); |
| 820 | if (exp != NULL) { | 878 | if (exp == NULL) |
| 821 | int err; | 879 | return ERR_PTR(-ENOMEM); |
| 822 | 880 | err = cache_check(&svc_export_cache, &exp->h, reqp); | |
| 823 | err = cache_check(&svc_export_cache, &exp->h, reqp); | 881 | if (err) |
| 824 | switch (err) { | 882 | return ERR_PTR(err); |
| 825 | case 0: break; | ||
| 826 | case -EAGAIN: | ||
| 827 | case -ETIMEDOUT: | ||
| 828 | exp = ERR_PTR(err); | ||
| 829 | break; | ||
| 830 | default: | ||
| 831 | exp = NULL; | ||
| 832 | } | ||
| 833 | } | ||
| 834 | |||
| 835 | return exp; | 883 | return exp; |
| 836 | } | 884 | } |
| 837 | 885 | ||
| @@ -847,7 +895,7 @@ exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, | |||
| 847 | dget(dentry); | 895 | dget(dentry); |
| 848 | exp = exp_get_by_name(clp, mnt, dentry, reqp); | 896 | exp = exp_get_by_name(clp, mnt, dentry, reqp); |
| 849 | 897 | ||
| 850 | while (exp == NULL && !IS_ROOT(dentry)) { | 898 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { |
| 851 | struct dentry *parent; | 899 | struct dentry *parent; |
| 852 | 900 | ||
| 853 | parent = dget_parent(dentry); | 901 | parent = dget_parent(dentry); |
| @@ -900,7 +948,7 @@ static void exp_fsid_unhash(struct svc_export *exp) | |||
| 900 | return; | 948 | return; |
| 901 | 949 | ||
| 902 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); | 950 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); |
| 903 | if (ek && !IS_ERR(ek)) { | 951 | if (!IS_ERR(ek)) { |
| 904 | ek->h.expiry_time = get_seconds()-1; | 952 | ek->h.expiry_time = get_seconds()-1; |
| 905 | cache_put(&ek->h, &svc_expkey_cache); | 953 | cache_put(&ek->h, &svc_expkey_cache); |
| 906 | } | 954 | } |
| @@ -938,7 +986,7 @@ static void exp_unhash(struct svc_export *exp) | |||
| 938 | struct inode *inode = exp->ex_dentry->d_inode; | 986 | struct inode *inode = exp->ex_dentry->d_inode; |
| 939 | 987 | ||
| 940 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); | 988 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); |
| 941 | if (ek && !IS_ERR(ek)) { | 989 | if (!IS_ERR(ek)) { |
| 942 | ek->h.expiry_time = get_seconds()-1; | 990 | ek->h.expiry_time = get_seconds()-1; |
| 943 | cache_put(&ek->h, &svc_expkey_cache); | 991 | cache_put(&ek->h, &svc_expkey_cache); |
| 944 | } | 992 | } |
| @@ -989,13 +1037,12 @@ exp_export(struct nfsctl_export *nxp) | |||
| 989 | 1037 | ||
| 990 | /* must make sure there won't be an ex_fsid clash */ | 1038 | /* must make sure there won't be an ex_fsid clash */ |
| 991 | if ((nxp->ex_flags & NFSEXP_FSID) && | 1039 | if ((nxp->ex_flags & NFSEXP_FSID) && |
| 992 | (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && | 1040 | (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && |
| 993 | !IS_ERR(fsid_key) && | ||
| 994 | fsid_key->ek_mnt && | 1041 | fsid_key->ek_mnt && |
| 995 | (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) | 1042 | (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) ) |
| 996 | goto finish; | 1043 | goto finish; |
| 997 | 1044 | ||
| 998 | if (exp) { | 1045 | if (!IS_ERR(exp)) { |
| 999 | /* just a flags/id/fsid update */ | 1046 | /* just a flags/id/fsid update */ |
| 1000 | 1047 | ||
| 1001 | exp_fsid_unhash(exp); | 1048 | exp_fsid_unhash(exp); |
| @@ -1104,7 +1151,7 @@ exp_unexport(struct nfsctl_export *nxp) | |||
| 1104 | err = -EINVAL; | 1151 | err = -EINVAL; |
| 1105 | exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); | 1152 | exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); |
| 1106 | path_release(&nd); | 1153 | path_release(&nd); |
| 1107 | if (!exp) | 1154 | if (IS_ERR(exp)) |
| 1108 | goto out_domain; | 1155 | goto out_domain; |
| 1109 | 1156 | ||
| 1110 | exp_do_unexport(exp); | 1157 | exp_do_unexport(exp); |
| @@ -1149,10 +1196,6 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) | |||
| 1149 | err = PTR_ERR(exp); | 1196 | err = PTR_ERR(exp); |
| 1150 | goto out; | 1197 | goto out; |
| 1151 | } | 1198 | } |
| 1152 | if (!exp) { | ||
| 1153 | dprintk("nfsd: exp_rootfh export not found.\n"); | ||
| 1154 | goto out; | ||
| 1155 | } | ||
| 1156 | 1199 | ||
| 1157 | /* | 1200 | /* |
| 1158 | * fh must be initialized before calling fh_compose | 1201 | * fh must be initialized before calling fh_compose |
| @@ -1176,17 +1219,130 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, | |||
| 1176 | { | 1219 | { |
| 1177 | struct svc_export *exp; | 1220 | struct svc_export *exp; |
| 1178 | struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); | 1221 | struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); |
| 1179 | if (!ek || IS_ERR(ek)) | 1222 | if (IS_ERR(ek)) |
| 1180 | return ERR_PTR(PTR_ERR(ek)); | 1223 | return ERR_PTR(PTR_ERR(ek)); |
| 1181 | 1224 | ||
| 1182 | exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); | 1225 | exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); |
| 1183 | cache_put(&ek->h, &svc_expkey_cache); | 1226 | cache_put(&ek->h, &svc_expkey_cache); |
| 1184 | 1227 | ||
| 1185 | if (!exp || IS_ERR(exp)) | 1228 | if (IS_ERR(exp)) |
| 1186 | return ERR_PTR(PTR_ERR(exp)); | 1229 | return ERR_PTR(PTR_ERR(exp)); |
| 1187 | return exp; | 1230 | return exp; |
| 1188 | } | 1231 | } |
| 1189 | 1232 | ||
| 1233 | __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) | ||
| 1234 | { | ||
| 1235 | struct exp_flavor_info *f; | ||
| 1236 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
| 1237 | |||
| 1238 | /* legacy gss-only clients are always OK: */ | ||
| 1239 | if (exp->ex_client == rqstp->rq_gssclient) | ||
| 1240 | return 0; | ||
| 1241 | /* ip-address based client; check sec= export option: */ | ||
| 1242 | for (f = exp->ex_flavors; f < end; f++) { | ||
| 1243 | if (f->pseudoflavor == rqstp->rq_flavor) | ||
| 1244 | return 0; | ||
| 1245 | } | ||
| 1246 | /* defaults in absence of sec= options: */ | ||
| 1247 | if (exp->ex_nflavors == 0) { | ||
| 1248 | if (rqstp->rq_flavor == RPC_AUTH_NULL || | ||
| 1249 | rqstp->rq_flavor == RPC_AUTH_UNIX) | ||
| 1250 | return 0; | ||
| 1251 | } | ||
| 1252 | return nfserr_wrongsec; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | /* | ||
| 1256 | * Uses rq_client and rq_gssclient to find an export; uses rq_client (an | ||
| 1257 | * auth_unix client) if it's available and has secinfo information; | ||
| 1258 | * otherwise, will try to use rq_gssclient. | ||
| 1259 | * | ||
| 1260 | * Called from functions that handle requests; functions that do work on | ||
| 1261 | * behalf of mountd are passed a single client name to use, and should | ||
| 1262 | * use exp_get_by_name() or exp_find(). | ||
| 1263 | */ | ||
| 1264 | struct svc_export * | ||
| 1265 | rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, | ||
| 1266 | struct dentry *dentry) | ||
| 1267 | { | ||
| 1268 | struct svc_export *gssexp, *exp = NULL; | ||
| 1269 | |||
| 1270 | if (rqstp->rq_client == NULL) | ||
| 1271 | goto gss; | ||
| 1272 | |||
| 1273 | /* First try the auth_unix client: */ | ||
| 1274 | exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, | ||
| 1275 | &rqstp->rq_chandle); | ||
| 1276 | if (PTR_ERR(exp) == -ENOENT) | ||
| 1277 | goto gss; | ||
| 1278 | if (IS_ERR(exp)) | ||
| 1279 | return exp; | ||
| 1280 | /* If it has secinfo, assume there are no gss/... clients */ | ||
| 1281 | if (exp->ex_nflavors > 0) | ||
| 1282 | return exp; | ||
| 1283 | gss: | ||
| 1284 | /* Otherwise, try falling back on gss client */ | ||
| 1285 | if (rqstp->rq_gssclient == NULL) | ||
| 1286 | return exp; | ||
| 1287 | gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, | ||
| 1288 | &rqstp->rq_chandle); | ||
| 1289 | if (PTR_ERR(gssexp) == -ENOENT) | ||
| 1290 | return exp; | ||
| 1291 | if (exp && !IS_ERR(exp)) | ||
| 1292 | exp_put(exp); | ||
| 1293 | return gssexp; | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | struct svc_export * | ||
| 1297 | rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) | ||
| 1298 | { | ||
| 1299 | struct svc_export *gssexp, *exp = NULL; | ||
| 1300 | |||
| 1301 | if (rqstp->rq_client == NULL) | ||
| 1302 | goto gss; | ||
| 1303 | |||
| 1304 | /* First try the auth_unix client: */ | ||
| 1305 | exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle); | ||
| 1306 | if (PTR_ERR(exp) == -ENOENT) | ||
| 1307 | goto gss; | ||
| 1308 | if (IS_ERR(exp)) | ||
| 1309 | return exp; | ||
| 1310 | /* If it has secinfo, assume there are no gss/... clients */ | ||
| 1311 | if (exp->ex_nflavors > 0) | ||
| 1312 | return exp; | ||
| 1313 | gss: | ||
| 1314 | /* Otherwise, try falling back on gss client */ | ||
| 1315 | if (rqstp->rq_gssclient == NULL) | ||
| 1316 | return exp; | ||
| 1317 | gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv, | ||
| 1318 | &rqstp->rq_chandle); | ||
| 1319 | if (PTR_ERR(gssexp) == -ENOENT) | ||
| 1320 | return exp; | ||
| 1321 | if (exp && !IS_ERR(exp)) | ||
| 1322 | exp_put(exp); | ||
| 1323 | return gssexp; | ||
| 1324 | } | ||
| 1325 | |||
| 1326 | struct svc_export * | ||
| 1327 | rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, | ||
| 1328 | struct dentry *dentry) | ||
| 1329 | { | ||
| 1330 | struct svc_export *exp; | ||
| 1331 | |||
| 1332 | dget(dentry); | ||
| 1333 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | ||
| 1334 | |||
| 1335 | while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { | ||
| 1336 | struct dentry *parent; | ||
| 1337 | |||
| 1338 | parent = dget_parent(dentry); | ||
| 1339 | dput(dentry); | ||
| 1340 | dentry = parent; | ||
| 1341 | exp = rqst_exp_get_by_name(rqstp, mnt, dentry); | ||
| 1342 | } | ||
| 1343 | dput(dentry); | ||
| 1344 | return exp; | ||
| 1345 | } | ||
| 1190 | 1346 | ||
| 1191 | /* | 1347 | /* |
| 1192 | * Called when we need the filehandle for the root of the pseudofs, | 1348 | * Called when we need the filehandle for the root of the pseudofs, |
| @@ -1194,8 +1350,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, | |||
| 1194 | * export point with fsid==0 | 1350 | * export point with fsid==0 |
| 1195 | */ | 1351 | */ |
| 1196 | __be32 | 1352 | __be32 |
| 1197 | exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, | 1353 | exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) |
| 1198 | struct cache_req *creq) | ||
| 1199 | { | 1354 | { |
| 1200 | struct svc_export *exp; | 1355 | struct svc_export *exp; |
| 1201 | __be32 rv; | 1356 | __be32 rv; |
| @@ -1203,12 +1358,16 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, | |||
| 1203 | 1358 | ||
| 1204 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); | 1359 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); |
| 1205 | 1360 | ||
| 1206 | exp = exp_find(clp, FSID_NUM, fsidv, creq); | 1361 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); |
| 1362 | if (PTR_ERR(exp) == -ENOENT) | ||
| 1363 | return nfserr_perm; | ||
| 1207 | if (IS_ERR(exp)) | 1364 | if (IS_ERR(exp)) |
| 1208 | return nfserrno(PTR_ERR(exp)); | 1365 | return nfserrno(PTR_ERR(exp)); |
| 1209 | if (exp == NULL) | ||
| 1210 | return nfserr_perm; | ||
| 1211 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); | 1366 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); |
| 1367 | if (rv) | ||
| 1368 | goto out; | ||
| 1369 | rv = check_nfsd_access(exp, rqstp); | ||
| 1370 | out: | ||
| 1212 | exp_put(exp); | 1371 | exp_put(exp); |
| 1213 | return rv; | 1372 | return rv; |
| 1214 | } | 1373 | } |
| @@ -1296,28 +1455,62 @@ static struct flags { | |||
| 1296 | { 0, {"", ""}} | 1455 | { 0, {"", ""}} |
| 1297 | }; | 1456 | }; |
| 1298 | 1457 | ||
| 1299 | static void exp_flags(struct seq_file *m, int flag, int fsid, | 1458 | static void show_expflags(struct seq_file *m, int flags, int mask) |
| 1300 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) | ||
| 1301 | { | 1459 | { |
| 1302 | int first = 0; | ||
| 1303 | struct flags *flg; | 1460 | struct flags *flg; |
| 1461 | int state, first = 0; | ||
| 1304 | 1462 | ||
| 1305 | for (flg = expflags; flg->flag; flg++) { | 1463 | for (flg = expflags; flg->flag; flg++) { |
| 1306 | int state = (flg->flag & flag)?0:1; | 1464 | if (flg->flag & ~mask) |
| 1465 | continue; | ||
| 1466 | state = (flg->flag & flags) ? 0 : 1; | ||
| 1307 | if (*flg->name[state]) | 1467 | if (*flg->name[state]) |
| 1308 | seq_printf(m, "%s%s", first++?",":"", flg->name[state]); | 1468 | seq_printf(m, "%s%s", first++?",":"", flg->name[state]); |
| 1309 | } | 1469 | } |
| 1470 | } | ||
| 1471 | |||
| 1472 | static void show_secinfo_flags(struct seq_file *m, int flags) | ||
| 1473 | { | ||
| 1474 | seq_printf(m, ","); | ||
| 1475 | show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); | ||
| 1476 | } | ||
| 1477 | |||
| 1478 | static void show_secinfo(struct seq_file *m, struct svc_export *exp) | ||
| 1479 | { | ||
| 1480 | struct exp_flavor_info *f; | ||
| 1481 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | ||
| 1482 | int lastflags = 0, first = 0; | ||
| 1483 | |||
| 1484 | if (exp->ex_nflavors == 0) | ||
| 1485 | return; | ||
| 1486 | for (f = exp->ex_flavors; f < end; f++) { | ||
| 1487 | if (first || f->flags != lastflags) { | ||
| 1488 | if (!first) | ||
| 1489 | show_secinfo_flags(m, lastflags); | ||
| 1490 | seq_printf(m, ",sec=%d", f->pseudoflavor); | ||
| 1491 | lastflags = f->flags; | ||
| 1492 | } else { | ||
| 1493 | seq_printf(m, ":%d", f->pseudoflavor); | ||
| 1494 | } | ||
| 1495 | } | ||
| 1496 | show_secinfo_flags(m, lastflags); | ||
| 1497 | } | ||
| 1498 | |||
| 1499 | static void exp_flags(struct seq_file *m, int flag, int fsid, | ||
| 1500 | uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) | ||
| 1501 | { | ||
| 1502 | show_expflags(m, flag, NFSEXP_ALLFLAGS); | ||
| 1310 | if (flag & NFSEXP_FSID) | 1503 | if (flag & NFSEXP_FSID) |
| 1311 | seq_printf(m, "%sfsid=%d", first++?",":"", fsid); | 1504 | seq_printf(m, ",fsid=%d", fsid); |
| 1312 | if (anonu != (uid_t)-2 && anonu != (0x10000-2)) | 1505 | if (anonu != (uid_t)-2 && anonu != (0x10000-2)) |
| 1313 | seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); | 1506 | seq_printf(m, ",sanonuid=%d", anonu); |
| 1314 | if (anong != (gid_t)-2 && anong != (0x10000-2)) | 1507 | if (anong != (gid_t)-2 && anong != (0x10000-2)) |
| 1315 | seq_printf(m, "%sanongid=%d", first++?",":"", anong); | 1508 | seq_printf(m, ",sanongid=%d", anong); |
| 1316 | if (fsloc && fsloc->locations_count > 0) { | 1509 | if (fsloc && fsloc->locations_count > 0) { |
| 1317 | char *loctype = (fsloc->migrated) ? "refer" : "replicas"; | 1510 | char *loctype = (fsloc->migrated) ? "refer" : "replicas"; |
| 1318 | int i; | 1511 | int i; |
| 1319 | 1512 | ||
| 1320 | seq_printf(m, "%s%s=", first++?",":"", loctype); | 1513 | seq_printf(m, ",%s=", loctype); |
| 1321 | seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); | 1514 | seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); |
| 1322 | seq_putc(m, '@'); | 1515 | seq_putc(m, '@'); |
| 1323 | seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); | 1516 | seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); |
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 221acd1f11f6..9e4a568a5013 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c | |||
| @@ -65,6 +65,7 @@ nlm_fclose(struct file *filp) | |||
| 65 | static struct nlmsvc_binding nfsd_nlm_ops = { | 65 | static struct nlmsvc_binding nfsd_nlm_ops = { |
| 66 | .fopen = nlm_fopen, /* open file for locking */ | 66 | .fopen = nlm_fopen, /* open file for locking */ |
| 67 | .fclose = nlm_fclose, /* close file */ | 67 | .fclose = nlm_fclose, /* close file */ |
| 68 | .get_grace_period = get_nfs4_grace_period, | ||
| 68 | }; | 69 | }; |
| 69 | 70 | ||
| 70 | void | 71 | void |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index cc3b7badd486..b6ed38380ab8 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
| @@ -183,8 +183,13 @@ static void | |||
| 183 | summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) | 183 | summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) |
| 184 | { | 184 | { |
| 185 | struct posix_acl_entry *pa, *pe; | 185 | struct posix_acl_entry *pa, *pe; |
| 186 | pas->users = 0; | 186 | |
| 187 | pas->groups = 0; | 187 | /* |
| 188 | * Only pas.users and pas.groups need initialization; previous | ||
| 189 | * posix_acl_valid() calls ensure that the other fields will be | ||
| 190 | * initialized in the following loop. But, just to placate gcc: | ||
| 191 | */ | ||
| 192 | memset(pas, 0, sizeof(*pas)); | ||
| 188 | pas->mask = 07; | 193 | pas->mask = 07; |
| 189 | 194 | ||
| 190 | pe = acl->a_entries + acl->a_count; | 195 | pe = acl->a_entries + acl->a_count; |
| @@ -732,13 +737,16 @@ int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, | |||
| 732 | *pacl = posix_state_to_acl(&effective_acl_state, flags); | 737 | *pacl = posix_state_to_acl(&effective_acl_state, flags); |
| 733 | if (IS_ERR(*pacl)) { | 738 | if (IS_ERR(*pacl)) { |
| 734 | ret = PTR_ERR(*pacl); | 739 | ret = PTR_ERR(*pacl); |
| 740 | *pacl = NULL; | ||
| 735 | goto out_dstate; | 741 | goto out_dstate; |
| 736 | } | 742 | } |
| 737 | *dpacl = posix_state_to_acl(&default_acl_state, | 743 | *dpacl = posix_state_to_acl(&default_acl_state, |
| 738 | flags | NFS4_ACL_TYPE_DEFAULT); | 744 | flags | NFS4_ACL_TYPE_DEFAULT); |
| 739 | if (IS_ERR(*dpacl)) { | 745 | if (IS_ERR(*dpacl)) { |
| 740 | ret = PTR_ERR(*dpacl); | 746 | ret = PTR_ERR(*dpacl); |
| 747 | *dpacl = NULL; | ||
| 741 | posix_acl_release(*pacl); | 748 | posix_acl_release(*pacl); |
| 749 | *pacl = NULL; | ||
| 742 | goto out_dstate; | 750 | goto out_dstate; |
| 743 | } | 751 | } |
| 744 | sort_pacl(*pacl); | 752 | sort_pacl(*pacl); |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5443c52b57aa..31d6633c7fe4 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -75,7 +75,7 @@ enum nfs_cb_opnum4 { | |||
| 75 | #define op_enc_sz 1 | 75 | #define op_enc_sz 1 |
| 76 | #define op_dec_sz 2 | 76 | #define op_dec_sz 2 |
| 77 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) | 77 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) |
| 78 | #define enc_stateid_sz 16 | 78 | #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) |
| 79 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ | 79 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ |
| 80 | 1 + enc_stateid_sz + \ | 80 | 1 + enc_stateid_sz + \ |
| 81 | enc_nfs4_fh_sz) | 81 | enc_nfs4_fh_sz) |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 45aa21ce6784..2cf9a9a2d89c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
| @@ -587,6 +587,15 @@ idmap_lookup(struct svc_rqst *rqstp, | |||
| 587 | return ret; | 587 | return ret; |
| 588 | } | 588 | } |
| 589 | 589 | ||
| 590 | static char * | ||
| 591 | rqst_authname(struct svc_rqst *rqstp) | ||
| 592 | { | ||
| 593 | struct auth_domain *clp; | ||
| 594 | |||
| 595 | clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client; | ||
| 596 | return clp->name; | ||
| 597 | } | ||
| 598 | |||
| 590 | static int | 599 | static int |
| 591 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, | 600 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, |
| 592 | uid_t *id) | 601 | uid_t *id) |
| @@ -600,7 +609,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen | |||
| 600 | return -EINVAL; | 609 | return -EINVAL; |
| 601 | memcpy(key.name, name, namelen); | 610 | memcpy(key.name, name, namelen); |
| 602 | key.name[namelen] = '\0'; | 611 | key.name[namelen] = '\0'; |
| 603 | strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); | 612 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); |
| 604 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); | 613 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); |
| 605 | if (ret == -ENOENT) | 614 | if (ret == -ENOENT) |
| 606 | ret = -ESRCH; /* nfserr_badname */ | 615 | ret = -ESRCH; /* nfserr_badname */ |
| @@ -620,7 +629,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) | |||
| 620 | }; | 629 | }; |
| 621 | int ret; | 630 | int ret; |
| 622 | 631 | ||
| 623 | strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); | 632 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); |
| 624 | ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); | 633 | ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); |
| 625 | if (ret == -ENOENT) | 634 | if (ret == -ENOENT) |
| 626 | return sprintf(name, "%u", id); | 635 | return sprintf(name, "%u", id); |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8522729830db..3c627128e205 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <linux/nfsd/state.h> | 47 | #include <linux/nfsd/state.h> |
| 48 | #include <linux/nfsd/xdr4.h> | 48 | #include <linux/nfsd/xdr4.h> |
| 49 | #include <linux/nfs4_acl.h> | 49 | #include <linux/nfs4_acl.h> |
| 50 | #include <linux/sunrpc/gss_api.h> | ||
| 50 | 51 | ||
| 51 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 52 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
| 52 | 53 | ||
| @@ -286,8 +287,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 286 | __be32 status; | 287 | __be32 status; |
| 287 | 288 | ||
| 288 | fh_put(&cstate->current_fh); | 289 | fh_put(&cstate->current_fh); |
| 289 | status = exp_pseudoroot(rqstp->rq_client, &cstate->current_fh, | 290 | status = exp_pseudoroot(rqstp, &cstate->current_fh); |
| 290 | &rqstp->rq_chandle); | ||
| 291 | return status; | 291 | return status; |
| 292 | } | 292 | } |
| 293 | 293 | ||
| @@ -474,8 +474,8 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 474 | __be32 ret; | 474 | __be32 ret; |
| 475 | 475 | ||
| 476 | fh_init(&tmp_fh, NFS4_FHSIZE); | 476 | fh_init(&tmp_fh, NFS4_FHSIZE); |
| 477 | if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, | 477 | ret = exp_pseudoroot(rqstp, &tmp_fh); |
| 478 | &rqstp->rq_chandle)) != 0) | 478 | if (ret) |
| 479 | return ret; | 479 | return ret; |
| 480 | if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { | 480 | if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { |
| 481 | fh_put(&tmp_fh); | 481 | fh_put(&tmp_fh); |
| @@ -611,6 +611,30 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 611 | } | 611 | } |
| 612 | 612 | ||
| 613 | static __be32 | 613 | static __be32 |
| 614 | nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
| 615 | struct nfsd4_secinfo *secinfo) | ||
| 616 | { | ||
| 617 | struct svc_fh resfh; | ||
| 618 | struct svc_export *exp; | ||
| 619 | struct dentry *dentry; | ||
| 620 | __be32 err; | ||
| 621 | |||
| 622 | fh_init(&resfh, NFS4_FHSIZE); | ||
| 623 | err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, | ||
| 624 | secinfo->si_name, secinfo->si_namelen, | ||
| 625 | &exp, &dentry); | ||
| 626 | if (err) | ||
| 627 | return err; | ||
| 628 | if (dentry->d_inode == NULL) { | ||
| 629 | exp_put(exp); | ||
| 630 | err = nfserr_noent; | ||
| 631 | } else | ||
| 632 | secinfo->si_exp = exp; | ||
| 633 | dput(dentry); | ||
| 634 | return err; | ||
| 635 | } | ||
| 636 | |||
| 637 | static __be32 | ||
| 614 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 638 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
| 615 | struct nfsd4_setattr *setattr) | 639 | struct nfsd4_setattr *setattr) |
| 616 | { | 640 | { |
| @@ -1009,6 +1033,9 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
| 1009 | [OP_SAVEFH] = { | 1033 | [OP_SAVEFH] = { |
| 1010 | .op_func = (nfsd4op_func)nfsd4_savefh, | 1034 | .op_func = (nfsd4op_func)nfsd4_savefh, |
| 1011 | }, | 1035 | }, |
| 1036 | [OP_SECINFO] = { | ||
| 1037 | .op_func = (nfsd4op_func)nfsd4_secinfo, | ||
| 1038 | }, | ||
| 1012 | [OP_SETATTR] = { | 1039 | [OP_SETATTR] = { |
| 1013 | .op_func = (nfsd4op_func)nfsd4_setattr, | 1040 | .op_func = (nfsd4op_func)nfsd4_setattr, |
| 1014 | }, | 1041 | }, |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8c52913d7cb6..e4a4c87ec8c6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -49,8 +49,10 @@ | |||
| 49 | #include <linux/nfsd/state.h> | 49 | #include <linux/nfsd/state.h> |
| 50 | #include <linux/nfsd/xdr4.h> | 50 | #include <linux/nfsd/xdr4.h> |
| 51 | #include <linux/namei.h> | 51 | #include <linux/namei.h> |
| 52 | #include <linux/swap.h> | ||
| 52 | #include <linux/mutex.h> | 53 | #include <linux/mutex.h> |
| 53 | #include <linux/lockd/bind.h> | 54 | #include <linux/lockd/bind.h> |
| 55 | #include <linux/module.h> | ||
| 54 | 56 | ||
| 55 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 57 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
| 56 | 58 | ||
| @@ -149,6 +151,7 @@ get_nfs4_file(struct nfs4_file *fi) | |||
| 149 | } | 151 | } |
| 150 | 152 | ||
| 151 | static int num_delegations; | 153 | static int num_delegations; |
| 154 | unsigned int max_delegations; | ||
| 152 | 155 | ||
| 153 | /* | 156 | /* |
| 154 | * Open owner state (share locks) | 157 | * Open owner state (share locks) |
| @@ -192,7 +195,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
| 192 | struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; | 195 | struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; |
| 193 | 196 | ||
| 194 | dprintk("NFSD alloc_init_deleg\n"); | 197 | dprintk("NFSD alloc_init_deleg\n"); |
| 195 | if (num_delegations > STATEID_HASH_SIZE * 4) | 198 | if (fp->fi_had_conflict) |
| 199 | return NULL; | ||
| 200 | if (num_delegations > max_delegations) | ||
| 196 | return NULL; | 201 | return NULL; |
| 197 | dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); | 202 | dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); |
| 198 | if (dp == NULL) | 203 | if (dp == NULL) |
| @@ -999,6 +1004,7 @@ alloc_init_file(struct inode *ino) | |||
| 999 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | 1004 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
| 1000 | fp->fi_inode = igrab(ino); | 1005 | fp->fi_inode = igrab(ino); |
| 1001 | fp->fi_id = current_fileid++; | 1006 | fp->fi_id = current_fileid++; |
| 1007 | fp->fi_had_conflict = false; | ||
| 1002 | return fp; | 1008 | return fp; |
| 1003 | } | 1009 | } |
| 1004 | return NULL; | 1010 | return NULL; |
| @@ -1325,6 +1331,7 @@ do_recall(void *__dp) | |||
| 1325 | { | 1331 | { |
| 1326 | struct nfs4_delegation *dp = __dp; | 1332 | struct nfs4_delegation *dp = __dp; |
| 1327 | 1333 | ||
| 1334 | dp->dl_file->fi_had_conflict = true; | ||
| 1328 | nfsd4_cb_recall(dp); | 1335 | nfsd4_cb_recall(dp); |
| 1329 | return 0; | 1336 | return 0; |
| 1330 | } | 1337 | } |
| @@ -3190,20 +3197,49 @@ nfsd4_load_reboot_recovery_data(void) | |||
| 3190 | printk("NFSD: Failure reading reboot recovery data\n"); | 3197 | printk("NFSD: Failure reading reboot recovery data\n"); |
| 3191 | } | 3198 | } |
| 3192 | 3199 | ||
| 3200 | unsigned long | ||
| 3201 | get_nfs4_grace_period(void) | ||
| 3202 | { | ||
| 3203 | return max(user_lease_time, lease_time) * HZ; | ||
| 3204 | } | ||
| 3205 | |||
| 3206 | /* | ||
| 3207 | * Since the lifetime of a delegation isn't limited to that of an open, a | ||
| 3208 | * client may quite reasonably hang on to a delegation as long as it has | ||
| 3209 | * the inode cached. This becomes an obvious problem the first time a | ||
| 3210 | * client's inode cache approaches the size of the server's total memory. | ||
| 3211 | * | ||
| 3212 | * For now we avoid this problem by imposing a hard limit on the number | ||
| 3213 | * of delegations, which varies according to the server's memory size. | ||
| 3214 | */ | ||
| 3215 | static void | ||
| 3216 | set_max_delegations(void) | ||
| 3217 | { | ||
| 3218 | /* | ||
| 3219 | * Allow at most 4 delegations per megabyte of RAM. Quick | ||
| 3220 | * estimates suggest that in the worst case (where every delegation | ||
| 3221 | * is for a different inode), a delegation could take about 1.5K, | ||
| 3222 | * giving a worst case usage of about 6% of memory. | ||
| 3223 | */ | ||
| 3224 | max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); | ||
| 3225 | } | ||
| 3226 | |||
| 3193 | /* initialization to perform when the nfsd service is started: */ | 3227 | /* initialization to perform when the nfsd service is started: */ |
| 3194 | 3228 | ||
| 3195 | static void | 3229 | static void |
| 3196 | __nfs4_state_start(void) | 3230 | __nfs4_state_start(void) |
| 3197 | { | 3231 | { |
| 3198 | time_t grace_time; | 3232 | unsigned long grace_time; |
| 3199 | 3233 | ||
| 3200 | boot_time = get_seconds(); | 3234 | boot_time = get_seconds(); |
| 3201 | grace_time = max(user_lease_time, lease_time); | 3235 | grace_time = get_nfs_grace_period(); |
| 3202 | lease_time = user_lease_time; | 3236 | lease_time = user_lease_time; |
| 3203 | in_grace = 1; | 3237 | in_grace = 1; |
| 3204 | printk("NFSD: starting %ld-second grace period\n", grace_time); | 3238 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
| 3239 | grace_time/HZ); | ||
| 3205 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 3240 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
| 3206 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ); | 3241 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time); |
| 3242 | set_max_delegations(); | ||
| 3207 | } | 3243 | } |
| 3208 | 3244 | ||
| 3209 | int | 3245 | int |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15809dfd88a5..b3d55c6747fd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -56,6 +56,8 @@ | |||
| 56 | #include <linux/nfsd_idmap.h> | 56 | #include <linux/nfsd_idmap.h> |
| 57 | #include <linux/nfs4.h> | 57 | #include <linux/nfs4.h> |
| 58 | #include <linux/nfs4_acl.h> | 58 | #include <linux/nfs4_acl.h> |
| 59 | #include <linux/sunrpc/gss_api.h> | ||
| 60 | #include <linux/sunrpc/svcauth_gss.h> | ||
| 59 | 61 | ||
| 60 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 62 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
| 61 | 63 | ||
| @@ -819,6 +821,23 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) | |||
| 819 | } | 821 | } |
| 820 | 822 | ||
| 821 | static __be32 | 823 | static __be32 |
| 824 | nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | ||
| 825 | struct nfsd4_secinfo *secinfo) | ||
| 826 | { | ||
| 827 | DECODE_HEAD; | ||
| 828 | |||
| 829 | READ_BUF(4); | ||
| 830 | READ32(secinfo->si_namelen); | ||
| 831 | READ_BUF(secinfo->si_namelen); | ||
| 832 | SAVEMEM(secinfo->si_name, secinfo->si_namelen); | ||
| 833 | status = check_filename(secinfo->si_name, secinfo->si_namelen, | ||
| 834 | nfserr_noent); | ||
| 835 | if (status) | ||
| 836 | return status; | ||
| 837 | DECODE_TAIL; | ||
| 838 | } | ||
| 839 | |||
| 840 | static __be32 | ||
| 822 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 841 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
| 823 | { | 842 | { |
| 824 | DECODE_HEAD; | 843 | DECODE_HEAD; |
| @@ -1131,6 +1150,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
| 1131 | case OP_SAVEFH: | 1150 | case OP_SAVEFH: |
| 1132 | op->status = nfs_ok; | 1151 | op->status = nfs_ok; |
| 1133 | break; | 1152 | break; |
| 1153 | case OP_SECINFO: | ||
| 1154 | op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo); | ||
| 1155 | break; | ||
| 1134 | case OP_SETATTR: | 1156 | case OP_SETATTR: |
| 1135 | op->status = nfsd4_decode_setattr(argp, &op->u.setattr); | 1157 | op->status = nfsd4_decode_setattr(argp, &op->u.setattr); |
| 1136 | break; | 1158 | break; |
| @@ -1296,7 +1318,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * | |||
| 1296 | char *path, *rootpath; | 1318 | char *path, *rootpath; |
| 1297 | 1319 | ||
| 1298 | fh_init(&tmp_fh, NFS4_FHSIZE); | 1320 | fh_init(&tmp_fh, NFS4_FHSIZE); |
| 1299 | *stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle); | 1321 | *stat = exp_pseudoroot(rqstp, &tmp_fh); |
| 1300 | if (*stat) | 1322 | if (*stat) |
| 1301 | return NULL; | 1323 | return NULL; |
| 1302 | rootpath = tmp_fh.fh_export->ex_path; | 1324 | rootpath = tmp_fh.fh_export->ex_path; |
| @@ -1847,11 +1869,19 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
| 1847 | if (d_mountpoint(dentry)) { | 1869 | if (d_mountpoint(dentry)) { |
| 1848 | int err; | 1870 | int err; |
| 1849 | 1871 | ||
| 1872 | /* | ||
| 1873 | * Why the heck aren't we just using nfsd_lookup?? | ||
| 1874 | * Different "."/".." handling? Something else? | ||
| 1875 | * At least, add a comment here to explain.... | ||
| 1876 | */ | ||
| 1850 | err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp); | 1877 | err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp); |
| 1851 | if (err) { | 1878 | if (err) { |
| 1852 | nfserr = nfserrno(err); | 1879 | nfserr = nfserrno(err); |
| 1853 | goto out_put; | 1880 | goto out_put; |
| 1854 | } | 1881 | } |
| 1882 | nfserr = check_nfsd_access(exp, cd->rd_rqstp); | ||
| 1883 | if (nfserr) | ||
| 1884 | goto out_put; | ||
| 1855 | 1885 | ||
| 1856 | } | 1886 | } |
| 1857 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, | 1887 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, |
| @@ -2419,6 +2449,72 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ | |||
| 2419 | } | 2449 | } |
| 2420 | } | 2450 | } |
| 2421 | 2451 | ||
| 2452 | static void | ||
| 2453 | nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, int nfserr, | ||
| 2454 | struct nfsd4_secinfo *secinfo) | ||
| 2455 | { | ||
| 2456 | int i = 0; | ||
| 2457 | struct svc_export *exp = secinfo->si_exp; | ||
| 2458 | u32 nflavs; | ||
| 2459 | struct exp_flavor_info *flavs; | ||
| 2460 | struct exp_flavor_info def_flavs[2]; | ||
| 2461 | ENCODE_HEAD; | ||
| 2462 | |||
| 2463 | if (nfserr) | ||
| 2464 | goto out; | ||
| 2465 | if (exp->ex_nflavors) { | ||
| 2466 | flavs = exp->ex_flavors; | ||
| 2467 | nflavs = exp->ex_nflavors; | ||
| 2468 | } else { /* Handling of some defaults in absence of real secinfo: */ | ||
| 2469 | flavs = def_flavs; | ||
| 2470 | if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) { | ||
| 2471 | nflavs = 2; | ||
| 2472 | flavs[0].pseudoflavor = RPC_AUTH_UNIX; | ||
| 2473 | flavs[1].pseudoflavor = RPC_AUTH_NULL; | ||
| 2474 | } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) { | ||
| 2475 | nflavs = 1; | ||
| 2476 | flavs[0].pseudoflavor | ||
| 2477 | = svcauth_gss_flavor(exp->ex_client); | ||
| 2478 | } else { | ||
| 2479 | nflavs = 1; | ||
| 2480 | flavs[0].pseudoflavor | ||
| 2481 | = exp->ex_client->flavour->flavour; | ||
| 2482 | } | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | RESERVE_SPACE(4); | ||
| 2486 | WRITE32(nflavs); | ||
| 2487 | ADJUST_ARGS(); | ||
| 2488 | for (i = 0; i < nflavs; i++) { | ||
| 2489 | u32 flav = flavs[i].pseudoflavor; | ||
| 2490 | struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); | ||
| 2491 | |||
| 2492 | if (gm) { | ||
| 2493 | RESERVE_SPACE(4); | ||
| 2494 | WRITE32(RPC_AUTH_GSS); | ||
| 2495 | ADJUST_ARGS(); | ||
| 2496 | RESERVE_SPACE(4 + gm->gm_oid.len); | ||
| 2497 | WRITE32(gm->gm_oid.len); | ||
| 2498 | WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); | ||
| 2499 | ADJUST_ARGS(); | ||
| 2500 | RESERVE_SPACE(4); | ||
| 2501 | WRITE32(0); /* qop */ | ||
| 2502 | ADJUST_ARGS(); | ||
| 2503 | RESERVE_SPACE(4); | ||
| 2504 | WRITE32(gss_pseudoflavor_to_service(gm, flav)); | ||
| 2505 | ADJUST_ARGS(); | ||
| 2506 | gss_mech_put(gm); | ||
| 2507 | } else { | ||
| 2508 | RESERVE_SPACE(4); | ||
| 2509 | WRITE32(flav); | ||
| 2510 | ADJUST_ARGS(); | ||
| 2511 | } | ||
| 2512 | } | ||
| 2513 | out: | ||
| 2514 | if (exp) | ||
| 2515 | exp_put(exp); | ||
| 2516 | } | ||
| 2517 | |||
| 2422 | /* | 2518 | /* |
| 2423 | * The SETATTR encode routine is special -- it always encodes a bitmap, | 2519 | * The SETATTR encode routine is special -- it always encodes a bitmap, |
| 2424 | * regardless of the error status. | 2520 | * regardless of the error status. |
| @@ -2559,6 +2655,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | |||
| 2559 | break; | 2655 | break; |
| 2560 | case OP_SAVEFH: | 2656 | case OP_SAVEFH: |
| 2561 | break; | 2657 | break; |
| 2658 | case OP_SECINFO: | ||
| 2659 | nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo); | ||
| 2660 | break; | ||
| 2562 | case OP_SETATTR: | 2661 | case OP_SETATTR: |
| 2563 | nfsd4_encode_setattr(resp, op->status, &op->u.setattr); | 2662 | nfsd4_encode_setattr(resp, op->status, &op->u.setattr); |
| 2564 | break; | 2663 | break; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 71c686dc7257..baac89d917ca 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
| @@ -35,7 +35,6 @@ | |||
| 35 | #include <linux/nfsd/cache.h> | 35 | #include <linux/nfsd/cache.h> |
| 36 | #include <linux/nfsd/xdr.h> | 36 | #include <linux/nfsd/xdr.h> |
| 37 | #include <linux/nfsd/syscall.h> | 37 | #include <linux/nfsd/syscall.h> |
| 38 | #include <linux/nfsd/interface.h> | ||
| 39 | 38 | ||
| 40 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
| 41 | 40 | ||
| @@ -245,7 +244,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) | |||
| 245 | } | 244 | } |
| 246 | exp_readunlock(); | 245 | exp_readunlock(); |
| 247 | if (err == 0) | 246 | if (err == 0) |
| 248 | err = res->fh_size + (int)&((struct knfsd_fh*)0)->fh_base; | 247 | err = res->fh_size + offsetof(struct knfsd_fh, fh_base); |
| 249 | out: | 248 | out: |
| 250 | return err; | 249 | return err; |
| 251 | } | 250 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6ca2d24fc216..0eb464a39aae 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
| @@ -15,10 +15,12 @@ | |||
| 15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
| 16 | #include <linux/stat.h> | 16 | #include <linux/stat.h> |
| 17 | #include <linux/dcache.h> | 17 | #include <linux/dcache.h> |
| 18 | #include <linux/exportfs.h> | ||
| 18 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
| 19 | 20 | ||
| 20 | #include <linux/sunrpc/clnt.h> | 21 | #include <linux/sunrpc/clnt.h> |
| 21 | #include <linux/sunrpc/svc.h> | 22 | #include <linux/sunrpc/svc.h> |
| 23 | #include <linux/sunrpc/svcauth_gss.h> | ||
| 22 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
| 23 | 25 | ||
| 24 | #define NFSDDBG_FACILITY NFSDDBG_FH | 26 | #define NFSDDBG_FACILITY NFSDDBG_FH |
| @@ -27,10 +29,6 @@ | |||
| 27 | static int nfsd_nr_verified; | 29 | static int nfsd_nr_verified; |
| 28 | static int nfsd_nr_put; | 30 | static int nfsd_nr_put; |
| 29 | 31 | ||
| 30 | extern struct export_operations export_op_default; | ||
| 31 | |||
| 32 | #define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun) | ||
| 33 | |||
| 34 | /* | 32 | /* |
| 35 | * our acceptability function. | 33 | * our acceptability function. |
| 36 | * if NOSUBTREECHECK, accept anything | 34 | * if NOSUBTREECHECK, accept anything |
| @@ -123,8 +121,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 123 | int data_left = fh->fh_size/4; | 121 | int data_left = fh->fh_size/4; |
| 124 | 122 | ||
| 125 | error = nfserr_stale; | 123 | error = nfserr_stale; |
| 126 | if (rqstp->rq_client == NULL) | ||
| 127 | goto out; | ||
| 128 | if (rqstp->rq_vers > 2) | 124 | if (rqstp->rq_vers > 2) |
| 129 | error = nfserr_badhandle; | 125 | error = nfserr_badhandle; |
| 130 | if (rqstp->rq_vers == 4 && fh->fh_size == 0) | 126 | if (rqstp->rq_vers == 4 && fh->fh_size == 0) |
| @@ -148,7 +144,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 148 | fh->fh_fsid[1] = fh->fh_fsid[2]; | 144 | fh->fh_fsid[1] = fh->fh_fsid[2]; |
| 149 | } | 145 | } |
| 150 | if ((data_left -= len)<0) goto out; | 146 | if ((data_left -= len)<0) goto out; |
| 151 | exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle); | 147 | exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap); |
| 152 | datap += len; | 148 | datap += len; |
| 153 | } else { | 149 | } else { |
| 154 | dev_t xdev; | 150 | dev_t xdev; |
| @@ -159,19 +155,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 159 | xdev = old_decode_dev(fh->ofh_xdev); | 155 | xdev = old_decode_dev(fh->ofh_xdev); |
| 160 | xino = u32_to_ino_t(fh->ofh_xino); | 156 | xino = u32_to_ino_t(fh->ofh_xino); |
| 161 | mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); | 157 | mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); |
| 162 | exp = exp_find(rqstp->rq_client, FSID_DEV, tfh, | 158 | exp = rqst_exp_find(rqstp, FSID_DEV, tfh); |
| 163 | &rqstp->rq_chandle); | ||
| 164 | } | 159 | } |
| 165 | 160 | ||
| 166 | if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN | 161 | error = nfserr_stale; |
| 167 | || PTR_ERR(exp) == -ETIMEDOUT)) { | 162 | if (PTR_ERR(exp) == -ENOENT) |
| 168 | error = nfserrno(PTR_ERR(exp)); | ||
| 169 | goto out; | 163 | goto out; |
| 170 | } | ||
| 171 | 164 | ||
| 172 | error = nfserr_stale; | 165 | if (IS_ERR(exp)) { |
| 173 | if (!exp || IS_ERR(exp)) | 166 | error = nfserrno(PTR_ERR(exp)); |
| 174 | goto out; | 167 | goto out; |
| 168 | } | ||
| 175 | 169 | ||
| 176 | /* Check if the request originated from a secure port. */ | 170 | /* Check if the request originated from a secure port. */ |
| 177 | error = nfserr_perm; | 171 | error = nfserr_perm; |
| @@ -211,11 +205,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 211 | if (fileid_type == 0) | 205 | if (fileid_type == 0) |
| 212 | dentry = dget(exp->ex_dentry); | 206 | dentry = dget(exp->ex_dentry); |
| 213 | else { | 207 | else { |
| 214 | struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; | 208 | dentry = exportfs_decode_fh(exp->ex_mnt, datap, |
| 215 | dentry = CALL(nop,decode_fh)(exp->ex_mnt->mnt_sb, | 209 | data_left, fileid_type, |
| 216 | datap, data_left, | 210 | nfsd_acceptable, exp); |
| 217 | fileid_type, | ||
| 218 | nfsd_acceptable, exp); | ||
| 219 | } | 211 | } |
| 220 | if (dentry == NULL) | 212 | if (dentry == NULL) |
| 221 | goto out; | 213 | goto out; |
| @@ -257,8 +249,19 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 257 | if (error) | 249 | if (error) |
| 258 | goto out; | 250 | goto out; |
| 259 | 251 | ||
| 252 | if (!(access & MAY_LOCK)) { | ||
| 253 | /* | ||
| 254 | * pseudoflavor restrictions are not enforced on NLM, | ||
| 255 | * which clients virtually always use auth_sys for, | ||
| 256 | * even while using RPCSEC_GSS for NFS. | ||
| 257 | */ | ||
| 258 | error = check_nfsd_access(exp, rqstp); | ||
| 259 | if (error) | ||
| 260 | goto out; | ||
| 261 | } | ||
| 262 | |||
| 260 | /* Finally, check access permissions. */ | 263 | /* Finally, check access permissions. */ |
| 261 | error = nfsd_permission(exp, dentry, access); | 264 | error = nfsd_permission(rqstp, exp, dentry, access); |
| 262 | 265 | ||
| 263 | if (error) { | 266 | if (error) { |
| 264 | dprintk("fh_verify: %s/%s permission failure, " | 267 | dprintk("fh_verify: %s/%s permission failure, " |
| @@ -286,15 +289,13 @@ out: | |||
| 286 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, | 289 | static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, |
| 287 | __u32 *datap, int *maxsize) | 290 | __u32 *datap, int *maxsize) |
| 288 | { | 291 | { |
| 289 | struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; | ||
| 290 | |||
| 291 | if (dentry == exp->ex_dentry) { | 292 | if (dentry == exp->ex_dentry) { |
| 292 | *maxsize = 0; | 293 | *maxsize = 0; |
| 293 | return 0; | 294 | return 0; |
| 294 | } | 295 | } |
| 295 | 296 | ||
| 296 | return CALL(nop,encode_fh)(dentry, datap, maxsize, | 297 | return exportfs_encode_fh(dentry, datap, maxsize, |
| 297 | !(exp->ex_flags&NFSEXP_NOSUBTREECHECK)); | 298 | !(exp->ex_flags & NFSEXP_NOSUBTREECHECK)); |
| 298 | } | 299 | } |
| 299 | 300 | ||
| 300 | /* | 301 | /* |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b2c7147aa921..977a71f64e19 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
| @@ -278,7 +278,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
| 278 | * echo thing > device-special-file-or-pipe | 278 | * echo thing > device-special-file-or-pipe |
| 279 | * by doing a CREATE with type==0 | 279 | * by doing a CREATE with type==0 |
| 280 | */ | 280 | */ |
| 281 | nfserr = nfsd_permission(newfhp->fh_export, | 281 | nfserr = nfsd_permission(rqstp, |
| 282 | newfhp->fh_export, | ||
| 282 | newfhp->fh_dentry, | 283 | newfhp->fh_dentry, |
| 283 | MAY_WRITE|MAY_LOCAL_ACCESS); | 284 | MAY_WRITE|MAY_LOCAL_ACCESS); |
| 284 | if (nfserr && nfserr != nfserr_rofs) | 285 | if (nfserr && nfserr != nfserr_rofs) |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ff55950efb43..a8c89ae4c743 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| 20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
| 21 | #include <linux/smp_lock.h> | 21 | #include <linux/smp_lock.h> |
| 22 | #include <linux/freezer.h> | ||
| 22 | #include <linux/fs_struct.h> | 23 | #include <linux/fs_struct.h> |
| 23 | 24 | ||
| 24 | #include <linux/sunrpc/types.h> | 25 | #include <linux/sunrpc/types.h> |
| @@ -432,6 +433,7 @@ nfsd(struct svc_rqst *rqstp) | |||
| 432 | * dirty pages. | 433 | * dirty pages. |
| 433 | */ | 434 | */ |
| 434 | current->flags |= PF_LESS_THROTTLE; | 435 | current->flags |= PF_LESS_THROTTLE; |
| 436 | set_freezable(); | ||
| 435 | 437 | ||
| 436 | /* | 438 | /* |
| 437 | * The main request loop | 439 | * The main request loop |
| @@ -492,6 +494,15 @@ out: | |||
| 492 | module_put_and_exit(0); | 494 | module_put_and_exit(0); |
| 493 | } | 495 | } |
| 494 | 496 | ||
| 497 | static __be32 map_new_errors(u32 vers, __be32 nfserr) | ||
| 498 | { | ||
| 499 | if (nfserr == nfserr_jukebox && vers == 2) | ||
| 500 | return nfserr_dropit; | ||
| 501 | if (nfserr == nfserr_wrongsec && vers < 4) | ||
| 502 | return nfserr_acces; | ||
| 503 | return nfserr; | ||
| 504 | } | ||
| 505 | |||
| 495 | int | 506 | int |
| 496 | nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | 507 | nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) |
| 497 | { | 508 | { |
| @@ -534,6 +545,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
| 534 | 545 | ||
| 535 | /* Now call the procedure handler, and encode NFS status. */ | 546 | /* Now call the procedure handler, and encode NFS status. */ |
| 536 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 547 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
| 548 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | ||
| 537 | if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2) | 549 | if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2) |
| 538 | nfserr = nfserr_dropit; | 550 | nfserr = nfserr_dropit; |
| 539 | if (nfserr == nfserr_dropit) { | 551 | if (nfserr == nfserr_dropit) { |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 945b1cedde2b..e90f4a8a1d01 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -113,7 +113,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
| 113 | 113 | ||
| 114 | while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); | 114 | while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); |
| 115 | 115 | ||
| 116 | exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle); | 116 | exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); |
| 117 | if (IS_ERR(exp2)) { | 117 | if (IS_ERR(exp2)) { |
| 118 | err = PTR_ERR(exp2); | 118 | err = PTR_ERR(exp2); |
| 119 | dput(mounts); | 119 | dput(mounts); |
| @@ -135,21 +135,10 @@ out: | |||
| 135 | return err; | 135 | return err; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /* | ||
| 139 | * Look up one component of a pathname. | ||
| 140 | * N.B. After this call _both_ fhp and resfh need an fh_put | ||
| 141 | * | ||
| 142 | * If the lookup would cross a mountpoint, and the mounted filesystem | ||
| 143 | * is exported to the client with NFSEXP_NOHIDE, then the lookup is | ||
| 144 | * accepted as it stands and the mounted directory is | ||
| 145 | * returned. Otherwise the covered directory is returned. | ||
| 146 | * NOTE: this mountpoint crossing is not supported properly by all | ||
| 147 | * clients and is explicitly disallowed for NFSv3 | ||
| 148 | * NeilBrown <neilb@cse.unsw.edu.au> | ||
| 149 | */ | ||
| 150 | __be32 | 138 | __be32 |
| 151 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | 139 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, |
| 152 | int len, struct svc_fh *resfh) | 140 | const char *name, int len, |
| 141 | struct svc_export **exp_ret, struct dentry **dentry_ret) | ||
| 153 | { | 142 | { |
| 154 | struct svc_export *exp; | 143 | struct svc_export *exp; |
| 155 | struct dentry *dparent; | 144 | struct dentry *dparent; |
| @@ -168,8 +157,6 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
| 168 | exp = fhp->fh_export; | 157 | exp = fhp->fh_export; |
| 169 | exp_get(exp); | 158 | exp_get(exp); |
| 170 | 159 | ||
| 171 | err = nfserr_acces; | ||
| 172 | |||
| 173 | /* Lookup the name, but don't follow links */ | 160 | /* Lookup the name, but don't follow links */ |
| 174 | if (isdotent(name, len)) { | 161 | if (isdotent(name, len)) { |
| 175 | if (len==1) | 162 | if (len==1) |
| @@ -190,17 +177,15 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
| 190 | dput(dentry); | 177 | dput(dentry); |
| 191 | dentry = dp; | 178 | dentry = dp; |
| 192 | 179 | ||
| 193 | exp2 = exp_parent(exp->ex_client, mnt, dentry, | 180 | exp2 = rqst_exp_parent(rqstp, mnt, dentry); |
| 194 | &rqstp->rq_chandle); | 181 | if (PTR_ERR(exp2) == -ENOENT) { |
| 195 | if (IS_ERR(exp2)) { | 182 | dput(dentry); |
| 183 | dentry = dget(dparent); | ||
| 184 | } else if (IS_ERR(exp2)) { | ||
| 196 | host_err = PTR_ERR(exp2); | 185 | host_err = PTR_ERR(exp2); |
| 197 | dput(dentry); | 186 | dput(dentry); |
| 198 | mntput(mnt); | 187 | mntput(mnt); |
| 199 | goto out_nfserr; | 188 | goto out_nfserr; |
| 200 | } | ||
| 201 | if (!exp2) { | ||
| 202 | dput(dentry); | ||
| 203 | dentry = dget(dparent); | ||
| 204 | } else { | 189 | } else { |
| 205 | exp_put(exp); | 190 | exp_put(exp); |
| 206 | exp = exp2; | 191 | exp = exp2; |
| @@ -223,6 +208,41 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
| 223 | } | 208 | } |
| 224 | } | 209 | } |
| 225 | } | 210 | } |
| 211 | *dentry_ret = dentry; | ||
| 212 | *exp_ret = exp; | ||
| 213 | return 0; | ||
| 214 | |||
| 215 | out_nfserr: | ||
| 216 | exp_put(exp); | ||
| 217 | return nfserrno(host_err); | ||
| 218 | } | ||
| 219 | |||
| 220 | /* | ||
| 221 | * Look up one component of a pathname. | ||
| 222 | * N.B. After this call _both_ fhp and resfh need an fh_put | ||
| 223 | * | ||
| 224 | * If the lookup would cross a mountpoint, and the mounted filesystem | ||
| 225 | * is exported to the client with NFSEXP_NOHIDE, then the lookup is | ||
| 226 | * accepted as it stands and the mounted directory is | ||
| 227 | * returned. Otherwise the covered directory is returned. | ||
| 228 | * NOTE: this mountpoint crossing is not supported properly by all | ||
| 229 | * clients and is explicitly disallowed for NFSv3 | ||
| 230 | * NeilBrown <neilb@cse.unsw.edu.au> | ||
| 231 | */ | ||
| 232 | __be32 | ||
| 233 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | ||
| 234 | int len, struct svc_fh *resfh) | ||
| 235 | { | ||
| 236 | struct svc_export *exp; | ||
| 237 | struct dentry *dentry; | ||
| 238 | __be32 err; | ||
| 239 | |||
| 240 | err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); | ||
| 241 | if (err) | ||
| 242 | return err; | ||
| 243 | err = check_nfsd_access(exp, rqstp); | ||
| 244 | if (err) | ||
| 245 | goto out; | ||
| 226 | /* | 246 | /* |
| 227 | * Note: we compose the file handle now, but as the | 247 | * Note: we compose the file handle now, but as the |
| 228 | * dentry may be negative, it may need to be updated. | 248 | * dentry may be negative, it may need to be updated. |
| @@ -230,16 +250,13 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
| 230 | err = fh_compose(resfh, exp, dentry, fhp); | 250 | err = fh_compose(resfh, exp, dentry, fhp); |
| 231 | if (!err && !dentry->d_inode) | 251 | if (!err && !dentry->d_inode) |
| 232 | err = nfserr_noent; | 252 | err = nfserr_noent; |
| 233 | dput(dentry); | ||
| 234 | out: | 253 | out: |
| 254 | dput(dentry); | ||
| 235 | exp_put(exp); | 255 | exp_put(exp); |
| 236 | return err; | 256 | return err; |
| 237 | |||
| 238 | out_nfserr: | ||
| 239 | err = nfserrno(host_err); | ||
| 240 | goto out; | ||
| 241 | } | 257 | } |
| 242 | 258 | ||
| 259 | |||
| 243 | /* | 260 | /* |
| 244 | * Set various file attributes. | 261 | * Set various file attributes. |
| 245 | * N.B. After this call fhp needs an fh_put | 262 | * N.B. After this call fhp needs an fh_put |
| @@ -311,7 +328,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
| 311 | /* The size case is special. It changes the file as well as the attributes. */ | 328 | /* The size case is special. It changes the file as well as the attributes. */ |
| 312 | if (iap->ia_valid & ATTR_SIZE) { | 329 | if (iap->ia_valid & ATTR_SIZE) { |
| 313 | if (iap->ia_size < inode->i_size) { | 330 | if (iap->ia_size < inode->i_size) { |
| 314 | err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); | 331 | err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); |
| 315 | if (err) | 332 | if (err) |
| 316 | goto out; | 333 | goto out; |
| 317 | } | 334 | } |
| @@ -435,7 +452,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 435 | /* Get inode */ | 452 | /* Get inode */ |
| 436 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); | 453 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); |
| 437 | if (error) | 454 | if (error) |
| 438 | goto out; | 455 | return error; |
| 439 | 456 | ||
| 440 | dentry = fhp->fh_dentry; | 457 | dentry = fhp->fh_dentry; |
| 441 | inode = dentry->d_inode; | 458 | inode = dentry->d_inode; |
| @@ -444,33 +461,25 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 444 | 461 | ||
| 445 | host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); | 462 | host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); |
| 446 | if (host_error == -EINVAL) { | 463 | if (host_error == -EINVAL) { |
| 447 | error = nfserr_attrnotsupp; | 464 | return nfserr_attrnotsupp; |
| 448 | goto out; | ||
| 449 | } else if (host_error < 0) | 465 | } else if (host_error < 0) |
| 450 | goto out_nfserr; | 466 | goto out_nfserr; |
| 451 | 467 | ||
| 452 | host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); | 468 | host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); |
| 453 | if (host_error < 0) | 469 | if (host_error < 0) |
| 454 | goto out_nfserr; | 470 | goto out_release; |
| 455 | 471 | ||
| 456 | if (S_ISDIR(inode->i_mode)) { | 472 | if (S_ISDIR(inode->i_mode)) |
| 457 | host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); | 473 | host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); |
| 458 | if (host_error < 0) | ||
| 459 | goto out_nfserr; | ||
| 460 | } | ||
| 461 | |||
| 462 | error = nfs_ok; | ||
| 463 | 474 | ||
| 464 | out: | 475 | out_release: |
| 465 | posix_acl_release(pacl); | 476 | posix_acl_release(pacl); |
| 466 | posix_acl_release(dpacl); | 477 | posix_acl_release(dpacl); |
| 467 | return (error); | ||
| 468 | out_nfserr: | 478 | out_nfserr: |
| 469 | if (host_error == -EOPNOTSUPP) | 479 | if (host_error == -EOPNOTSUPP) |
| 470 | error = nfserr_attrnotsupp; | 480 | return nfserr_attrnotsupp; |
| 471 | else | 481 | else |
| 472 | error = nfserrno(host_error); | 482 | return nfserrno(host_error); |
| 473 | goto out; | ||
| 474 | } | 483 | } |
| 475 | 484 | ||
| 476 | static struct posix_acl * | 485 | static struct posix_acl * |
| @@ -607,7 +616,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor | |||
| 607 | 616 | ||
| 608 | sresult |= map->access; | 617 | sresult |= map->access; |
| 609 | 618 | ||
| 610 | err2 = nfsd_permission(export, dentry, map->how); | 619 | err2 = nfsd_permission(rqstp, export, dentry, map->how); |
| 611 | switch (err2) { | 620 | switch (err2) { |
| 612 | case nfs_ok: | 621 | case nfs_ok: |
| 613 | result |= map->access; | 622 | result |= map->access; |
| @@ -1034,7 +1043,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
| 1034 | __be32 err; | 1043 | __be32 err; |
| 1035 | 1044 | ||
| 1036 | if (file) { | 1045 | if (file) { |
| 1037 | err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, | 1046 | err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, |
| 1038 | MAY_READ|MAY_OWNER_OVERRIDE); | 1047 | MAY_READ|MAY_OWNER_OVERRIDE); |
| 1039 | if (err) | 1048 | if (err) |
| 1040 | goto out; | 1049 | goto out; |
| @@ -1063,7 +1072,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
| 1063 | __be32 err = 0; | 1072 | __be32 err = 0; |
| 1064 | 1073 | ||
| 1065 | if (file) { | 1074 | if (file) { |
| 1066 | err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, | 1075 | err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, |
| 1067 | MAY_WRITE|MAY_OWNER_OVERRIDE); | 1076 | MAY_WRITE|MAY_OWNER_OVERRIDE); |
| 1068 | if (err) | 1077 | if (err) |
| 1069 | goto out; | 1078 | goto out; |
| @@ -1792,7 +1801,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) | |||
| 1792 | * Check for a user's access permissions to this inode. | 1801 | * Check for a user's access permissions to this inode. |
| 1793 | */ | 1802 | */ |
| 1794 | __be32 | 1803 | __be32 |
| 1795 | nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | 1804 | nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, |
| 1805 | struct dentry *dentry, int acc) | ||
| 1796 | { | 1806 | { |
| 1797 | struct inode *inode = dentry->d_inode; | 1807 | struct inode *inode = dentry->d_inode; |
| 1798 | int err; | 1808 | int err; |
| @@ -1823,7 +1833,7 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | |||
| 1823 | */ | 1833 | */ |
| 1824 | if (!(acc & MAY_LOCAL_ACCESS)) | 1834 | if (!(acc & MAY_LOCAL_ACCESS)) |
| 1825 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { | 1835 | if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { |
| 1826 | if (EX_RDONLY(exp) || IS_RDONLY(inode)) | 1836 | if (EX_RDONLY(exp, rqstp) || IS_RDONLY(inode)) |
| 1827 | return nfserr_rofs; | 1837 | return nfserr_rofs; |
| 1828 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) | 1838 | if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) |
| 1829 | return nfserr_perm; | 1839 | return nfserr_perm; |
diff --git a/fs/nls/Makefile b/fs/nls/Makefile index a7ade138d684..f499dd7c3905 100644 --- a/fs/nls/Makefile +++ b/fs/nls/Makefile | |||
| @@ -36,11 +36,9 @@ obj-$(CONFIG_NLS_ISO8859_6) += nls_iso8859-6.o | |||
| 36 | obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o | 36 | obj-$(CONFIG_NLS_ISO8859_7) += nls_iso8859-7.o |
| 37 | obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o | 37 | obj-$(CONFIG_NLS_ISO8859_8) += nls_cp1255.o |
| 38 | obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o | 38 | obj-$(CONFIG_NLS_ISO8859_9) += nls_iso8859-9.o |
| 39 | obj-$(CONFIG_NLS_ISO8859_10) += nls_iso8859-10.o | ||
| 40 | obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o | 39 | obj-$(CONFIG_NLS_ISO8859_13) += nls_iso8859-13.o |
| 41 | obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o | 40 | obj-$(CONFIG_NLS_ISO8859_14) += nls_iso8859-14.o |
| 42 | obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o | 41 | obj-$(CONFIG_NLS_ISO8859_15) += nls_iso8859-15.o |
| 43 | obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o | 42 | obj-$(CONFIG_NLS_KOI8_R) += nls_koi8-r.o |
| 44 | obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o | 43 | obj-$(CONFIG_NLS_KOI8_U) += nls_koi8-u.o nls_koi8-ru.o |
| 45 | obj-$(CONFIG_NLS_ABC) += nls_abc.o | ||
| 46 | obj-$(CONFIG_NLS_UTF8) += nls_utf8.o | 44 | obj-$(CONFIG_NLS_UTF8) += nls_utf8.o |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index bff01a54675a..e93c6142b23c 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #include <linux/dcache.h> | 23 | #include <linux/dcache.h> |
| 24 | #include <linux/exportfs.h> | ||
| 24 | #include <linux/security.h> | 25 | #include <linux/security.h> |
| 25 | 26 | ||
| 26 | #include "attrib.h" | 27 | #include "attrib.h" |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 19712a7d145f..f5e11f4fa952 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -50,6 +50,8 @@ | |||
| 50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
| 51 | 51 | ||
| 52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
| 53 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
| 54 | struct ocfs2_extent_block *eb); | ||
| 53 | 55 | ||
| 54 | /* | 56 | /* |
| 55 | * Structures which describe a path through a btree, and functions to | 57 | * Structures which describe a path through a btree, and functions to |
| @@ -117,6 +119,31 @@ static void ocfs2_free_path(struct ocfs2_path *path) | |||
| 117 | } | 119 | } |
| 118 | 120 | ||
| 119 | /* | 121 | /* |
| 122 | * All the elements of src into dest. After this call, src could be freed | ||
| 123 | * without affecting dest. | ||
| 124 | * | ||
| 125 | * Both paths should have the same root. Any non-root elements of dest | ||
| 126 | * will be freed. | ||
| 127 | */ | ||
| 128 | static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) | ||
| 129 | { | ||
| 130 | int i; | ||
| 131 | |||
| 132 | BUG_ON(path_root_bh(dest) != path_root_bh(src)); | ||
| 133 | BUG_ON(path_root_el(dest) != path_root_el(src)); | ||
| 134 | |||
| 135 | ocfs2_reinit_path(dest, 1); | ||
| 136 | |||
| 137 | for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { | ||
| 138 | dest->p_node[i].bh = src->p_node[i].bh; | ||
| 139 | dest->p_node[i].el = src->p_node[i].el; | ||
| 140 | |||
| 141 | if (dest->p_node[i].bh) | ||
| 142 | get_bh(dest->p_node[i].bh); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 120 | * Make the *dest path the same as src and re-initialize src path to | 147 | * Make the *dest path the same as src and re-initialize src path to |
| 121 | * have a root only. | 148 | * have a root only. |
| 122 | */ | 149 | */ |
| @@ -212,10 +239,41 @@ out: | |||
| 212 | return ret; | 239 | return ret; |
| 213 | } | 240 | } |
| 214 | 241 | ||
| 242 | /* | ||
| 243 | * Return the index of the extent record which contains cluster #v_cluster. | ||
| 244 | * -1 is returned if it was not found. | ||
| 245 | * | ||
| 246 | * Should work fine on interior and exterior nodes. | ||
| 247 | */ | ||
| 248 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) | ||
| 249 | { | ||
| 250 | int ret = -1; | ||
| 251 | int i; | ||
| 252 | struct ocfs2_extent_rec *rec; | ||
| 253 | u32 rec_end, rec_start, clusters; | ||
| 254 | |||
| 255 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
| 256 | rec = &el->l_recs[i]; | ||
| 257 | |||
| 258 | rec_start = le32_to_cpu(rec->e_cpos); | ||
| 259 | clusters = ocfs2_rec_clusters(el, rec); | ||
| 260 | |||
| 261 | rec_end = rec_start + clusters; | ||
| 262 | |||
| 263 | if (v_cluster >= rec_start && v_cluster < rec_end) { | ||
| 264 | ret = i; | ||
| 265 | break; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | return ret; | ||
| 270 | } | ||
| 271 | |||
| 215 | enum ocfs2_contig_type { | 272 | enum ocfs2_contig_type { |
| 216 | CONTIG_NONE = 0, | 273 | CONTIG_NONE = 0, |
| 217 | CONTIG_LEFT, | 274 | CONTIG_LEFT, |
| 218 | CONTIG_RIGHT | 275 | CONTIG_RIGHT, |
| 276 | CONTIG_LEFTRIGHT, | ||
| 219 | }; | 277 | }; |
| 220 | 278 | ||
| 221 | 279 | ||
| @@ -253,6 +311,14 @@ static enum ocfs2_contig_type | |||
| 253 | { | 311 | { |
| 254 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); | 312 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); |
| 255 | 313 | ||
| 314 | /* | ||
| 315 | * Refuse to coalesce extent records with different flag | ||
| 316 | * fields - we don't want to mix unwritten extents with user | ||
| 317 | * data. | ||
| 318 | */ | ||
| 319 | if (ext->e_flags != insert_rec->e_flags) | ||
| 320 | return CONTIG_NONE; | ||
| 321 | |||
| 256 | if (ocfs2_extents_adjacent(ext, insert_rec) && | 322 | if (ocfs2_extents_adjacent(ext, insert_rec) && |
| 257 | ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) | 323 | ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) |
| 258 | return CONTIG_RIGHT; | 324 | return CONTIG_RIGHT; |
| @@ -277,7 +343,14 @@ enum ocfs2_append_type { | |||
| 277 | APPEND_TAIL, | 343 | APPEND_TAIL, |
| 278 | }; | 344 | }; |
| 279 | 345 | ||
| 346 | enum ocfs2_split_type { | ||
| 347 | SPLIT_NONE = 0, | ||
| 348 | SPLIT_LEFT, | ||
| 349 | SPLIT_RIGHT, | ||
| 350 | }; | ||
| 351 | |||
| 280 | struct ocfs2_insert_type { | 352 | struct ocfs2_insert_type { |
| 353 | enum ocfs2_split_type ins_split; | ||
| 281 | enum ocfs2_append_type ins_appending; | 354 | enum ocfs2_append_type ins_appending; |
| 282 | enum ocfs2_contig_type ins_contig; | 355 | enum ocfs2_contig_type ins_contig; |
| 283 | int ins_contig_index; | 356 | int ins_contig_index; |
| @@ -285,6 +358,13 @@ struct ocfs2_insert_type { | |||
| 285 | int ins_tree_depth; | 358 | int ins_tree_depth; |
| 286 | }; | 359 | }; |
| 287 | 360 | ||
| 361 | struct ocfs2_merge_ctxt { | ||
| 362 | enum ocfs2_contig_type c_contig_type; | ||
| 363 | int c_has_empty_extent; | ||
| 364 | int c_split_covers_rec; | ||
| 365 | int c_used_tail_recs; | ||
| 366 | }; | ||
| 367 | |||
| 288 | /* | 368 | /* |
| 289 | * How many free extents have we got before we need more meta data? | 369 | * How many free extents have we got before we need more meta data? |
| 290 | */ | 370 | */ |
| @@ -384,13 +464,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
| 384 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); | 464 | strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); |
| 385 | eb->h_blkno = cpu_to_le64(first_blkno); | 465 | eb->h_blkno = cpu_to_le64(first_blkno); |
| 386 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); | 466 | eb->h_fs_generation = cpu_to_le32(osb->fs_generation); |
| 387 | |||
| 388 | #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS | ||
| 389 | /* we always use slot zero's suballocator */ | ||
| 390 | eb->h_suballoc_slot = 0; | ||
| 391 | #else | ||
| 392 | eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); | 467 | eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); |
| 393 | #endif | ||
| 394 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); | 468 | eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); |
| 395 | eb->h_list.l_count = | 469 | eb->h_list.l_count = |
| 396 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); | 470 | cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); |
| @@ -461,7 +535,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 461 | struct inode *inode, | 535 | struct inode *inode, |
| 462 | struct buffer_head *fe_bh, | 536 | struct buffer_head *fe_bh, |
| 463 | struct buffer_head *eb_bh, | 537 | struct buffer_head *eb_bh, |
| 464 | struct buffer_head *last_eb_bh, | 538 | struct buffer_head **last_eb_bh, |
| 465 | struct ocfs2_alloc_context *meta_ac) | 539 | struct ocfs2_alloc_context *meta_ac) |
| 466 | { | 540 | { |
| 467 | int status, new_blocks, i; | 541 | int status, new_blocks, i; |
| @@ -476,7 +550,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 476 | 550 | ||
| 477 | mlog_entry_void(); | 551 | mlog_entry_void(); |
| 478 | 552 | ||
| 479 | BUG_ON(!last_eb_bh); | 553 | BUG_ON(!last_eb_bh || !*last_eb_bh); |
| 480 | 554 | ||
| 481 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 555 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
| 482 | 556 | ||
| @@ -507,7 +581,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 507 | goto bail; | 581 | goto bail; |
| 508 | } | 582 | } |
| 509 | 583 | ||
| 510 | eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; | 584 | eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; |
| 511 | new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); | 585 | new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); |
| 512 | 586 | ||
| 513 | /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be | 587 | /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be |
| @@ -568,7 +642,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 568 | * journal_dirty erroring as it won't unless we've aborted the | 642 | * journal_dirty erroring as it won't unless we've aborted the |
| 569 | * handle (in which case we would never be here) so reserving | 643 | * handle (in which case we would never be here) so reserving |
| 570 | * the write with journal_access is all we need to do. */ | 644 | * the write with journal_access is all we need to do. */ |
| 571 | status = ocfs2_journal_access(handle, inode, last_eb_bh, | 645 | status = ocfs2_journal_access(handle, inode, *last_eb_bh, |
| 572 | OCFS2_JOURNAL_ACCESS_WRITE); | 646 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 573 | if (status < 0) { | 647 | if (status < 0) { |
| 574 | mlog_errno(status); | 648 | mlog_errno(status); |
| @@ -601,10 +675,10 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 601 | * next_leaf on the previously last-extent-block. */ | 675 | * next_leaf on the previously last-extent-block. */ |
| 602 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); | 676 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); |
| 603 | 677 | ||
| 604 | eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; | 678 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; |
| 605 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); | 679 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); |
| 606 | 680 | ||
| 607 | status = ocfs2_journal_dirty(handle, last_eb_bh); | 681 | status = ocfs2_journal_dirty(handle, *last_eb_bh); |
| 608 | if (status < 0) | 682 | if (status < 0) |
| 609 | mlog_errno(status); | 683 | mlog_errno(status); |
| 610 | status = ocfs2_journal_dirty(handle, fe_bh); | 684 | status = ocfs2_journal_dirty(handle, fe_bh); |
| @@ -616,6 +690,14 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 616 | mlog_errno(status); | 690 | mlog_errno(status); |
| 617 | } | 691 | } |
| 618 | 692 | ||
| 693 | /* | ||
| 694 | * Some callers want to track the rightmost leaf so pass it | ||
| 695 | * back here. | ||
| 696 | */ | ||
| 697 | brelse(*last_eb_bh); | ||
| 698 | get_bh(new_eb_bhs[0]); | ||
| 699 | *last_eb_bh = new_eb_bhs[0]; | ||
| 700 | |||
| 619 | status = 0; | 701 | status = 0; |
| 620 | bail: | 702 | bail: |
| 621 | if (new_eb_bhs) { | 703 | if (new_eb_bhs) { |
| @@ -829,6 +911,87 @@ bail: | |||
| 829 | } | 911 | } |
| 830 | 912 | ||
| 831 | /* | 913 | /* |
| 914 | * Grow a b-tree so that it has more records. | ||
| 915 | * | ||
| 916 | * We might shift the tree depth in which case existing paths should | ||
| 917 | * be considered invalid. | ||
| 918 | * | ||
| 919 | * Tree depth after the grow is returned via *final_depth. | ||
| 920 | * | ||
| 921 | * *last_eb_bh will be updated by ocfs2_add_branch(). | ||
| 922 | */ | ||
| 923 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | ||
| 924 | struct buffer_head *di_bh, int *final_depth, | ||
| 925 | struct buffer_head **last_eb_bh, | ||
| 926 | struct ocfs2_alloc_context *meta_ac) | ||
| 927 | { | ||
| 928 | int ret, shift; | ||
| 929 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 930 | int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); | ||
| 931 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 932 | struct buffer_head *bh = NULL; | ||
| 933 | |||
| 934 | BUG_ON(meta_ac == NULL); | ||
| 935 | |||
| 936 | shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); | ||
| 937 | if (shift < 0) { | ||
| 938 | ret = shift; | ||
| 939 | mlog_errno(ret); | ||
| 940 | goto out; | ||
| 941 | } | ||
| 942 | |||
| 943 | /* We traveled all the way to the bottom of the allocation tree | ||
| 944 | * and didn't find room for any more extents - we need to add | ||
| 945 | * another tree level */ | ||
| 946 | if (shift) { | ||
| 947 | BUG_ON(bh); | ||
| 948 | mlog(0, "need to shift tree depth (current = %d)\n", depth); | ||
| 949 | |||
| 950 | /* ocfs2_shift_tree_depth will return us a buffer with | ||
| 951 | * the new extent block (so we can pass that to | ||
| 952 | * ocfs2_add_branch). */ | ||
| 953 | ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, | ||
| 954 | meta_ac, &bh); | ||
| 955 | if (ret < 0) { | ||
| 956 | mlog_errno(ret); | ||
| 957 | goto out; | ||
| 958 | } | ||
| 959 | depth++; | ||
| 960 | if (depth == 1) { | ||
| 961 | /* | ||
| 962 | * Special case: we have room now if we shifted from | ||
| 963 | * tree_depth 0, so no more work needs to be done. | ||
| 964 | * | ||
| 965 | * We won't be calling add_branch, so pass | ||
| 966 | * back *last_eb_bh as the new leaf. At depth | ||
| 967 | * zero, it should always be null so there's | ||
| 968 | * no reason to brelse. | ||
| 969 | */ | ||
| 970 | BUG_ON(*last_eb_bh); | ||
| 971 | get_bh(bh); | ||
| 972 | *last_eb_bh = bh; | ||
| 973 | goto out; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | /* call ocfs2_add_branch to add the final part of the tree with | ||
| 978 | * the new data. */ | ||
| 979 | mlog(0, "add branch. bh = %p\n", bh); | ||
| 980 | ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, | ||
| 981 | meta_ac); | ||
| 982 | if (ret < 0) { | ||
| 983 | mlog_errno(ret); | ||
| 984 | goto out; | ||
| 985 | } | ||
| 986 | |||
| 987 | out: | ||
| 988 | if (final_depth) | ||
| 989 | *final_depth = depth; | ||
| 990 | brelse(bh); | ||
| 991 | return ret; | ||
| 992 | } | ||
| 993 | |||
| 994 | /* | ||
| 832 | * This is only valid for leaf nodes, which are the only ones that can | 995 | * This is only valid for leaf nodes, which are the only ones that can |
| 833 | * have empty extents anyway. | 996 | * have empty extents anyway. |
| 834 | */ | 997 | */ |
| @@ -934,6 +1097,22 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, | |||
| 934 | 1097 | ||
| 935 | } | 1098 | } |
| 936 | 1099 | ||
| 1100 | static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el) | ||
| 1101 | { | ||
| 1102 | int size, num_recs = le16_to_cpu(el->l_next_free_rec); | ||
| 1103 | |||
| 1104 | BUG_ON(num_recs == 0); | ||
| 1105 | |||
| 1106 | if (ocfs2_is_empty_extent(&el->l_recs[0])) { | ||
| 1107 | num_recs--; | ||
| 1108 | size = num_recs * sizeof(struct ocfs2_extent_rec); | ||
| 1109 | memmove(&el->l_recs[0], &el->l_recs[1], size); | ||
| 1110 | memset(&el->l_recs[num_recs], 0, | ||
| 1111 | sizeof(struct ocfs2_extent_rec)); | ||
| 1112 | el->l_next_free_rec = cpu_to_le16(num_recs); | ||
| 1113 | } | ||
| 1114 | } | ||
| 1115 | |||
| 937 | /* | 1116 | /* |
| 938 | * Create an empty extent record . | 1117 | * Create an empty extent record . |
| 939 | * | 1118 | * |
| @@ -1211,6 +1390,10 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, | |||
| 1211 | * immediately to their right. | 1390 | * immediately to their right. |
| 1212 | */ | 1391 | */ |
| 1213 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); | 1392 | left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); |
| 1393 | if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { | ||
| 1394 | BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); | ||
| 1395 | left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); | ||
| 1396 | } | ||
| 1214 | left_clusters -= le32_to_cpu(left_rec->e_cpos); | 1397 | left_clusters -= le32_to_cpu(left_rec->e_cpos); |
| 1215 | left_rec->e_int_clusters = cpu_to_le32(left_clusters); | 1398 | left_rec->e_int_clusters = cpu_to_le32(left_clusters); |
| 1216 | 1399 | ||
| @@ -1531,10 +1714,16 @@ out: | |||
| 1531 | return ret; | 1714 | return ret; |
| 1532 | } | 1715 | } |
| 1533 | 1716 | ||
| 1717 | /* | ||
| 1718 | * Extend the transaction by enough credits to complete the rotation, | ||
| 1719 | * and still leave at least the original number of credits allocated | ||
| 1720 | * to this transaction. | ||
| 1721 | */ | ||
| 1534 | static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, | 1722 | static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, |
| 1723 | int op_credits, | ||
| 1535 | struct ocfs2_path *path) | 1724 | struct ocfs2_path *path) |
| 1536 | { | 1725 | { |
| 1537 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; | 1726 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; |
| 1538 | 1727 | ||
| 1539 | if (handle->h_buffer_credits < credits) | 1728 | if (handle->h_buffer_credits < credits) |
| 1540 | return ocfs2_extend_trans(handle, credits); | 1729 | return ocfs2_extend_trans(handle, credits); |
| @@ -1568,6 +1757,29 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, | |||
| 1568 | return 0; | 1757 | return 0; |
| 1569 | } | 1758 | } |
| 1570 | 1759 | ||
| 1760 | static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) | ||
| 1761 | { | ||
| 1762 | int next_free = le16_to_cpu(el->l_next_free_rec); | ||
| 1763 | unsigned int range; | ||
| 1764 | struct ocfs2_extent_rec *rec; | ||
| 1765 | |||
| 1766 | if (next_free == 0) | ||
| 1767 | return 0; | ||
| 1768 | |||
| 1769 | rec = &el->l_recs[0]; | ||
| 1770 | if (ocfs2_is_empty_extent(rec)) { | ||
| 1771 | /* Empty list. */ | ||
| 1772 | if (next_free == 1) | ||
| 1773 | return 0; | ||
| 1774 | rec = &el->l_recs[1]; | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
| 1778 | if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) | ||
| 1779 | return 1; | ||
| 1780 | return 0; | ||
| 1781 | } | ||
| 1782 | |||
| 1571 | /* | 1783 | /* |
| 1572 | * Rotate all the records in a btree right one record, starting at insert_cpos. | 1784 | * Rotate all the records in a btree right one record, starting at insert_cpos. |
| 1573 | * | 1785 | * |
| @@ -1586,11 +1798,12 @@ static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, | |||
| 1586 | */ | 1798 | */ |
| 1587 | static int ocfs2_rotate_tree_right(struct inode *inode, | 1799 | static int ocfs2_rotate_tree_right(struct inode *inode, |
| 1588 | handle_t *handle, | 1800 | handle_t *handle, |
| 1801 | enum ocfs2_split_type split, | ||
| 1589 | u32 insert_cpos, | 1802 | u32 insert_cpos, |
| 1590 | struct ocfs2_path *right_path, | 1803 | struct ocfs2_path *right_path, |
| 1591 | struct ocfs2_path **ret_left_path) | 1804 | struct ocfs2_path **ret_left_path) |
| 1592 | { | 1805 | { |
| 1593 | int ret, start; | 1806 | int ret, start, orig_credits = handle->h_buffer_credits; |
| 1594 | u32 cpos; | 1807 | u32 cpos; |
| 1595 | struct ocfs2_path *left_path = NULL; | 1808 | struct ocfs2_path *left_path = NULL; |
| 1596 | 1809 | ||
| @@ -1657,9 +1870,9 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
| 1657 | (unsigned long long) | 1870 | (unsigned long long) |
| 1658 | path_leaf_bh(left_path)->b_blocknr); | 1871 | path_leaf_bh(left_path)->b_blocknr); |
| 1659 | 1872 | ||
| 1660 | if (ocfs2_rotate_requires_path_adjustment(left_path, | 1873 | if (split == SPLIT_NONE && |
| 1874 | ocfs2_rotate_requires_path_adjustment(left_path, | ||
| 1661 | insert_cpos)) { | 1875 | insert_cpos)) { |
| 1662 | mlog(0, "Path adjustment required\n"); | ||
| 1663 | 1876 | ||
| 1664 | /* | 1877 | /* |
| 1665 | * We've rotated the tree as much as we | 1878 | * We've rotated the tree as much as we |
| @@ -1687,7 +1900,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
| 1687 | right_path->p_tree_depth); | 1900 | right_path->p_tree_depth); |
| 1688 | 1901 | ||
| 1689 | ret = ocfs2_extend_rotate_transaction(handle, start, | 1902 | ret = ocfs2_extend_rotate_transaction(handle, start, |
| 1690 | right_path); | 1903 | orig_credits, right_path); |
| 1691 | if (ret) { | 1904 | if (ret) { |
| 1692 | mlog_errno(ret); | 1905 | mlog_errno(ret); |
| 1693 | goto out; | 1906 | goto out; |
| @@ -1700,6 +1913,24 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
| 1700 | goto out; | 1913 | goto out; |
| 1701 | } | 1914 | } |
| 1702 | 1915 | ||
| 1916 | if (split != SPLIT_NONE && | ||
| 1917 | ocfs2_leftmost_rec_contains(path_leaf_el(right_path), | ||
| 1918 | insert_cpos)) { | ||
| 1919 | /* | ||
| 1920 | * A rotate moves the rightmost left leaf | ||
| 1921 | * record over to the leftmost right leaf | ||
| 1922 | * slot. If we're doing an extent split | ||
| 1923 | * instead of a real insert, then we have to | ||
| 1924 | * check that the extent to be split wasn't | ||
| 1925 | * just moved over. If it was, then we can | ||
| 1926 | * exit here, passing left_path back - | ||
| 1927 | * ocfs2_split_extent() is smart enough to | ||
| 1928 | * search both leaves. | ||
| 1929 | */ | ||
| 1930 | *ret_left_path = left_path; | ||
| 1931 | goto out_ret_path; | ||
| 1932 | } | ||
| 1933 | |||
| 1703 | /* | 1934 | /* |
| 1704 | * There is no need to re-read the next right path | 1935 | * There is no need to re-read the next right path |
| 1705 | * as we know that it'll be our current left | 1936 | * as we know that it'll be our current left |
| @@ -1722,6 +1953,1031 @@ out_ret_path: | |||
| 1722 | return ret; | 1953 | return ret; |
| 1723 | } | 1954 | } |
| 1724 | 1955 | ||
| 1956 | static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | ||
| 1957 | struct ocfs2_path *path) | ||
| 1958 | { | ||
| 1959 | int i, idx; | ||
| 1960 | struct ocfs2_extent_rec *rec; | ||
| 1961 | struct ocfs2_extent_list *el; | ||
| 1962 | struct ocfs2_extent_block *eb; | ||
| 1963 | u32 range; | ||
| 1964 | |||
| 1965 | /* Path should always be rightmost. */ | ||
| 1966 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | ||
| 1967 | BUG_ON(eb->h_next_leaf_blk != 0ULL); | ||
| 1968 | |||
| 1969 | el = &eb->h_list; | ||
| 1970 | BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); | ||
| 1971 | idx = le16_to_cpu(el->l_next_free_rec) - 1; | ||
| 1972 | rec = &el->l_recs[idx]; | ||
| 1973 | range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
| 1974 | |||
| 1975 | for (i = 0; i < path->p_tree_depth; i++) { | ||
| 1976 | el = path->p_node[i].el; | ||
| 1977 | idx = le16_to_cpu(el->l_next_free_rec) - 1; | ||
| 1978 | rec = &el->l_recs[idx]; | ||
| 1979 | |||
| 1980 | rec->e_int_clusters = cpu_to_le32(range); | ||
| 1981 | le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); | ||
| 1982 | |||
| 1983 | ocfs2_journal_dirty(handle, path->p_node[i].bh); | ||
| 1984 | } | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | ||
| 1988 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
| 1989 | struct ocfs2_path *path, int unlink_start) | ||
| 1990 | { | ||
| 1991 | int ret, i; | ||
| 1992 | struct ocfs2_extent_block *eb; | ||
| 1993 | struct ocfs2_extent_list *el; | ||
| 1994 | struct buffer_head *bh; | ||
| 1995 | |||
| 1996 | for(i = unlink_start; i < path_num_items(path); i++) { | ||
| 1997 | bh = path->p_node[i].bh; | ||
| 1998 | |||
| 1999 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
| 2000 | /* | ||
| 2001 | * Not all nodes might have had their final count | ||
| 2002 | * decremented by the caller - handle this here. | ||
| 2003 | */ | ||
| 2004 | el = &eb->h_list; | ||
| 2005 | if (le16_to_cpu(el->l_next_free_rec) > 1) { | ||
| 2006 | mlog(ML_ERROR, | ||
| 2007 | "Inode %llu, attempted to remove extent block " | ||
| 2008 | "%llu with %u records\n", | ||
| 2009 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 2010 | (unsigned long long)le64_to_cpu(eb->h_blkno), | ||
| 2011 | le16_to_cpu(el->l_next_free_rec)); | ||
| 2012 | |||
| 2013 | ocfs2_journal_dirty(handle, bh); | ||
| 2014 | ocfs2_remove_from_cache(inode, bh); | ||
| 2015 | continue; | ||
| 2016 | } | ||
| 2017 | |||
| 2018 | el->l_next_free_rec = 0; | ||
| 2019 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
| 2020 | |||
| 2021 | ocfs2_journal_dirty(handle, bh); | ||
| 2022 | |||
| 2023 | ret = ocfs2_cache_extent_block_free(dealloc, eb); | ||
| 2024 | if (ret) | ||
| 2025 | mlog_errno(ret); | ||
| 2026 | |||
| 2027 | ocfs2_remove_from_cache(inode, bh); | ||
| 2028 | } | ||
| 2029 | } | ||
| 2030 | |||
| 2031 | static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, | ||
| 2032 | struct ocfs2_path *left_path, | ||
| 2033 | struct ocfs2_path *right_path, | ||
| 2034 | int subtree_index, | ||
| 2035 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 2036 | { | ||
| 2037 | int i; | ||
| 2038 | struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; | ||
| 2039 | struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; | ||
| 2040 | struct ocfs2_extent_list *el; | ||
| 2041 | struct ocfs2_extent_block *eb; | ||
| 2042 | |||
| 2043 | el = path_leaf_el(left_path); | ||
| 2044 | |||
| 2045 | eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; | ||
| 2046 | |||
| 2047 | for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) | ||
| 2048 | if (root_el->l_recs[i].e_blkno == eb->h_blkno) | ||
| 2049 | break; | ||
| 2050 | |||
| 2051 | BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec)); | ||
| 2052 | |||
| 2053 | memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); | ||
| 2054 | le16_add_cpu(&root_el->l_next_free_rec, -1); | ||
| 2055 | |||
| 2056 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
| 2057 | eb->h_next_leaf_blk = 0; | ||
| 2058 | |||
| 2059 | ocfs2_journal_dirty(handle, root_bh); | ||
| 2060 | ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
| 2061 | |||
| 2062 | ocfs2_unlink_path(inode, handle, dealloc, right_path, | ||
| 2063 | subtree_index + 1); | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | ||
| 2067 | struct ocfs2_path *left_path, | ||
| 2068 | struct ocfs2_path *right_path, | ||
| 2069 | int subtree_index, | ||
| 2070 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
| 2071 | int *deleted) | ||
| 2072 | { | ||
| 2073 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | ||
| 2074 | struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); | ||
| 2075 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 2076 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; | ||
| 2077 | struct ocfs2_extent_block *eb; | ||
| 2078 | |||
| 2079 | *deleted = 0; | ||
| 2080 | |||
| 2081 | right_leaf_el = path_leaf_el(right_path); | ||
| 2082 | left_leaf_el = path_leaf_el(left_path); | ||
| 2083 | root_bh = left_path->p_node[subtree_index].bh; | ||
| 2084 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
| 2085 | |||
| 2086 | if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0])) | ||
| 2087 | return 0; | ||
| 2088 | |||
| 2089 | eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data; | ||
| 2090 | if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) { | ||
| 2091 | /* | ||
| 2092 | * It's legal for us to proceed if the right leaf is | ||
| 2093 | * the rightmost one and it has an empty extent. There | ||
| 2094 | * are two cases to handle - whether the leaf will be | ||
| 2095 | * empty after removal or not. If the leaf isn't empty | ||
| 2096 | * then just remove the empty extent up front. The | ||
| 2097 | * next block will handle empty leaves by flagging | ||
| 2098 | * them for unlink. | ||
| 2099 | * | ||
| 2100 | * Non rightmost leaves will throw -EAGAIN and the | ||
| 2101 | * caller can manually move the subtree and retry. | ||
| 2102 | */ | ||
| 2103 | |||
| 2104 | if (eb->h_next_leaf_blk != 0ULL) | ||
| 2105 | return -EAGAIN; | ||
| 2106 | |||
| 2107 | if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { | ||
| 2108 | ret = ocfs2_journal_access(handle, inode, | ||
| 2109 | path_leaf_bh(right_path), | ||
| 2110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2111 | if (ret) { | ||
| 2112 | mlog_errno(ret); | ||
| 2113 | goto out; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | ocfs2_remove_empty_extent(right_leaf_el); | ||
| 2117 | } else | ||
| 2118 | right_has_empty = 1; | ||
| 2119 | } | ||
| 2120 | |||
| 2121 | if (eb->h_next_leaf_blk == 0ULL && | ||
| 2122 | le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) { | ||
| 2123 | /* | ||
| 2124 | * We have to update i_last_eb_blk during the meta | ||
| 2125 | * data delete. | ||
| 2126 | */ | ||
| 2127 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
| 2128 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2129 | if (ret) { | ||
| 2130 | mlog_errno(ret); | ||
| 2131 | goto out; | ||
| 2132 | } | ||
| 2133 | |||
| 2134 | del_right_subtree = 1; | ||
| 2135 | } | ||
| 2136 | |||
| 2137 | /* | ||
| 2138 | * Getting here with an empty extent in the right path implies | ||
| 2139 | * that it's the rightmost path and will be deleted. | ||
| 2140 | */ | ||
| 2141 | BUG_ON(right_has_empty && !del_right_subtree); | ||
| 2142 | |||
| 2143 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 2144 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2145 | if (ret) { | ||
| 2146 | mlog_errno(ret); | ||
| 2147 | goto out; | ||
| 2148 | } | ||
| 2149 | |||
| 2150 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { | ||
| 2151 | ret = ocfs2_journal_access(handle, inode, | ||
| 2152 | right_path->p_node[i].bh, | ||
| 2153 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2154 | if (ret) { | ||
| 2155 | mlog_errno(ret); | ||
| 2156 | goto out; | ||
| 2157 | } | ||
| 2158 | |||
| 2159 | ret = ocfs2_journal_access(handle, inode, | ||
| 2160 | left_path->p_node[i].bh, | ||
| 2161 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2162 | if (ret) { | ||
| 2163 | mlog_errno(ret); | ||
| 2164 | goto out; | ||
| 2165 | } | ||
| 2166 | } | ||
| 2167 | |||
| 2168 | if (!right_has_empty) { | ||
| 2169 | /* | ||
| 2170 | * Only do this if we're moving a real | ||
| 2171 | * record. Otherwise, the action is delayed until | ||
| 2172 | * after removal of the right path in which case we | ||
| 2173 | * can do a simple shift to remove the empty extent. | ||
| 2174 | */ | ||
| 2175 | ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]); | ||
| 2176 | memset(&right_leaf_el->l_recs[0], 0, | ||
| 2177 | sizeof(struct ocfs2_extent_rec)); | ||
| 2178 | } | ||
| 2179 | if (eb->h_next_leaf_blk == 0ULL) { | ||
| 2180 | /* | ||
| 2181 | * Move recs over to get rid of empty extent, decrease | ||
| 2182 | * next_free. This is allowed to remove the last | ||
| 2183 | * extent in our leaf (setting l_next_free_rec to | ||
| 2184 | * zero) - the delete code below won't care. | ||
| 2185 | */ | ||
| 2186 | ocfs2_remove_empty_extent(right_leaf_el); | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
| 2190 | if (ret) | ||
| 2191 | mlog_errno(ret); | ||
| 2192 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); | ||
| 2193 | if (ret) | ||
| 2194 | mlog_errno(ret); | ||
| 2195 | |||
| 2196 | if (del_right_subtree) { | ||
| 2197 | ocfs2_unlink_subtree(inode, handle, left_path, right_path, | ||
| 2198 | subtree_index, dealloc); | ||
| 2199 | ocfs2_update_edge_lengths(inode, handle, left_path); | ||
| 2200 | |||
| 2201 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
| 2202 | di->i_last_eb_blk = eb->h_blkno; | ||
| 2203 | |||
| 2204 | /* | ||
| 2205 | * Removal of the extent in the left leaf was skipped | ||
| 2206 | * above so we could delete the right path | ||
| 2207 | * 1st. | ||
| 2208 | */ | ||
| 2209 | if (right_has_empty) | ||
| 2210 | ocfs2_remove_empty_extent(left_leaf_el); | ||
| 2211 | |||
| 2212 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
| 2213 | if (ret) | ||
| 2214 | mlog_errno(ret); | ||
| 2215 | |||
| 2216 | *deleted = 1; | ||
| 2217 | } else | ||
| 2218 | ocfs2_complete_edge_insert(inode, handle, left_path, right_path, | ||
| 2219 | subtree_index); | ||
| 2220 | |||
| 2221 | out: | ||
| 2222 | return ret; | ||
| 2223 | } | ||
| 2224 | |||
| 2225 | /* | ||
| 2226 | * Given a full path, determine what cpos value would return us a path | ||
| 2227 | * containing the leaf immediately to the right of the current one. | ||
| 2228 | * | ||
| 2229 | * Will return zero if the path passed in is already the rightmost path. | ||
| 2230 | * | ||
| 2231 | * This looks similar, but is subtly different to | ||
| 2232 | * ocfs2_find_cpos_for_left_leaf(). | ||
| 2233 | */ | ||
| 2234 | static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, | ||
| 2235 | struct ocfs2_path *path, u32 *cpos) | ||
| 2236 | { | ||
| 2237 | int i, j, ret = 0; | ||
| 2238 | u64 blkno; | ||
| 2239 | struct ocfs2_extent_list *el; | ||
| 2240 | |||
| 2241 | *cpos = 0; | ||
| 2242 | |||
| 2243 | if (path->p_tree_depth == 0) | ||
| 2244 | return 0; | ||
| 2245 | |||
| 2246 | blkno = path_leaf_bh(path)->b_blocknr; | ||
| 2247 | |||
| 2248 | /* Start at the tree node just above the leaf and work our way up. */ | ||
| 2249 | i = path->p_tree_depth - 1; | ||
| 2250 | while (i >= 0) { | ||
| 2251 | int next_free; | ||
| 2252 | |||
| 2253 | el = path->p_node[i].el; | ||
| 2254 | |||
| 2255 | /* | ||
| 2256 | * Find the extent record just after the one in our | ||
| 2257 | * path. | ||
| 2258 | */ | ||
| 2259 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
| 2260 | for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { | ||
| 2261 | if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { | ||
| 2262 | if (j == (next_free - 1)) { | ||
| 2263 | if (i == 0) { | ||
| 2264 | /* | ||
| 2265 | * We've determined that the | ||
| 2266 | * path specified is already | ||
| 2267 | * the rightmost one - return a | ||
| 2268 | * cpos of zero. | ||
| 2269 | */ | ||
| 2270 | goto out; | ||
| 2271 | } | ||
| 2272 | /* | ||
| 2273 | * The rightmost record points to our | ||
| 2274 | * leaf - we need to travel up the | ||
| 2275 | * tree one level. | ||
| 2276 | */ | ||
| 2277 | goto next_node; | ||
| 2278 | } | ||
| 2279 | |||
| 2280 | *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos); | ||
| 2281 | goto out; | ||
| 2282 | } | ||
| 2283 | } | ||
| 2284 | |||
| 2285 | /* | ||
| 2286 | * If we got here, we never found a valid node where | ||
| 2287 | * the tree indicated one should be. | ||
| 2288 | */ | ||
| 2289 | ocfs2_error(sb, | ||
| 2290 | "Invalid extent tree at extent block %llu\n", | ||
| 2291 | (unsigned long long)blkno); | ||
| 2292 | ret = -EROFS; | ||
| 2293 | goto out; | ||
| 2294 | |||
| 2295 | next_node: | ||
| 2296 | blkno = path->p_node[i].bh->b_blocknr; | ||
| 2297 | i--; | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | out: | ||
| 2301 | return ret; | ||
| 2302 | } | ||
| 2303 | |||
| 2304 | static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, | ||
| 2305 | handle_t *handle, | ||
| 2306 | struct buffer_head *bh, | ||
| 2307 | struct ocfs2_extent_list *el) | ||
| 2308 | { | ||
| 2309 | int ret; | ||
| 2310 | |||
| 2311 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 2312 | return 0; | ||
| 2313 | |||
| 2314 | ret = ocfs2_journal_access(handle, inode, bh, | ||
| 2315 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2316 | if (ret) { | ||
| 2317 | mlog_errno(ret); | ||
| 2318 | goto out; | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | ocfs2_remove_empty_extent(el); | ||
| 2322 | |||
| 2323 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 2324 | if (ret) | ||
| 2325 | mlog_errno(ret); | ||
| 2326 | |||
| 2327 | out: | ||
| 2328 | return ret; | ||
| 2329 | } | ||
| 2330 | |||
| 2331 | static int __ocfs2_rotate_tree_left(struct inode *inode, | ||
| 2332 | handle_t *handle, int orig_credits, | ||
| 2333 | struct ocfs2_path *path, | ||
| 2334 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
| 2335 | struct ocfs2_path **empty_extent_path) | ||
| 2336 | { | ||
| 2337 | int ret, subtree_root, deleted; | ||
| 2338 | u32 right_cpos; | ||
| 2339 | struct ocfs2_path *left_path = NULL; | ||
| 2340 | struct ocfs2_path *right_path = NULL; | ||
| 2341 | |||
| 2342 | BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); | ||
| 2343 | |||
| 2344 | *empty_extent_path = NULL; | ||
| 2345 | |||
| 2346 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, | ||
| 2347 | &right_cpos); | ||
| 2348 | if (ret) { | ||
| 2349 | mlog_errno(ret); | ||
| 2350 | goto out; | ||
| 2351 | } | ||
| 2352 | |||
| 2353 | left_path = ocfs2_new_path(path_root_bh(path), | ||
| 2354 | path_root_el(path)); | ||
| 2355 | if (!left_path) { | ||
| 2356 | ret = -ENOMEM; | ||
| 2357 | mlog_errno(ret); | ||
| 2358 | goto out; | ||
| 2359 | } | ||
| 2360 | |||
| 2361 | ocfs2_cp_path(left_path, path); | ||
| 2362 | |||
| 2363 | right_path = ocfs2_new_path(path_root_bh(path), | ||
| 2364 | path_root_el(path)); | ||
| 2365 | if (!right_path) { | ||
| 2366 | ret = -ENOMEM; | ||
| 2367 | mlog_errno(ret); | ||
| 2368 | goto out; | ||
| 2369 | } | ||
| 2370 | |||
| 2371 | while (right_cpos) { | ||
| 2372 | ret = ocfs2_find_path(inode, right_path, right_cpos); | ||
| 2373 | if (ret) { | ||
| 2374 | mlog_errno(ret); | ||
| 2375 | goto out; | ||
| 2376 | } | ||
| 2377 | |||
| 2378 | subtree_root = ocfs2_find_subtree_root(inode, left_path, | ||
| 2379 | right_path); | ||
| 2380 | |||
| 2381 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", | ||
| 2382 | subtree_root, | ||
| 2383 | (unsigned long long) | ||
| 2384 | right_path->p_node[subtree_root].bh->b_blocknr, | ||
| 2385 | right_path->p_tree_depth); | ||
| 2386 | |||
| 2387 | ret = ocfs2_extend_rotate_transaction(handle, subtree_root, | ||
| 2388 | orig_credits, left_path); | ||
| 2389 | if (ret) { | ||
| 2390 | mlog_errno(ret); | ||
| 2391 | goto out; | ||
| 2392 | } | ||
| 2393 | |||
| 2394 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | ||
| 2395 | right_path, subtree_root, | ||
| 2396 | dealloc, &deleted); | ||
| 2397 | if (ret == -EAGAIN) { | ||
| 2398 | /* | ||
| 2399 | * The rotation has to temporarily stop due to | ||
| 2400 | * the right subtree having an empty | ||
| 2401 | * extent. Pass it back to the caller for a | ||
| 2402 | * fixup. | ||
| 2403 | */ | ||
| 2404 | *empty_extent_path = right_path; | ||
| 2405 | right_path = NULL; | ||
| 2406 | goto out; | ||
| 2407 | } | ||
| 2408 | if (ret) { | ||
| 2409 | mlog_errno(ret); | ||
| 2410 | goto out; | ||
| 2411 | } | ||
| 2412 | |||
| 2413 | /* | ||
| 2414 | * The subtree rotate might have removed records on | ||
| 2415 | * the rightmost edge. If so, then rotation is | ||
| 2416 | * complete. | ||
| 2417 | */ | ||
| 2418 | if (deleted) | ||
| 2419 | break; | ||
| 2420 | |||
| 2421 | ocfs2_mv_path(left_path, right_path); | ||
| 2422 | |||
| 2423 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | ||
| 2424 | &right_cpos); | ||
| 2425 | if (ret) { | ||
| 2426 | mlog_errno(ret); | ||
| 2427 | goto out; | ||
| 2428 | } | ||
| 2429 | } | ||
| 2430 | |||
| 2431 | out: | ||
| 2432 | ocfs2_free_path(right_path); | ||
| 2433 | ocfs2_free_path(left_path); | ||
| 2434 | |||
| 2435 | return ret; | ||
| 2436 | } | ||
| 2437 | |||
| 2438 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | ||
| 2439 | struct ocfs2_path *path, | ||
| 2440 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 2441 | { | ||
| 2442 | int ret, subtree_index; | ||
| 2443 | u32 cpos; | ||
| 2444 | struct ocfs2_path *left_path = NULL; | ||
| 2445 | struct ocfs2_dinode *di; | ||
| 2446 | struct ocfs2_extent_block *eb; | ||
| 2447 | struct ocfs2_extent_list *el; | ||
| 2448 | |||
| 2449 | /* | ||
| 2450 | * XXX: This code assumes that the root is an inode, which is | ||
| 2451 | * true for now but may change as tree code gets generic. | ||
| 2452 | */ | ||
| 2453 | di = (struct ocfs2_dinode *)path_root_bh(path)->b_data; | ||
| 2454 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
| 2455 | ret = -EIO; | ||
| 2456 | ocfs2_error(inode->i_sb, | ||
| 2457 | "Inode %llu has invalid path root", | ||
| 2458 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 2459 | goto out; | ||
| 2460 | } | ||
| 2461 | |||
| 2462 | /* | ||
| 2463 | * There's two ways we handle this depending on | ||
| 2464 | * whether path is the only existing one. | ||
| 2465 | */ | ||
| 2466 | ret = ocfs2_extend_rotate_transaction(handle, 0, | ||
| 2467 | handle->h_buffer_credits, | ||
| 2468 | path); | ||
| 2469 | if (ret) { | ||
| 2470 | mlog_errno(ret); | ||
| 2471 | goto out; | ||
| 2472 | } | ||
| 2473 | |||
| 2474 | ret = ocfs2_journal_access_path(inode, handle, path); | ||
| 2475 | if (ret) { | ||
| 2476 | mlog_errno(ret); | ||
| 2477 | goto out; | ||
| 2478 | } | ||
| 2479 | |||
| 2480 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); | ||
| 2481 | if (ret) { | ||
| 2482 | mlog_errno(ret); | ||
| 2483 | goto out; | ||
| 2484 | } | ||
| 2485 | |||
| 2486 | if (cpos) { | ||
| 2487 | /* | ||
| 2488 | * We have a path to the left of this one - it needs | ||
| 2489 | * an update too. | ||
| 2490 | */ | ||
| 2491 | left_path = ocfs2_new_path(path_root_bh(path), | ||
| 2492 | path_root_el(path)); | ||
| 2493 | if (!left_path) { | ||
| 2494 | ret = -ENOMEM; | ||
| 2495 | mlog_errno(ret); | ||
| 2496 | goto out; | ||
| 2497 | } | ||
| 2498 | |||
| 2499 | ret = ocfs2_find_path(inode, left_path, cpos); | ||
| 2500 | if (ret) { | ||
| 2501 | mlog_errno(ret); | ||
| 2502 | goto out; | ||
| 2503 | } | ||
| 2504 | |||
| 2505 | ret = ocfs2_journal_access_path(inode, handle, left_path); | ||
| 2506 | if (ret) { | ||
| 2507 | mlog_errno(ret); | ||
| 2508 | goto out; | ||
| 2509 | } | ||
| 2510 | |||
| 2511 | subtree_index = ocfs2_find_subtree_root(inode, left_path, path); | ||
| 2512 | |||
| 2513 | ocfs2_unlink_subtree(inode, handle, left_path, path, | ||
| 2514 | subtree_index, dealloc); | ||
| 2515 | ocfs2_update_edge_lengths(inode, handle, left_path); | ||
| 2516 | |||
| 2517 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | ||
| 2518 | di->i_last_eb_blk = eb->h_blkno; | ||
| 2519 | } else { | ||
| 2520 | /* | ||
| 2521 | * 'path' is also the leftmost path which | ||
| 2522 | * means it must be the only one. This gets | ||
| 2523 | * handled differently because we want to | ||
| 2524 | * revert the inode back to having extents | ||
| 2525 | * in-line. | ||
| 2526 | */ | ||
| 2527 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | ||
| 2528 | |||
| 2529 | el = &di->id2.i_list; | ||
| 2530 | el->l_tree_depth = 0; | ||
| 2531 | el->l_next_free_rec = 0; | ||
| 2532 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
| 2533 | |||
| 2534 | di->i_last_eb_blk = 0; | ||
| 2535 | } | ||
| 2536 | |||
| 2537 | ocfs2_journal_dirty(handle, path_root_bh(path)); | ||
| 2538 | |||
| 2539 | out: | ||
| 2540 | ocfs2_free_path(left_path); | ||
| 2541 | return ret; | ||
| 2542 | } | ||
| 2543 | |||
| 2544 | /* | ||
| 2545 | * Left rotation of btree records. | ||
| 2546 | * | ||
| 2547 | * In many ways, this is (unsurprisingly) the opposite of right | ||
| 2548 | * rotation. We start at some non-rightmost path containing an empty | ||
| 2549 | * extent in the leaf block. The code works its way to the rightmost | ||
| 2550 | * path by rotating records to the left in every subtree. | ||
| 2551 | * | ||
| 2552 | * This is used by any code which reduces the number of extent records | ||
| 2553 | * in a leaf. After removal, an empty record should be placed in the | ||
| 2554 | * leftmost list position. | ||
| 2555 | * | ||
| 2556 | * This won't handle a length update of the rightmost path records if | ||
| 2557 | * the rightmost tree leaf record is removed so the caller is | ||
| 2558 | * responsible for detecting and correcting that. | ||
| 2559 | */ | ||
| 2560 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | ||
| 2561 | struct ocfs2_path *path, | ||
| 2562 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 2563 | { | ||
| 2564 | int ret, orig_credits = handle->h_buffer_credits; | ||
| 2565 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | ||
| 2566 | struct ocfs2_extent_block *eb; | ||
| 2567 | struct ocfs2_extent_list *el; | ||
| 2568 | |||
| 2569 | el = path_leaf_el(path); | ||
| 2570 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 2571 | return 0; | ||
| 2572 | |||
| 2573 | if (path->p_tree_depth == 0) { | ||
| 2574 | rightmost_no_delete: | ||
| 2575 | /* | ||
| 2576 | * In-inode extents. This is trivially handled, so do | ||
| 2577 | * it up front. | ||
| 2578 | */ | ||
| 2579 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | ||
| 2580 | path_leaf_bh(path), | ||
| 2581 | path_leaf_el(path)); | ||
| 2582 | if (ret) | ||
| 2583 | mlog_errno(ret); | ||
| 2584 | goto out; | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | /* | ||
| 2588 | * Handle rightmost branch now. There's several cases: | ||
| 2589 | * 1) simple rotation leaving records in there. That's trivial. | ||
| 2590 | * 2) rotation requiring a branch delete - there's no more | ||
| 2591 | * records left. Two cases of this: | ||
| 2592 | * a) There are branches to the left. | ||
| 2593 | * b) This is also the leftmost (the only) branch. | ||
| 2594 | * | ||
| 2595 | * 1) is handled via ocfs2_rotate_rightmost_leaf_left() | ||
| 2596 | * 2a) we need the left branch so that we can update it with the unlink | ||
| 2597 | * 2b) we need to bring the inode back to inline extents. | ||
| 2598 | */ | ||
| 2599 | |||
| 2600 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | ||
| 2601 | el = &eb->h_list; | ||
| 2602 | if (eb->h_next_leaf_blk == 0) { | ||
| 2603 | /* | ||
| 2604 | * This gets a bit tricky if we're going to delete the | ||
| 2605 | * rightmost path. Get the other cases out of the way | ||
| 2606 | * 1st. | ||
| 2607 | */ | ||
| 2608 | if (le16_to_cpu(el->l_next_free_rec) > 1) | ||
| 2609 | goto rightmost_no_delete; | ||
| 2610 | |||
| 2611 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | ||
| 2612 | ret = -EIO; | ||
| 2613 | ocfs2_error(inode->i_sb, | ||
| 2614 | "Inode %llu has empty extent block at %llu", | ||
| 2615 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 2616 | (unsigned long long)le64_to_cpu(eb->h_blkno)); | ||
| 2617 | goto out; | ||
| 2618 | } | ||
| 2619 | |||
| 2620 | /* | ||
| 2621 | * XXX: The caller can not trust "path" any more after | ||
| 2622 | * this as it will have been deleted. What do we do? | ||
| 2623 | * | ||
| 2624 | * In theory the rotate-for-merge code will never get | ||
| 2625 | * here because it'll always ask for a rotate in a | ||
| 2626 | * nonempty list. | ||
| 2627 | */ | ||
| 2628 | |||
| 2629 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | ||
| 2630 | dealloc); | ||
| 2631 | if (ret) | ||
| 2632 | mlog_errno(ret); | ||
| 2633 | goto out; | ||
| 2634 | } | ||
| 2635 | |||
| 2636 | /* | ||
| 2637 | * Now we can loop, remembering the path we get from -EAGAIN | ||
| 2638 | * and restarting from there. | ||
| 2639 | */ | ||
| 2640 | try_rotate: | ||
| 2641 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | ||
| 2642 | dealloc, &restart_path); | ||
| 2643 | if (ret && ret != -EAGAIN) { | ||
| 2644 | mlog_errno(ret); | ||
| 2645 | goto out; | ||
| 2646 | } | ||
| 2647 | |||
| 2648 | while (ret == -EAGAIN) { | ||
| 2649 | tmp_path = restart_path; | ||
| 2650 | restart_path = NULL; | ||
| 2651 | |||
| 2652 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | ||
| 2653 | tmp_path, dealloc, | ||
| 2654 | &restart_path); | ||
| 2655 | if (ret && ret != -EAGAIN) { | ||
| 2656 | mlog_errno(ret); | ||
| 2657 | goto out; | ||
| 2658 | } | ||
| 2659 | |||
| 2660 | ocfs2_free_path(tmp_path); | ||
| 2661 | tmp_path = NULL; | ||
| 2662 | |||
| 2663 | if (ret == 0) | ||
| 2664 | goto try_rotate; | ||
| 2665 | } | ||
| 2666 | |||
| 2667 | out: | ||
| 2668 | ocfs2_free_path(tmp_path); | ||
| 2669 | ocfs2_free_path(restart_path); | ||
| 2670 | return ret; | ||
| 2671 | } | ||
| 2672 | |||
| 2673 | static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | ||
| 2674 | int index) | ||
| 2675 | { | ||
| 2676 | struct ocfs2_extent_rec *rec = &el->l_recs[index]; | ||
| 2677 | unsigned int size; | ||
| 2678 | |||
| 2679 | if (rec->e_leaf_clusters == 0) { | ||
| 2680 | /* | ||
| 2681 | * We consumed all of the merged-from record. An empty | ||
| 2682 | * extent cannot exist anywhere but the 1st array | ||
| 2683 | * position, so move things over if the merged-from | ||
| 2684 | * record doesn't occupy that position. | ||
| 2685 | * | ||
| 2686 | * This creates a new empty extent so the caller | ||
| 2687 | * should be smart enough to have removed any existing | ||
| 2688 | * ones. | ||
| 2689 | */ | ||
| 2690 | if (index > 0) { | ||
| 2691 | BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); | ||
| 2692 | size = index * sizeof(struct ocfs2_extent_rec); | ||
| 2693 | memmove(&el->l_recs[1], &el->l_recs[0], size); | ||
| 2694 | } | ||
| 2695 | |||
| 2696 | /* | ||
| 2697 | * Always memset - the caller doesn't check whether it | ||
| 2698 | * created an empty extent, so there could be junk in | ||
| 2699 | * the other fields. | ||
| 2700 | */ | ||
| 2701 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | ||
| 2702 | } | ||
| 2703 | } | ||
| 2704 | |||
| 2705 | /* | ||
| 2706 | * Remove split_rec clusters from the record at index and merge them | ||
| 2707 | * onto the beginning of the record at index + 1. | ||
| 2708 | */ | ||
| 2709 | static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | ||
| 2710 | handle_t *handle, | ||
| 2711 | struct ocfs2_extent_rec *split_rec, | ||
| 2712 | struct ocfs2_extent_list *el, int index) | ||
| 2713 | { | ||
| 2714 | int ret; | ||
| 2715 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | ||
| 2716 | struct ocfs2_extent_rec *left_rec; | ||
| 2717 | struct ocfs2_extent_rec *right_rec; | ||
| 2718 | |||
| 2719 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); | ||
| 2720 | |||
| 2721 | left_rec = &el->l_recs[index]; | ||
| 2722 | right_rec = &el->l_recs[index + 1]; | ||
| 2723 | |||
| 2724 | ret = ocfs2_journal_access(handle, inode, bh, | ||
| 2725 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2726 | if (ret) { | ||
| 2727 | mlog_errno(ret); | ||
| 2728 | goto out; | ||
| 2729 | } | ||
| 2730 | |||
| 2731 | le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters); | ||
| 2732 | |||
| 2733 | le32_add_cpu(&right_rec->e_cpos, -split_clusters); | ||
| 2734 | le64_add_cpu(&right_rec->e_blkno, | ||
| 2735 | -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | ||
| 2736 | le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); | ||
| 2737 | |||
| 2738 | ocfs2_cleanup_merge(el, index); | ||
| 2739 | |||
| 2740 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 2741 | if (ret) | ||
| 2742 | mlog_errno(ret); | ||
| 2743 | |||
| 2744 | out: | ||
| 2745 | return ret; | ||
| 2746 | } | ||
| 2747 | |||
| 2748 | /* | ||
| 2749 | * Remove split_rec clusters from the record at index and merge them | ||
| 2750 | * onto the tail of the record at index - 1. | ||
| 2751 | */ | ||
| 2752 | static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | ||
| 2753 | handle_t *handle, | ||
| 2754 | struct ocfs2_extent_rec *split_rec, | ||
| 2755 | struct ocfs2_extent_list *el, int index) | ||
| 2756 | { | ||
| 2757 | int ret, has_empty_extent = 0; | ||
| 2758 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | ||
| 2759 | struct ocfs2_extent_rec *left_rec; | ||
| 2760 | struct ocfs2_extent_rec *right_rec; | ||
| 2761 | |||
| 2762 | BUG_ON(index <= 0); | ||
| 2763 | |||
| 2764 | left_rec = &el->l_recs[index - 1]; | ||
| 2765 | right_rec = &el->l_recs[index]; | ||
| 2766 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 2767 | has_empty_extent = 1; | ||
| 2768 | |||
| 2769 | ret = ocfs2_journal_access(handle, inode, bh, | ||
| 2770 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2771 | if (ret) { | ||
| 2772 | mlog_errno(ret); | ||
| 2773 | goto out; | ||
| 2774 | } | ||
| 2775 | |||
| 2776 | if (has_empty_extent && index == 1) { | ||
| 2777 | /* | ||
| 2778 | * The easy case - we can just plop the record right in. | ||
| 2779 | */ | ||
| 2780 | *left_rec = *split_rec; | ||
| 2781 | |||
| 2782 | has_empty_extent = 0; | ||
| 2783 | } else { | ||
| 2784 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | ||
| 2785 | } | ||
| 2786 | |||
| 2787 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | ||
| 2788 | le64_add_cpu(&right_rec->e_blkno, | ||
| 2789 | ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | ||
| 2790 | le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); | ||
| 2791 | |||
| 2792 | ocfs2_cleanup_merge(el, index); | ||
| 2793 | |||
| 2794 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 2795 | if (ret) | ||
| 2796 | mlog_errno(ret); | ||
| 2797 | |||
| 2798 | out: | ||
| 2799 | return ret; | ||
| 2800 | } | ||
| 2801 | |||
| 2802 | static int ocfs2_try_to_merge_extent(struct inode *inode, | ||
| 2803 | handle_t *handle, | ||
| 2804 | struct ocfs2_path *left_path, | ||
| 2805 | int split_index, | ||
| 2806 | struct ocfs2_extent_rec *split_rec, | ||
| 2807 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
| 2808 | struct ocfs2_merge_ctxt *ctxt) | ||
| 2809 | |||
| 2810 | { | ||
| 2811 | int ret = 0, delete_tail_recs = 0; | ||
| 2812 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | ||
| 2813 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | ||
| 2814 | |||
| 2815 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); | ||
| 2816 | |||
| 2817 | if (ctxt->c_split_covers_rec) { | ||
| 2818 | delete_tail_recs++; | ||
| 2819 | |||
| 2820 | if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || | ||
| 2821 | ctxt->c_has_empty_extent) | ||
| 2822 | delete_tail_recs++; | ||
| 2823 | |||
| 2824 | if (ctxt->c_has_empty_extent) { | ||
| 2825 | /* | ||
| 2826 | * The merge code will need to create an empty | ||
| 2827 | * extent to take the place of the newly | ||
| 2828 | * emptied slot. Remove any pre-existing empty | ||
| 2829 | * extents - having more than one in a leaf is | ||
| 2830 | * illegal. | ||
| 2831 | */ | ||
| 2832 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
| 2833 | dealloc); | ||
| 2834 | if (ret) { | ||
| 2835 | mlog_errno(ret); | ||
| 2836 | goto out; | ||
| 2837 | } | ||
| 2838 | split_index--; | ||
| 2839 | rec = &el->l_recs[split_index]; | ||
| 2840 | } | ||
| 2841 | } | ||
| 2842 | |||
| 2843 | if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { | ||
| 2844 | /* | ||
| 2845 | * Left-right contig implies this. | ||
| 2846 | */ | ||
| 2847 | BUG_ON(!ctxt->c_split_covers_rec); | ||
| 2848 | BUG_ON(split_index == 0); | ||
| 2849 | |||
| 2850 | /* | ||
| 2851 | * Since the leftright insert always covers the entire | ||
| 2852 | * extent, this call will delete the insert record | ||
| 2853 | * entirely, resulting in an empty extent record added to | ||
| 2854 | * the extent block. | ||
| 2855 | * | ||
| 2856 | * Since the adding of an empty extent shifts | ||
| 2857 | * everything back to the right, there's no need to | ||
| 2858 | * update split_index here. | ||
| 2859 | */ | ||
| 2860 | ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), | ||
| 2861 | handle, split_rec, el, split_index); | ||
| 2862 | if (ret) { | ||
| 2863 | mlog_errno(ret); | ||
| 2864 | goto out; | ||
| 2865 | } | ||
| 2866 | |||
| 2867 | /* | ||
| 2868 | * We can only get this from logic error above. | ||
| 2869 | */ | ||
| 2870 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | ||
| 2871 | |||
| 2872 | /* | ||
| 2873 | * The left merge left us with an empty extent, remove | ||
| 2874 | * it. | ||
| 2875 | */ | ||
| 2876 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); | ||
| 2877 | if (ret) { | ||
| 2878 | mlog_errno(ret); | ||
| 2879 | goto out; | ||
| 2880 | } | ||
| 2881 | split_index--; | ||
| 2882 | rec = &el->l_recs[split_index]; | ||
| 2883 | |||
| 2884 | /* | ||
| 2885 | * Note that we don't pass split_rec here on purpose - | ||
| 2886 | * we've merged it into the left side. | ||
| 2887 | */ | ||
| 2888 | ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), | ||
| 2889 | handle, rec, el, split_index); | ||
| 2890 | if (ret) { | ||
| 2891 | mlog_errno(ret); | ||
| 2892 | goto out; | ||
| 2893 | } | ||
| 2894 | |||
| 2895 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | ||
| 2896 | |||
| 2897 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
| 2898 | dealloc); | ||
| 2899 | /* | ||
| 2900 | * Error from this last rotate is not critical, so | ||
| 2901 | * print but don't bubble it up. | ||
| 2902 | */ | ||
| 2903 | if (ret) | ||
| 2904 | mlog_errno(ret); | ||
| 2905 | ret = 0; | ||
| 2906 | } else { | ||
| 2907 | /* | ||
| 2908 | * Merge a record to the left or right. | ||
| 2909 | * | ||
| 2910 | * 'contig_type' is relative to the existing record, | ||
| 2911 | * so for example, if we're "right contig", it's to | ||
| 2912 | * the record on the left (hence the left merge). | ||
| 2913 | */ | ||
| 2914 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | ||
| 2915 | ret = ocfs2_merge_rec_left(inode, | ||
| 2916 | path_leaf_bh(left_path), | ||
| 2917 | handle, split_rec, el, | ||
| 2918 | split_index); | ||
| 2919 | if (ret) { | ||
| 2920 | mlog_errno(ret); | ||
| 2921 | goto out; | ||
| 2922 | } | ||
| 2923 | } else { | ||
| 2924 | ret = ocfs2_merge_rec_right(inode, | ||
| 2925 | path_leaf_bh(left_path), | ||
| 2926 | handle, split_rec, el, | ||
| 2927 | split_index); | ||
| 2928 | if (ret) { | ||
| 2929 | mlog_errno(ret); | ||
| 2930 | goto out; | ||
| 2931 | } | ||
| 2932 | } | ||
| 2933 | |||
| 2934 | if (ctxt->c_split_covers_rec) { | ||
| 2935 | /* | ||
| 2936 | * The merge may have left an empty extent in | ||
| 2937 | * our leaf. Try to rotate it away. | ||
| 2938 | */ | ||
| 2939 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
| 2940 | dealloc); | ||
| 2941 | if (ret) | ||
| 2942 | mlog_errno(ret); | ||
| 2943 | ret = 0; | ||
| 2944 | } | ||
| 2945 | } | ||
| 2946 | |||
| 2947 | out: | ||
| 2948 | return ret; | ||
| 2949 | } | ||
| 2950 | |||
| 2951 | static void ocfs2_subtract_from_rec(struct super_block *sb, | ||
| 2952 | enum ocfs2_split_type split, | ||
| 2953 | struct ocfs2_extent_rec *rec, | ||
| 2954 | struct ocfs2_extent_rec *split_rec) | ||
| 2955 | { | ||
| 2956 | u64 len_blocks; | ||
| 2957 | |||
| 2958 | len_blocks = ocfs2_clusters_to_blocks(sb, | ||
| 2959 | le16_to_cpu(split_rec->e_leaf_clusters)); | ||
| 2960 | |||
| 2961 | if (split == SPLIT_LEFT) { | ||
| 2962 | /* | ||
| 2963 | * Region is on the left edge of the existing | ||
| 2964 | * record. | ||
| 2965 | */ | ||
| 2966 | le32_add_cpu(&rec->e_cpos, | ||
| 2967 | le16_to_cpu(split_rec->e_leaf_clusters)); | ||
| 2968 | le64_add_cpu(&rec->e_blkno, len_blocks); | ||
| 2969 | le16_add_cpu(&rec->e_leaf_clusters, | ||
| 2970 | -le16_to_cpu(split_rec->e_leaf_clusters)); | ||
| 2971 | } else { | ||
| 2972 | /* | ||
| 2973 | * Region is on the right edge of the existing | ||
| 2974 | * record. | ||
| 2975 | */ | ||
| 2976 | le16_add_cpu(&rec->e_leaf_clusters, | ||
| 2977 | -le16_to_cpu(split_rec->e_leaf_clusters)); | ||
| 2978 | } | ||
| 2979 | } | ||
| 2980 | |||
| 1725 | /* | 2981 | /* |
| 1726 | * Do the final bits of extent record insertion at the target leaf | 2982 | * Do the final bits of extent record insertion at the target leaf |
| 1727 | * list. If this leaf is part of an allocation tree, it is assumed | 2983 | * list. If this leaf is part of an allocation tree, it is assumed |
| @@ -1738,6 +2994,15 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
| 1738 | 2994 | ||
| 1739 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); | 2995 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); |
| 1740 | 2996 | ||
| 2997 | if (insert->ins_split != SPLIT_NONE) { | ||
| 2998 | i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); | ||
| 2999 | BUG_ON(i == -1); | ||
| 3000 | rec = &el->l_recs[i]; | ||
| 3001 | ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, | ||
| 3002 | insert_rec); | ||
| 3003 | goto rotate; | ||
| 3004 | } | ||
| 3005 | |||
| 1741 | /* | 3006 | /* |
| 1742 | * Contiguous insert - either left or right. | 3007 | * Contiguous insert - either left or right. |
| 1743 | */ | 3008 | */ |
| @@ -1792,6 +3057,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
| 1792 | return; | 3057 | return; |
| 1793 | } | 3058 | } |
| 1794 | 3059 | ||
| 3060 | rotate: | ||
| 1795 | /* | 3061 | /* |
| 1796 | * Ok, we have to rotate. | 3062 | * Ok, we have to rotate. |
| 1797 | * | 3063 | * |
| @@ -1815,13 +3081,53 @@ static inline void ocfs2_update_dinode_clusters(struct inode *inode, | |||
| 1815 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 3081 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
| 1816 | } | 3082 | } |
| 1817 | 3083 | ||
| 3084 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | ||
| 3085 | handle_t *handle, | ||
| 3086 | struct ocfs2_path *path, | ||
| 3087 | struct ocfs2_extent_rec *insert_rec) | ||
| 3088 | { | ||
| 3089 | int ret, i, next_free; | ||
| 3090 | struct buffer_head *bh; | ||
| 3091 | struct ocfs2_extent_list *el; | ||
| 3092 | struct ocfs2_extent_rec *rec; | ||
| 3093 | |||
| 3094 | /* | ||
| 3095 | * Update everything except the leaf block. | ||
| 3096 | */ | ||
| 3097 | for (i = 0; i < path->p_tree_depth; i++) { | ||
| 3098 | bh = path->p_node[i].bh; | ||
| 3099 | el = path->p_node[i].el; | ||
| 3100 | |||
| 3101 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
| 3102 | if (next_free == 0) { | ||
| 3103 | ocfs2_error(inode->i_sb, | ||
| 3104 | "Dinode %llu has a bad extent list", | ||
| 3105 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 3106 | ret = -EIO; | ||
| 3107 | return; | ||
| 3108 | } | ||
| 3109 | |||
| 3110 | rec = &el->l_recs[next_free - 1]; | ||
| 3111 | |||
| 3112 | rec->e_int_clusters = insert_rec->e_cpos; | ||
| 3113 | le32_add_cpu(&rec->e_int_clusters, | ||
| 3114 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
| 3115 | le32_add_cpu(&rec->e_int_clusters, | ||
| 3116 | -le32_to_cpu(rec->e_cpos)); | ||
| 3117 | |||
| 3118 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 3119 | if (ret) | ||
| 3120 | mlog_errno(ret); | ||
| 3121 | |||
| 3122 | } | ||
| 3123 | } | ||
| 3124 | |||
| 1818 | static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | 3125 | static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, |
| 1819 | struct ocfs2_extent_rec *insert_rec, | 3126 | struct ocfs2_extent_rec *insert_rec, |
| 1820 | struct ocfs2_path *right_path, | 3127 | struct ocfs2_path *right_path, |
| 1821 | struct ocfs2_path **ret_left_path) | 3128 | struct ocfs2_path **ret_left_path) |
| 1822 | { | 3129 | { |
| 1823 | int ret, i, next_free; | 3130 | int ret, next_free; |
| 1824 | struct buffer_head *bh; | ||
| 1825 | struct ocfs2_extent_list *el; | 3131 | struct ocfs2_extent_list *el; |
| 1826 | struct ocfs2_path *left_path = NULL; | 3132 | struct ocfs2_path *left_path = NULL; |
| 1827 | 3133 | ||
| @@ -1887,40 +3193,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | |||
| 1887 | goto out; | 3193 | goto out; |
| 1888 | } | 3194 | } |
| 1889 | 3195 | ||
| 1890 | el = path_root_el(right_path); | 3196 | ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); |
| 1891 | bh = path_root_bh(right_path); | ||
| 1892 | i = 0; | ||
| 1893 | while (1) { | ||
| 1894 | struct ocfs2_extent_rec *rec; | ||
| 1895 | |||
| 1896 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
| 1897 | if (next_free == 0) { | ||
| 1898 | ocfs2_error(inode->i_sb, | ||
| 1899 | "Dinode %llu has a bad extent list", | ||
| 1900 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 1901 | ret = -EIO; | ||
| 1902 | goto out; | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | rec = &el->l_recs[next_free - 1]; | ||
| 1906 | |||
| 1907 | rec->e_int_clusters = insert_rec->e_cpos; | ||
| 1908 | le32_add_cpu(&rec->e_int_clusters, | ||
| 1909 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
| 1910 | le32_add_cpu(&rec->e_int_clusters, | ||
| 1911 | -le32_to_cpu(rec->e_cpos)); | ||
| 1912 | |||
| 1913 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 1914 | if (ret) | ||
| 1915 | mlog_errno(ret); | ||
| 1916 | |||
| 1917 | /* Don't touch the leaf node */ | ||
| 1918 | if (++i >= right_path->p_tree_depth) | ||
| 1919 | break; | ||
| 1920 | |||
| 1921 | bh = right_path->p_node[i].bh; | ||
| 1922 | el = right_path->p_node[i].el; | ||
| 1923 | } | ||
| 1924 | 3197 | ||
| 1925 | *ret_left_path = left_path; | 3198 | *ret_left_path = left_path; |
| 1926 | ret = 0; | 3199 | ret = 0; |
| @@ -1931,6 +3204,83 @@ out: | |||
| 1931 | return ret; | 3204 | return ret; |
| 1932 | } | 3205 | } |
| 1933 | 3206 | ||
| 3207 | static void ocfs2_split_record(struct inode *inode, | ||
| 3208 | struct ocfs2_path *left_path, | ||
| 3209 | struct ocfs2_path *right_path, | ||
| 3210 | struct ocfs2_extent_rec *split_rec, | ||
| 3211 | enum ocfs2_split_type split) | ||
| 3212 | { | ||
| 3213 | int index; | ||
| 3214 | u32 cpos = le32_to_cpu(split_rec->e_cpos); | ||
| 3215 | struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; | ||
| 3216 | struct ocfs2_extent_rec *rec, *tmprec; | ||
| 3217 | |||
| 3218 | right_el = path_leaf_el(right_path);; | ||
| 3219 | if (left_path) | ||
| 3220 | left_el = path_leaf_el(left_path); | ||
| 3221 | |||
| 3222 | el = right_el; | ||
| 3223 | insert_el = right_el; | ||
| 3224 | index = ocfs2_search_extent_list(el, cpos); | ||
| 3225 | if (index != -1) { | ||
| 3226 | if (index == 0 && left_path) { | ||
| 3227 | BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); | ||
| 3228 | |||
| 3229 | /* | ||
| 3230 | * This typically means that the record | ||
| 3231 | * started in the left path but moved to the | ||
| 3232 | * right as a result of rotation. We either | ||
| 3233 | * move the existing record to the left, or we | ||
| 3234 | * do the later insert there. | ||
| 3235 | * | ||
| 3236 | * In this case, the left path should always | ||
| 3237 | * exist as the rotate code will have passed | ||
| 3238 | * it back for a post-insert update. | ||
| 3239 | */ | ||
| 3240 | |||
| 3241 | if (split == SPLIT_LEFT) { | ||
| 3242 | /* | ||
| 3243 | * It's a left split. Since we know | ||
| 3244 | * that the rotate code gave us an | ||
| 3245 | * empty extent in the left path, we | ||
| 3246 | * can just do the insert there. | ||
| 3247 | */ | ||
| 3248 | insert_el = left_el; | ||
| 3249 | } else { | ||
| 3250 | /* | ||
| 3251 | * Right split - we have to move the | ||
| 3252 | * existing record over to the left | ||
| 3253 | * leaf. The insert will be into the | ||
| 3254 | * newly created empty extent in the | ||
| 3255 | * right leaf. | ||
| 3256 | */ | ||
| 3257 | tmprec = &right_el->l_recs[index]; | ||
| 3258 | ocfs2_rotate_leaf(left_el, tmprec); | ||
| 3259 | el = left_el; | ||
| 3260 | |||
| 3261 | memset(tmprec, 0, sizeof(*tmprec)); | ||
| 3262 | index = ocfs2_search_extent_list(left_el, cpos); | ||
| 3263 | BUG_ON(index == -1); | ||
| 3264 | } | ||
| 3265 | } | ||
| 3266 | } else { | ||
| 3267 | BUG_ON(!left_path); | ||
| 3268 | BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0])); | ||
| 3269 | /* | ||
| 3270 | * Left path is easy - we can just allow the insert to | ||
| 3271 | * happen. | ||
| 3272 | */ | ||
| 3273 | el = left_el; | ||
| 3274 | insert_el = left_el; | ||
| 3275 | index = ocfs2_search_extent_list(el, cpos); | ||
| 3276 | BUG_ON(index == -1); | ||
| 3277 | } | ||
| 3278 | |||
| 3279 | rec = &el->l_recs[index]; | ||
| 3280 | ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); | ||
| 3281 | ocfs2_rotate_leaf(insert_el, split_rec); | ||
| 3282 | } | ||
| 3283 | |||
| 1934 | /* | 3284 | /* |
| 1935 | * This function only does inserts on an allocation b-tree. For dinode | 3285 | * This function only does inserts on an allocation b-tree. For dinode |
| 1936 | * lists, ocfs2_insert_at_leaf() is called directly. | 3286 | * lists, ocfs2_insert_at_leaf() is called directly. |
| @@ -1948,7 +3298,6 @@ static int ocfs2_insert_path(struct inode *inode, | |||
| 1948 | { | 3298 | { |
| 1949 | int ret, subtree_index; | 3299 | int ret, subtree_index; |
| 1950 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); | 3300 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); |
| 1951 | struct ocfs2_extent_list *el; | ||
| 1952 | 3301 | ||
| 1953 | /* | 3302 | /* |
| 1954 | * Pass both paths to the journal. The majority of inserts | 3303 | * Pass both paths to the journal. The majority of inserts |
| @@ -1984,9 +3333,18 @@ static int ocfs2_insert_path(struct inode *inode, | |||
| 1984 | } | 3333 | } |
| 1985 | } | 3334 | } |
| 1986 | 3335 | ||
| 1987 | el = path_leaf_el(right_path); | 3336 | if (insert->ins_split != SPLIT_NONE) { |
| 3337 | /* | ||
| 3338 | * We could call ocfs2_insert_at_leaf() for some types | ||
| 3339 | * of splits, but it's easier to just let one seperate | ||
| 3340 | * function sort it all out. | ||
| 3341 | */ | ||
| 3342 | ocfs2_split_record(inode, left_path, right_path, | ||
| 3343 | insert_rec, insert->ins_split); | ||
| 3344 | } else | ||
| 3345 | ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), | ||
| 3346 | insert, inode); | ||
| 1988 | 3347 | ||
| 1989 | ocfs2_insert_at_leaf(insert_rec, el, insert, inode); | ||
| 1990 | ret = ocfs2_journal_dirty(handle, leaf_bh); | 3348 | ret = ocfs2_journal_dirty(handle, leaf_bh); |
| 1991 | if (ret) | 3349 | if (ret) |
| 1992 | mlog_errno(ret); | 3350 | mlog_errno(ret); |
| @@ -2075,7 +3433,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 2075 | * can wind up skipping both of these two special cases... | 3433 | * can wind up skipping both of these two special cases... |
| 2076 | */ | 3434 | */ |
| 2077 | if (rotate) { | 3435 | if (rotate) { |
| 2078 | ret = ocfs2_rotate_tree_right(inode, handle, | 3436 | ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, |
| 2079 | le32_to_cpu(insert_rec->e_cpos), | 3437 | le32_to_cpu(insert_rec->e_cpos), |
| 2080 | right_path, &left_path); | 3438 | right_path, &left_path); |
| 2081 | if (ret) { | 3439 | if (ret) { |
| @@ -2100,8 +3458,9 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 2100 | } | 3458 | } |
| 2101 | 3459 | ||
| 2102 | out_update_clusters: | 3460 | out_update_clusters: |
| 2103 | ocfs2_update_dinode_clusters(inode, di, | 3461 | if (type->ins_split == SPLIT_NONE) |
| 2104 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 3462 | ocfs2_update_dinode_clusters(inode, di, |
| 3463 | le16_to_cpu(insert_rec->e_leaf_clusters)); | ||
| 2105 | 3464 | ||
| 2106 | ret = ocfs2_journal_dirty(handle, di_bh); | 3465 | ret = ocfs2_journal_dirty(handle, di_bh); |
| 2107 | if (ret) | 3466 | if (ret) |
| @@ -2114,6 +3473,44 @@ out: | |||
| 2114 | return ret; | 3473 | return ret; |
| 2115 | } | 3474 | } |
| 2116 | 3475 | ||
| 3476 | static enum ocfs2_contig_type | ||
| 3477 | ocfs2_figure_merge_contig_type(struct inode *inode, | ||
| 3478 | struct ocfs2_extent_list *el, int index, | ||
| 3479 | struct ocfs2_extent_rec *split_rec) | ||
| 3480 | { | ||
| 3481 | struct ocfs2_extent_rec *rec; | ||
| 3482 | enum ocfs2_contig_type ret = CONTIG_NONE; | ||
| 3483 | |||
| 3484 | /* | ||
| 3485 | * We're careful to check for an empty extent record here - | ||
| 3486 | * the merge code will know what to do if it sees one. | ||
| 3487 | */ | ||
| 3488 | |||
| 3489 | if (index > 0) { | ||
| 3490 | rec = &el->l_recs[index - 1]; | ||
| 3491 | if (index == 1 && ocfs2_is_empty_extent(rec)) { | ||
| 3492 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | ||
| 3493 | ret = CONTIG_RIGHT; | ||
| 3494 | } else { | ||
| 3495 | ret = ocfs2_extent_contig(inode, rec, split_rec); | ||
| 3496 | } | ||
| 3497 | } | ||
| 3498 | |||
| 3499 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
| 3500 | enum ocfs2_contig_type contig_type; | ||
| 3501 | |||
| 3502 | rec = &el->l_recs[index + 1]; | ||
| 3503 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | ||
| 3504 | |||
| 3505 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | ||
| 3506 | ret = CONTIG_LEFTRIGHT; | ||
| 3507 | else if (ret == CONTIG_NONE) | ||
| 3508 | ret = contig_type; | ||
| 3509 | } | ||
| 3510 | |||
| 3511 | return ret; | ||
| 3512 | } | ||
| 3513 | |||
| 2117 | static void ocfs2_figure_contig_type(struct inode *inode, | 3514 | static void ocfs2_figure_contig_type(struct inode *inode, |
| 2118 | struct ocfs2_insert_type *insert, | 3515 | struct ocfs2_insert_type *insert, |
| 2119 | struct ocfs2_extent_list *el, | 3516 | struct ocfs2_extent_list *el, |
| @@ -2205,6 +3602,8 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 2205 | struct ocfs2_path *path = NULL; | 3602 | struct ocfs2_path *path = NULL; |
| 2206 | struct buffer_head *bh = NULL; | 3603 | struct buffer_head *bh = NULL; |
| 2207 | 3604 | ||
| 3605 | insert->ins_split = SPLIT_NONE; | ||
| 3606 | |||
| 2208 | el = &di->id2.i_list; | 3607 | el = &di->id2.i_list; |
| 2209 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); | 3608 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); |
| 2210 | 3609 | ||
| @@ -2327,9 +3726,10 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 2327 | u32 cpos, | 3726 | u32 cpos, |
| 2328 | u64 start_blk, | 3727 | u64 start_blk, |
| 2329 | u32 new_clusters, | 3728 | u32 new_clusters, |
| 3729 | u8 flags, | ||
| 2330 | struct ocfs2_alloc_context *meta_ac) | 3730 | struct ocfs2_alloc_context *meta_ac) |
| 2331 | { | 3731 | { |
| 2332 | int status, shift; | 3732 | int status; |
| 2333 | struct buffer_head *last_eb_bh = NULL; | 3733 | struct buffer_head *last_eb_bh = NULL; |
| 2334 | struct buffer_head *bh = NULL; | 3734 | struct buffer_head *bh = NULL; |
| 2335 | struct ocfs2_insert_type insert = {0, }; | 3735 | struct ocfs2_insert_type insert = {0, }; |
| @@ -2350,6 +3750,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 2350 | rec.e_cpos = cpu_to_le32(cpos); | 3750 | rec.e_cpos = cpu_to_le32(cpos); |
| 2351 | rec.e_blkno = cpu_to_le64(start_blk); | 3751 | rec.e_blkno = cpu_to_le64(start_blk); |
| 2352 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 3752 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
| 3753 | rec.e_flags = flags; | ||
| 2353 | 3754 | ||
| 2354 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, | 3755 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, |
| 2355 | &insert); | 3756 | &insert); |
| @@ -2364,55 +3765,16 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 2364 | insert.ins_appending, insert.ins_contig, insert.ins_contig_index, | 3765 | insert.ins_appending, insert.ins_contig, insert.ins_contig_index, |
| 2365 | insert.ins_free_records, insert.ins_tree_depth); | 3766 | insert.ins_free_records, insert.ins_tree_depth); |
| 2366 | 3767 | ||
| 2367 | /* | 3768 | if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { |
| 2368 | * Avoid growing the tree unless we're out of records and the | 3769 | status = ocfs2_grow_tree(inode, handle, fe_bh, |
| 2369 | * insert type requres one. | 3770 | &insert.ins_tree_depth, &last_eb_bh, |
| 2370 | */ | 3771 | meta_ac); |
| 2371 | if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) | 3772 | if (status) { |
| 2372 | goto out_add; | ||
| 2373 | |||
| 2374 | shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); | ||
| 2375 | if (shift < 0) { | ||
| 2376 | status = shift; | ||
| 2377 | mlog_errno(status); | ||
| 2378 | goto bail; | ||
| 2379 | } | ||
| 2380 | |||
| 2381 | /* We traveled all the way to the bottom of the allocation tree | ||
| 2382 | * and didn't find room for any more extents - we need to add | ||
| 2383 | * another tree level */ | ||
| 2384 | if (shift) { | ||
| 2385 | BUG_ON(bh); | ||
| 2386 | mlog(0, "need to shift tree depth " | ||
| 2387 | "(current = %d)\n", insert.ins_tree_depth); | ||
| 2388 | |||
| 2389 | /* ocfs2_shift_tree_depth will return us a buffer with | ||
| 2390 | * the new extent block (so we can pass that to | ||
| 2391 | * ocfs2_add_branch). */ | ||
| 2392 | status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, | ||
| 2393 | meta_ac, &bh); | ||
| 2394 | if (status < 0) { | ||
| 2395 | mlog_errno(status); | 3773 | mlog_errno(status); |
| 2396 | goto bail; | 3774 | goto bail; |
| 2397 | } | 3775 | } |
| 2398 | insert.ins_tree_depth++; | ||
| 2399 | /* Special case: we have room now if we shifted from | ||
| 2400 | * tree_depth 0 */ | ||
| 2401 | if (insert.ins_tree_depth == 1) | ||
| 2402 | goto out_add; | ||
| 2403 | } | ||
| 2404 | |||
| 2405 | /* call ocfs2_add_branch to add the final part of the tree with | ||
| 2406 | * the new data. */ | ||
| 2407 | mlog(0, "add branch. bh = %p\n", bh); | ||
| 2408 | status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, | ||
| 2409 | meta_ac); | ||
| 2410 | if (status < 0) { | ||
| 2411 | mlog_errno(status); | ||
| 2412 | goto bail; | ||
| 2413 | } | 3776 | } |
| 2414 | 3777 | ||
| 2415 | out_add: | ||
| 2416 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 3778 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
| 2417 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); | 3779 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); |
| 2418 | if (status < 0) | 3780 | if (status < 0) |
| @@ -2431,7 +3793,720 @@ bail: | |||
| 2431 | return status; | 3793 | return status; |
| 2432 | } | 3794 | } |
| 2433 | 3795 | ||
| 2434 | static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | 3796 | static void ocfs2_make_right_split_rec(struct super_block *sb, |
| 3797 | struct ocfs2_extent_rec *split_rec, | ||
| 3798 | u32 cpos, | ||
| 3799 | struct ocfs2_extent_rec *rec) | ||
| 3800 | { | ||
| 3801 | u32 rec_cpos = le32_to_cpu(rec->e_cpos); | ||
| 3802 | u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters); | ||
| 3803 | |||
| 3804 | memset(split_rec, 0, sizeof(struct ocfs2_extent_rec)); | ||
| 3805 | |||
| 3806 | split_rec->e_cpos = cpu_to_le32(cpos); | ||
| 3807 | split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos); | ||
| 3808 | |||
| 3809 | split_rec->e_blkno = rec->e_blkno; | ||
| 3810 | le64_add_cpu(&split_rec->e_blkno, | ||
| 3811 | ocfs2_clusters_to_blocks(sb, cpos - rec_cpos)); | ||
| 3812 | |||
| 3813 | split_rec->e_flags = rec->e_flags; | ||
| 3814 | } | ||
| 3815 | |||
| 3816 | static int ocfs2_split_and_insert(struct inode *inode, | ||
| 3817 | handle_t *handle, | ||
| 3818 | struct ocfs2_path *path, | ||
| 3819 | struct buffer_head *di_bh, | ||
| 3820 | struct buffer_head **last_eb_bh, | ||
| 3821 | int split_index, | ||
| 3822 | struct ocfs2_extent_rec *orig_split_rec, | ||
| 3823 | struct ocfs2_alloc_context *meta_ac) | ||
| 3824 | { | ||
| 3825 | int ret = 0, depth; | ||
| 3826 | unsigned int insert_range, rec_range, do_leftright = 0; | ||
| 3827 | struct ocfs2_extent_rec tmprec; | ||
| 3828 | struct ocfs2_extent_list *rightmost_el; | ||
| 3829 | struct ocfs2_extent_rec rec; | ||
| 3830 | struct ocfs2_extent_rec split_rec = *orig_split_rec; | ||
| 3831 | struct ocfs2_insert_type insert; | ||
| 3832 | struct ocfs2_extent_block *eb; | ||
| 3833 | struct ocfs2_dinode *di; | ||
| 3834 | |||
| 3835 | leftright: | ||
| 3836 | /* | ||
| 3837 | * Store a copy of the record on the stack - it might move | ||
| 3838 | * around as the tree is manipulated below. | ||
| 3839 | */ | ||
| 3840 | rec = path_leaf_el(path)->l_recs[split_index]; | ||
| 3841 | |||
| 3842 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 3843 | rightmost_el = &di->id2.i_list; | ||
| 3844 | |||
| 3845 | depth = le16_to_cpu(rightmost_el->l_tree_depth); | ||
| 3846 | if (depth) { | ||
| 3847 | BUG_ON(!(*last_eb_bh)); | ||
| 3848 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; | ||
| 3849 | rightmost_el = &eb->h_list; | ||
| 3850 | } | ||
| 3851 | |||
| 3852 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | ||
| 3853 | le16_to_cpu(rightmost_el->l_count)) { | ||
| 3854 | int old_depth = depth; | ||
| 3855 | |||
| 3856 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, | ||
| 3857 | meta_ac); | ||
| 3858 | if (ret) { | ||
| 3859 | mlog_errno(ret); | ||
| 3860 | goto out; | ||
| 3861 | } | ||
| 3862 | |||
| 3863 | if (old_depth != depth) { | ||
| 3864 | eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; | ||
| 3865 | rightmost_el = &eb->h_list; | ||
| 3866 | } | ||
| 3867 | } | ||
| 3868 | |||
| 3869 | memset(&insert, 0, sizeof(struct ocfs2_insert_type)); | ||
| 3870 | insert.ins_appending = APPEND_NONE; | ||
| 3871 | insert.ins_contig = CONTIG_NONE; | ||
| 3872 | insert.ins_free_records = le16_to_cpu(rightmost_el->l_count) | ||
| 3873 | - le16_to_cpu(rightmost_el->l_next_free_rec); | ||
| 3874 | insert.ins_tree_depth = depth; | ||
| 3875 | |||
| 3876 | insert_range = le32_to_cpu(split_rec.e_cpos) + | ||
| 3877 | le16_to_cpu(split_rec.e_leaf_clusters); | ||
| 3878 | rec_range = le32_to_cpu(rec.e_cpos) + | ||
| 3879 | le16_to_cpu(rec.e_leaf_clusters); | ||
| 3880 | |||
| 3881 | if (split_rec.e_cpos == rec.e_cpos) { | ||
| 3882 | insert.ins_split = SPLIT_LEFT; | ||
| 3883 | } else if (insert_range == rec_range) { | ||
| 3884 | insert.ins_split = SPLIT_RIGHT; | ||
| 3885 | } else { | ||
| 3886 | /* | ||
| 3887 | * Left/right split. We fake this as a right split | ||
| 3888 | * first and then make a second pass as a left split. | ||
| 3889 | */ | ||
| 3890 | insert.ins_split = SPLIT_RIGHT; | ||
| 3891 | |||
| 3892 | ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, | ||
| 3893 | &rec); | ||
| 3894 | |||
| 3895 | split_rec = tmprec; | ||
| 3896 | |||
| 3897 | BUG_ON(do_leftright); | ||
| 3898 | do_leftright = 1; | ||
| 3899 | } | ||
| 3900 | |||
| 3901 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, | ||
| 3902 | &insert); | ||
| 3903 | if (ret) { | ||
| 3904 | mlog_errno(ret); | ||
| 3905 | goto out; | ||
| 3906 | } | ||
| 3907 | |||
| 3908 | if (do_leftright == 1) { | ||
| 3909 | u32 cpos; | ||
| 3910 | struct ocfs2_extent_list *el; | ||
| 3911 | |||
| 3912 | do_leftright++; | ||
| 3913 | split_rec = *orig_split_rec; | ||
| 3914 | |||
| 3915 | ocfs2_reinit_path(path, 1); | ||
| 3916 | |||
| 3917 | cpos = le32_to_cpu(split_rec.e_cpos); | ||
| 3918 | ret = ocfs2_find_path(inode, path, cpos); | ||
| 3919 | if (ret) { | ||
| 3920 | mlog_errno(ret); | ||
| 3921 | goto out; | ||
| 3922 | } | ||
| 3923 | |||
| 3924 | el = path_leaf_el(path); | ||
| 3925 | split_index = ocfs2_search_extent_list(el, cpos); | ||
| 3926 | goto leftright; | ||
| 3927 | } | ||
| 3928 | out: | ||
| 3929 | |||
| 3930 | return ret; | ||
| 3931 | } | ||
| 3932 | |||
| 3933 | /* | ||
| 3934 | * Mark part or all of the extent record at split_index in the leaf | ||
| 3935 | * pointed to by path as written. This removes the unwritten | ||
| 3936 | * extent flag. | ||
| 3937 | * | ||
| 3938 | * Care is taken to handle contiguousness so as to not grow the tree. | ||
| 3939 | * | ||
| 3940 | * meta_ac is not strictly necessary - we only truly need it if growth | ||
| 3941 | * of the tree is required. All other cases will degrade into a less | ||
| 3942 | * optimal tree layout. | ||
| 3943 | * | ||
| 3944 | * last_eb_bh should be the rightmost leaf block for any inode with a | ||
| 3945 | * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. | ||
| 3946 | * | ||
| 3947 | * This code is optimized for readability - several passes might be | ||
| 3948 | * made over certain portions of the tree. All of those blocks will | ||
| 3949 | * have been brought into cache (and pinned via the journal), so the | ||
| 3950 | * extra overhead is not expressed in terms of disk reads. | ||
| 3951 | */ | ||
| 3952 | static int __ocfs2_mark_extent_written(struct inode *inode, | ||
| 3953 | struct buffer_head *di_bh, | ||
| 3954 | handle_t *handle, | ||
| 3955 | struct ocfs2_path *path, | ||
| 3956 | int split_index, | ||
| 3957 | struct ocfs2_extent_rec *split_rec, | ||
| 3958 | struct ocfs2_alloc_context *meta_ac, | ||
| 3959 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 3960 | { | ||
| 3961 | int ret = 0; | ||
| 3962 | struct ocfs2_extent_list *el = path_leaf_el(path); | ||
| 3963 | struct buffer_head *eb_bh, *last_eb_bh = NULL; | ||
| 3964 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | ||
| 3965 | struct ocfs2_merge_ctxt ctxt; | ||
| 3966 | struct ocfs2_extent_list *rightmost_el; | ||
| 3967 | |||
| 3968 | if (!rec->e_flags & OCFS2_EXT_UNWRITTEN) { | ||
| 3969 | ret = -EIO; | ||
| 3970 | mlog_errno(ret); | ||
| 3971 | goto out; | ||
| 3972 | } | ||
| 3973 | |||
| 3974 | if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || | ||
| 3975 | ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < | ||
| 3976 | (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { | ||
| 3977 | ret = -EIO; | ||
| 3978 | mlog_errno(ret); | ||
| 3979 | goto out; | ||
| 3980 | } | ||
| 3981 | |||
| 3982 | eb_bh = path_leaf_bh(path); | ||
| 3983 | ret = ocfs2_journal_access(handle, inode, eb_bh, | ||
| 3984 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3985 | if (ret) { | ||
| 3986 | mlog_errno(ret); | ||
| 3987 | goto out; | ||
| 3988 | } | ||
| 3989 | |||
| 3990 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | ||
| 3991 | split_index, | ||
| 3992 | split_rec); | ||
| 3993 | |||
| 3994 | /* | ||
| 3995 | * The core merge / split code wants to know how much room is | ||
| 3996 | * left in this inodes allocation tree, so we pass the | ||
| 3997 | * rightmost extent list. | ||
| 3998 | */ | ||
| 3999 | if (path->p_tree_depth) { | ||
| 4000 | struct ocfs2_extent_block *eb; | ||
| 4001 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 4002 | |||
| 4003 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
| 4004 | le64_to_cpu(di->i_last_eb_blk), | ||
| 4005 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
| 4006 | if (ret) { | ||
| 4007 | mlog_exit(ret); | ||
| 4008 | goto out; | ||
| 4009 | } | ||
| 4010 | |||
| 4011 | eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; | ||
| 4012 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
| 4013 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
| 4014 | ret = -EROFS; | ||
| 4015 | goto out; | ||
| 4016 | } | ||
| 4017 | |||
| 4018 | rightmost_el = &eb->h_list; | ||
| 4019 | } else | ||
| 4020 | rightmost_el = path_root_el(path); | ||
| 4021 | |||
| 4022 | ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec); | ||
| 4023 | if (ctxt.c_used_tail_recs > 0 && | ||
| 4024 | ocfs2_is_empty_extent(&rightmost_el->l_recs[0])) | ||
| 4025 | ctxt.c_used_tail_recs--; | ||
| 4026 | |||
| 4027 | if (rec->e_cpos == split_rec->e_cpos && | ||
| 4028 | rec->e_leaf_clusters == split_rec->e_leaf_clusters) | ||
| 4029 | ctxt.c_split_covers_rec = 1; | ||
| 4030 | else | ||
| 4031 | ctxt.c_split_covers_rec = 0; | ||
| 4032 | |||
| 4033 | ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); | ||
| 4034 | |||
| 4035 | mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " | ||
| 4036 | "has_empty: %u, split_covers: %u\n", split_index, | ||
| 4037 | ctxt.c_contig_type, ctxt.c_used_tail_recs, | ||
| 4038 | ctxt.c_has_empty_extent, ctxt.c_split_covers_rec); | ||
| 4039 | |||
| 4040 | if (ctxt.c_contig_type == CONTIG_NONE) { | ||
| 4041 | if (ctxt.c_split_covers_rec) | ||
| 4042 | el->l_recs[split_index] = *split_rec; | ||
| 4043 | else | ||
| 4044 | ret = ocfs2_split_and_insert(inode, handle, path, di_bh, | ||
| 4045 | &last_eb_bh, split_index, | ||
| 4046 | split_rec, meta_ac); | ||
| 4047 | if (ret) | ||
| 4048 | mlog_errno(ret); | ||
| 4049 | } else { | ||
| 4050 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | ||
| 4051 | split_index, split_rec, | ||
| 4052 | dealloc, &ctxt); | ||
| 4053 | if (ret) | ||
| 4054 | mlog_errno(ret); | ||
| 4055 | } | ||
| 4056 | |||
| 4057 | ocfs2_journal_dirty(handle, eb_bh); | ||
| 4058 | |||
| 4059 | out: | ||
| 4060 | brelse(last_eb_bh); | ||
| 4061 | return ret; | ||
| 4062 | } | ||
| 4063 | |||
| 4064 | /* | ||
| 4065 | * Mark the already-existing extent at cpos as written for len clusters. | ||
| 4066 | * | ||
| 4067 | * If the existing extent is larger than the request, initiate a | ||
| 4068 | * split. An attempt will be made at merging with adjacent extents. | ||
| 4069 | * | ||
| 4070 | * The caller is responsible for passing down meta_ac if we'll need it. | ||
| 4071 | */ | ||
| 4072 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | ||
| 4073 | handle_t *handle, u32 cpos, u32 len, u32 phys, | ||
| 4074 | struct ocfs2_alloc_context *meta_ac, | ||
| 4075 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 4076 | { | ||
| 4077 | int ret, index; | ||
| 4078 | u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); | ||
| 4079 | struct ocfs2_extent_rec split_rec; | ||
| 4080 | struct ocfs2_path *left_path = NULL; | ||
| 4081 | struct ocfs2_extent_list *el; | ||
| 4082 | |||
| 4083 | mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", | ||
| 4084 | inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); | ||
| 4085 | |||
| 4086 | if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { | ||
| 4087 | ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " | ||
| 4088 | "that are being written to, but the feature bit " | ||
| 4089 | "is not set in the super block.", | ||
| 4090 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 4091 | ret = -EROFS; | ||
| 4092 | goto out; | ||
| 4093 | } | ||
| 4094 | |||
| 4095 | /* | ||
| 4096 | * XXX: This should be fixed up so that we just re-insert the | ||
| 4097 | * next extent records. | ||
| 4098 | */ | ||
| 4099 | ocfs2_extent_map_trunc(inode, 0); | ||
| 4100 | |||
| 4101 | left_path = ocfs2_new_inode_path(di_bh); | ||
| 4102 | if (!left_path) { | ||
| 4103 | ret = -ENOMEM; | ||
| 4104 | mlog_errno(ret); | ||
| 4105 | goto out; | ||
| 4106 | } | ||
| 4107 | |||
| 4108 | ret = ocfs2_find_path(inode, left_path, cpos); | ||
| 4109 | if (ret) { | ||
| 4110 | mlog_errno(ret); | ||
| 4111 | goto out; | ||
| 4112 | } | ||
| 4113 | el = path_leaf_el(left_path); | ||
| 4114 | |||
| 4115 | index = ocfs2_search_extent_list(el, cpos); | ||
| 4116 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
| 4117 | ocfs2_error(inode->i_sb, | ||
| 4118 | "Inode %llu has an extent at cpos %u which can no " | ||
| 4119 | "longer be found.\n", | ||
| 4120 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | ||
| 4121 | ret = -EROFS; | ||
| 4122 | goto out; | ||
| 4123 | } | ||
| 4124 | |||
| 4125 | memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); | ||
| 4126 | split_rec.e_cpos = cpu_to_le32(cpos); | ||
| 4127 | split_rec.e_leaf_clusters = cpu_to_le16(len); | ||
| 4128 | split_rec.e_blkno = cpu_to_le64(start_blkno); | ||
| 4129 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | ||
| 4130 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | ||
| 4131 | |||
| 4132 | ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, | ||
| 4133 | index, &split_rec, meta_ac, dealloc); | ||
| 4134 | if (ret) | ||
| 4135 | mlog_errno(ret); | ||
| 4136 | |||
| 4137 | out: | ||
| 4138 | ocfs2_free_path(left_path); | ||
| 4139 | return ret; | ||
| 4140 | } | ||
| 4141 | |||
/*
 * Right-split the extent record at 'index' so that the clusters from
 * new_range onward become their own record (built by
 * ocfs2_make_right_split_rec() and inserted with SPLIT_RIGHT).  Used
 * by ocfs2_remove_extent() to turn a middle-of-record punch into an
 * edge truncate.  May grow the tree, so meta_ac must be able to cover
 * that allocation.
 */
static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
			    handle_t *handle, struct ocfs2_path *path,
			    int index, u32 new_range,
			    struct ocfs2_alloc_context *meta_ac)
{
	int ret, depth, credits = handle->h_buffer_credits;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct buffer_head *last_eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *rightmost_el, *el;
	struct ocfs2_extent_rec split_rec;
	struct ocfs2_extent_rec *rec;
	struct ocfs2_insert_type insert;

	/*
	 * Setup the record to split before we grow the tree.
	 */
	el = path_leaf_el(path);
	rec = &el->l_recs[index];
	ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec);

	depth = path->p_tree_depth;
	if (depth > 0) {
		/* Non-inline tree: read the last extent block so we can
		 * see how much room the rightmost list has. */
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       le64_to_cpu(di->i_last_eb_blk),
				       &last_eb_bh, OCFS2_BH_CACHED, inode);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		rightmost_el = &eb->h_list;
	} else
		rightmost_el = path_leaf_el(path);

	/* Reserve extra journal credits up-front in case the insert
	 * has to allocate metadata for a tree grow. */
	credits += path->p_tree_depth + ocfs2_extend_meta_needed(di);
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
	    le16_to_cpu(rightmost_el->l_count)) {
		/* Rightmost list is full - grow the tree before inserting. */
		int old_depth = depth;

		ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
				      meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (old_depth != depth) {
			/* Depth changed: re-derive the rightmost list
			 * from the (possibly new) last extent block. */
			eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
			rightmost_el = &eb->h_list;
		}
	}

	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
	insert.ins_appending = APPEND_NONE;
	insert.ins_contig = CONTIG_NONE;
	insert.ins_split = SPLIT_RIGHT;
	insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
		- le16_to_cpu(rightmost_el->l_next_free_rec);
	insert.ins_tree_depth = depth;

	ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
	if (ret)
		mlog_errno(ret);

out:
	brelse(last_eb_bh);
	return ret;
}
| 4218 | |||
/*
 * Remove clusters [cpos, cpos + len) from the extent record at
 * 'index' in path's leaf.  The range must share at least one edge
 * with the record (or cover it entirely); middle-of-record punches
 * are split by the caller first (see ocfs2_remove_extent()).
 */
static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
			      struct ocfs2_path *path, int index,
			      struct ocfs2_cached_dealloc_ctxt *dealloc,
			      u32 cpos, u32 len)
{
	int ret;
	u32 left_cpos, rec_range, trunc_range;
	int wants_rotate = 0, is_rightmost_tree_rec = 0;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_path *left_path = NULL;
	struct ocfs2_extent_list *el = path_leaf_el(path);
	struct ocfs2_extent_rec *rec;
	struct ocfs2_extent_block *eb;

	/* Rotate a leading empty record away first; index-- keeps us
	 * pointing at the same logical record afterwards. */
	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		index--;
	}

	if (index == (le16_to_cpu(el->l_next_free_rec) - 1) &&
	    path->p_tree_depth) {
		/*
		 * Check whether this is the rightmost tree record. If
		 * we remove all of this record or part of its right
		 * edge then an update of the record lengths above it
		 * will be required.
		 */
		eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
		if (eb->h_next_leaf_blk == 0)
			is_rightmost_tree_rec = 1;
	}

	rec = &el->l_recs[index];
	if (index == 0 && path->p_tree_depth &&
	    le32_to_cpu(rec->e_cpos) == cpos) {
		/*
		 * Changing the leftmost offset (via partial or whole
		 * record truncate) of an interior (or rightmost) path
		 * means we have to update the subtree that is formed
		 * by this leaf and the one to it's left.
		 *
		 * There are two cases we can skip:
		 *   1) Path is the leftmost one in our inode tree.
		 *   2) The leaf is rightmost and will be empty after
		 *      we remove the extent record - the rotate code
		 *      knows how to update the newly formed edge.
		 */

		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path,
						    &left_cpos);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
			left_path = ocfs2_new_path(path_root_bh(path),
						   path_root_el(path));
			if (!left_path) {
				ret = -ENOMEM;
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_find_path(inode, left_path, left_cpos);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}
	}

	ret = ocfs2_extend_rotate_transaction(handle, 0,
					      handle->h_buffer_credits,
					      path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_path(inode, handle, path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* NOTE(review): left_path may be NULL here; presumably
	 * ocfs2_journal_access_path() tolerates that - confirm. */
	ret = ocfs2_journal_access_path(inode, handle, left_path);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
	trunc_range = cpos + len;

	if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) {
		/* Whole-record removal: zero it out and compact. */
		int next_free;

		memset(rec, 0, sizeof(*rec));
		ocfs2_cleanup_merge(el, index);
		wants_rotate = 1; /* NOTE(review): dead store - never read */

		next_free = le16_to_cpu(el->l_next_free_rec);
		if (is_rightmost_tree_rec && next_free > 1) {
			/*
			 * We skip the edge update if this path will
			 * be deleted by the rotate code.
			 */
			rec = &el->l_recs[next_free - 1];
			ocfs2_adjust_rightmost_records(inode, handle, path,
						       rec);
		}
	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
		/* Remove leftmost portion of the record. */
		le32_add_cpu(&rec->e_cpos, len);
		le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len));
		le16_add_cpu(&rec->e_leaf_clusters, -len);
	} else if (rec_range == trunc_range) {
		/* Remove rightmost portion of the record */
		le16_add_cpu(&rec->e_leaf_clusters, -len);
		if (is_rightmost_tree_rec)
			ocfs2_adjust_rightmost_records(inode, handle, path, rec);
	} else {
		/* Caller should have trapped this. */
		mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) "
		     "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno,
		     le32_to_cpu(rec->e_cpos),
		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
		BUG();
	}

	if (left_path) {
		/* Propagate the new left edge up through the subtree
		 * shared with the leaf to our left. */
		int subtree_index;

		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
		ocfs2_complete_edge_insert(inode, handle, left_path, path,
					   subtree_index);
	}

	ocfs2_journal_dirty(handle, path_leaf_bh(path));

	/* Give the rotate code a chance to clean up any record we
	 * emptied above. */
	ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

out:
	ocfs2_free_path(left_path);
	return ret;
}
| 4375 | |||
| 4376 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
| 4377 | u32 cpos, u32 len, handle_t *handle, | ||
| 4378 | struct ocfs2_alloc_context *meta_ac, | ||
| 4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 4380 | { | ||
| 4381 | int ret, index; | ||
| 4382 | u32 rec_range, trunc_range; | ||
| 4383 | struct ocfs2_extent_rec *rec; | ||
| 4384 | struct ocfs2_extent_list *el; | ||
| 4385 | struct ocfs2_path *path; | ||
| 4386 | |||
| 4387 | ocfs2_extent_map_trunc(inode, 0); | ||
| 4388 | |||
| 4389 | path = ocfs2_new_inode_path(di_bh); | ||
| 4390 | if (!path) { | ||
| 4391 | ret = -ENOMEM; | ||
| 4392 | mlog_errno(ret); | ||
| 4393 | goto out; | ||
| 4394 | } | ||
| 4395 | |||
| 4396 | ret = ocfs2_find_path(inode, path, cpos); | ||
| 4397 | if (ret) { | ||
| 4398 | mlog_errno(ret); | ||
| 4399 | goto out; | ||
| 4400 | } | ||
| 4401 | |||
| 4402 | el = path_leaf_el(path); | ||
| 4403 | index = ocfs2_search_extent_list(el, cpos); | ||
| 4404 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
| 4405 | ocfs2_error(inode->i_sb, | ||
| 4406 | "Inode %llu has an extent at cpos %u which can no " | ||
| 4407 | "longer be found.\n", | ||
| 4408 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | ||
| 4409 | ret = -EROFS; | ||
| 4410 | goto out; | ||
| 4411 | } | ||
| 4412 | |||
| 4413 | /* | ||
| 4414 | * We have 3 cases of extent removal: | ||
| 4415 | * 1) Range covers the entire extent rec | ||
| 4416 | * 2) Range begins or ends on one edge of the extent rec | ||
| 4417 | * 3) Range is in the middle of the extent rec (no shared edges) | ||
| 4418 | * | ||
| 4419 | * For case 1 we remove the extent rec and left rotate to | ||
| 4420 | * fill the hole. | ||
| 4421 | * | ||
| 4422 | * For case 2 we just shrink the existing extent rec, with a | ||
| 4423 | * tree update if the shrinking edge is also the edge of an | ||
| 4424 | * extent block. | ||
| 4425 | * | ||
| 4426 | * For case 3 we do a right split to turn the extent rec into | ||
| 4427 | * something case 2 can handle. | ||
| 4428 | */ | ||
| 4429 | rec = &el->l_recs[index]; | ||
| 4430 | rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
| 4431 | trunc_range = cpos + len; | ||
| 4432 | |||
| 4433 | BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); | ||
| 4434 | |||
| 4435 | mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " | ||
| 4436 | "(cpos %u, len %u)\n", | ||
| 4437 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, | ||
| 4438 | le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); | ||
| 4439 | |||
| 4440 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | ||
| 4441 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | ||
| 4442 | cpos, len); | ||
| 4443 | if (ret) { | ||
| 4444 | mlog_errno(ret); | ||
| 4445 | goto out; | ||
| 4446 | } | ||
| 4447 | } else { | ||
| 4448 | ret = ocfs2_split_tree(inode, di_bh, handle, path, index, | ||
| 4449 | trunc_range, meta_ac); | ||
| 4450 | if (ret) { | ||
| 4451 | mlog_errno(ret); | ||
| 4452 | goto out; | ||
| 4453 | } | ||
| 4454 | |||
| 4455 | /* | ||
| 4456 | * The split could have manipulated the tree enough to | ||
| 4457 | * move the record location, so we have to look for it again. | ||
| 4458 | */ | ||
| 4459 | ocfs2_reinit_path(path, 1); | ||
| 4460 | |||
| 4461 | ret = ocfs2_find_path(inode, path, cpos); | ||
| 4462 | if (ret) { | ||
| 4463 | mlog_errno(ret); | ||
| 4464 | goto out; | ||
| 4465 | } | ||
| 4466 | |||
| 4467 | el = path_leaf_el(path); | ||
| 4468 | index = ocfs2_search_extent_list(el, cpos); | ||
| 4469 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
| 4470 | ocfs2_error(inode->i_sb, | ||
| 4471 | "Inode %llu: split at cpos %u lost record.", | ||
| 4472 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 4473 | cpos); | ||
| 4474 | ret = -EROFS; | ||
| 4475 | goto out; | ||
| 4476 | } | ||
| 4477 | |||
| 4478 | /* | ||
| 4479 | * Double check our values here. If anything is fishy, | ||
| 4480 | * it's easier to catch it at the top level. | ||
| 4481 | */ | ||
| 4482 | rec = &el->l_recs[index]; | ||
| 4483 | rec_range = le32_to_cpu(rec->e_cpos) + | ||
| 4484 | ocfs2_rec_clusters(el, rec); | ||
| 4485 | if (rec_range != trunc_range) { | ||
| 4486 | ocfs2_error(inode->i_sb, | ||
| 4487 | "Inode %llu: error after split at cpos %u" | ||
| 4488 | "trunc len %u, existing record is (%u,%u)", | ||
| 4489 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 4490 | cpos, len, le32_to_cpu(rec->e_cpos), | ||
| 4491 | ocfs2_rec_clusters(el, rec)); | ||
| 4492 | ret = -EROFS; | ||
| 4493 | goto out; | ||
| 4494 | } | ||
| 4495 | |||
| 4496 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | ||
| 4497 | cpos, len); | ||
| 4498 | if (ret) { | ||
| 4499 | mlog_errno(ret); | ||
| 4500 | goto out; | ||
| 4501 | } | ||
| 4502 | } | ||
| 4503 | |||
| 4504 | out: | ||
| 4505 | ocfs2_free_path(path); | ||
| 4506 | return ret; | ||
| 4507 | } | ||
| 4508 | |||
| 4509 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | ||
| 2435 | { | 4510 | { |
| 2436 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
| 2437 | struct ocfs2_dinode *di; | 4512 | struct ocfs2_dinode *di; |
| @@ -2464,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, | |||
| 2464 | return current_tail == new_start; | 4539 | return current_tail == new_start; |
| 2465 | } | 4540 | } |
| 2466 | 4541 | ||
| 2467 | static int ocfs2_truncate_log_append(struct ocfs2_super *osb, | 4542 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, |
| 2468 | handle_t *handle, | 4543 | handle_t *handle, |
| 2469 | u64 start_blk, | 4544 | u64 start_blk, |
| 2470 | unsigned int num_clusters) | 4545 | unsigned int num_clusters) |
| 2471 | { | 4546 | { |
| 2472 | int status, index; | 4547 | int status, index; |
| 2473 | unsigned int start_cluster, tl_count; | 4548 | unsigned int start_cluster, tl_count; |
| @@ -2623,7 +4698,7 @@ bail: | |||
| 2623 | } | 4698 | } |
| 2624 | 4699 | ||
| 2625 | /* Expects you to already be holding tl_inode->i_mutex */ | 4700 | /* Expects you to already be holding tl_inode->i_mutex */ |
| 2626 | static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | 4701 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) |
| 2627 | { | 4702 | { |
| 2628 | int status; | 4703 | int status; |
| 2629 | unsigned int num_to_flush; | 4704 | unsigned int num_to_flush; |
| @@ -2957,6 +5032,219 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) | |||
| 2957 | return status; | 5032 | return status; |
| 2958 | } | 5033 | } |
| 2959 | 5034 | ||
| 5035 | /* | ||
| 5036 | * Delayed de-allocation of suballocator blocks. | ||
| 5037 | * | ||
| 5038 | * Some sets of block de-allocations might involve multiple suballocator inodes. | ||
| 5039 | * | ||
| 5040 | * The locking for this can get extremely complicated, especially when | ||
| 5041 | * the suballocator inodes to delete from aren't known until deep | ||
| 5042 | * within an unrelated codepath. | ||
| 5043 | * | ||
| 5044 | * ocfs2_extent_block structures are a good example of this - an inode | ||
| 5045 | * btree could have been grown by any number of nodes each allocating | ||
| 5046 | * out of their own suballoc inode. | ||
| 5047 | * | ||
| 5048 | * These structures allow the delay of block de-allocation until a | ||
| 5049 | * later time, when locking of multiple cluster inodes won't cause | ||
| 5050 | * deadlock. | ||
| 5051 | */ | ||
| 5052 | |||
| 5053 | /* | ||
| 5054 | * Describes a single block free from a suballocator | ||
| 5055 | */ | ||
struct ocfs2_cached_block_free {
	/* Next item on the same per-slot free list (singly linked). */
	struct ocfs2_cached_block_free *free_next;
	/* Physical block number; mapped back to its suballoc group via
	 * ocfs2_which_suballoc_group() at free time. */
	u64 free_blk;
	/* Bit to clear within that suballocator group. */
	unsigned int free_bit;
};
| 5061 | |||
struct ocfs2_per_slot_free_list {
	/* Next (type, slot) list on the dealloc context. */
	struct ocfs2_per_slot_free_list *f_next_suballocator;
	/* System file type of the suballocator inode to free from. */
	int f_inode_type;
	/* Slot number of that suballocator inode. */
	int f_slot;
	/* Head of the list of individual bits to free. */
	struct ocfs2_cached_block_free *f_first;
};
| 5068 | |||
/*
 * Free every item on 'head' against the suballocator inode identified
 * by (sysfile_type, slot).  Takes that inode's i_mutex and cluster
 * lock, then frees one bit at a time inside a journal handle that is
 * re-extended between items.  The list is consumed (kfree'd) whether
 * or not an error occurs.
 */
static int ocfs2_free_cached_items(struct ocfs2_super *osb,
				   int sysfile_type,
				   int slot,
				   struct ocfs2_cached_block_free *head)
{
	int ret;
	u64 bg_blkno;
	handle_t *handle;
	struct inode *inode;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_cached_block_free *tmp;

	inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	/* Lock order: i_mutex, then the cluster (meta) lock. */
	mutex_lock(&inode->i_mutex);

	ret = ocfs2_meta_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	while (head) {
		bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
						      head->free_bit);
		mlog(0, "Free bit: (bit %u, blkno %llu)\n",
		     head->free_bit, (unsigned long long)head->free_blk);

		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
					       head->free_bit, bg_blkno, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		/* Top the transaction back up before the next free. */
		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
		if (ret) {
			mlog_errno(ret);
			goto out_journal;
		}

		/* Item done - unlink and release it. */
		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

out_journal:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_meta_unlock(inode, 1);
	brelse(di_bh);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	iput(inode);
out:
	while(head) {
		/* Premature exit may have left some dangling items. */
		tmp = head;
		head = head->free_next;
		kfree(tmp);
	}

	return ret;
}
| 5146 | |||
| 5147 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
| 5148 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
| 5149 | { | ||
| 5150 | int ret = 0, ret2; | ||
| 5151 | struct ocfs2_per_slot_free_list *fl; | ||
| 5152 | |||
| 5153 | if (!ctxt) | ||
| 5154 | return 0; | ||
| 5155 | |||
| 5156 | while (ctxt->c_first_suballocator) { | ||
| 5157 | fl = ctxt->c_first_suballocator; | ||
| 5158 | |||
| 5159 | if (fl->f_first) { | ||
| 5160 | mlog(0, "Free items: (type %u, slot %d)\n", | ||
| 5161 | fl->f_inode_type, fl->f_slot); | ||
| 5162 | ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, | ||
| 5163 | fl->f_slot, fl->f_first); | ||
| 5164 | if (ret2) | ||
| 5165 | mlog_errno(ret2); | ||
| 5166 | if (!ret) | ||
| 5167 | ret = ret2; | ||
| 5168 | } | ||
| 5169 | |||
| 5170 | ctxt->c_first_suballocator = fl->f_next_suballocator; | ||
| 5171 | kfree(fl); | ||
| 5172 | } | ||
| 5173 | |||
| 5174 | return ret; | ||
| 5175 | } | ||
| 5176 | |||
| 5177 | static struct ocfs2_per_slot_free_list * | ||
| 5178 | ocfs2_find_per_slot_free_list(int type, | ||
| 5179 | int slot, | ||
| 5180 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
| 5181 | { | ||
| 5182 | struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; | ||
| 5183 | |||
| 5184 | while (fl) { | ||
| 5185 | if (fl->f_inode_type == type && fl->f_slot == slot) | ||
| 5186 | return fl; | ||
| 5187 | |||
| 5188 | fl = fl->f_next_suballocator; | ||
| 5189 | } | ||
| 5190 | |||
| 5191 | fl = kmalloc(sizeof(*fl), GFP_NOFS); | ||
| 5192 | if (fl) { | ||
| 5193 | fl->f_inode_type = type; | ||
| 5194 | fl->f_slot = slot; | ||
| 5195 | fl->f_first = NULL; | ||
| 5196 | fl->f_next_suballocator = ctxt->c_first_suballocator; | ||
| 5197 | |||
| 5198 | ctxt->c_first_suballocator = fl; | ||
| 5199 | } | ||
| 5200 | return fl; | ||
| 5201 | } | ||
| 5202 | |||
| 5203 | static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
| 5204 | int type, int slot, u64 blkno, | ||
| 5205 | unsigned int bit) | ||
| 5206 | { | ||
| 5207 | int ret; | ||
| 5208 | struct ocfs2_per_slot_free_list *fl; | ||
| 5209 | struct ocfs2_cached_block_free *item; | ||
| 5210 | |||
| 5211 | fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); | ||
| 5212 | if (fl == NULL) { | ||
| 5213 | ret = -ENOMEM; | ||
| 5214 | mlog_errno(ret); | ||
| 5215 | goto out; | ||
| 5216 | } | ||
| 5217 | |||
| 5218 | item = kmalloc(sizeof(*item), GFP_NOFS); | ||
| 5219 | if (item == NULL) { | ||
| 5220 | ret = -ENOMEM; | ||
| 5221 | mlog_errno(ret); | ||
| 5222 | goto out; | ||
| 5223 | } | ||
| 5224 | |||
| 5225 | mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", | ||
| 5226 | type, slot, bit, (unsigned long long)blkno); | ||
| 5227 | |||
| 5228 | item->free_blk = blkno; | ||
| 5229 | item->free_bit = bit; | ||
| 5230 | item->free_next = fl->f_first; | ||
| 5231 | |||
| 5232 | fl->f_first = item; | ||
| 5233 | |||
| 5234 | ret = 0; | ||
| 5235 | out: | ||
| 5236 | return ret; | ||
| 5237 | } | ||
| 5238 | |||
| 5239 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
| 5240 | struct ocfs2_extent_block *eb) | ||
| 5241 | { | ||
| 5242 | return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, | ||
| 5243 | le16_to_cpu(eb->h_suballoc_slot), | ||
| 5244 | le64_to_cpu(eb->h_blkno), | ||
| 5245 | le16_to_cpu(eb->h_suballoc_bit)); | ||
| 5246 | } | ||
| 5247 | |||
| 2960 | /* This function will figure out whether the currently last extent | 5248 | /* This function will figure out whether the currently last extent |
| 2961 | * block will be deleted, and if it will, what the new last extent | 5249 | * block will be deleted, and if it will, what the new last extent |
| 2962 | * block will be so we can update his h_next_leaf_blk field, as well | 5250 | * block will be so we can update his h_next_leaf_blk field, as well |
| @@ -3238,27 +5526,10 @@ delete: | |||
| 3238 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); | 5526 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); |
| 3239 | BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); | 5527 | BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); |
| 3240 | 5528 | ||
| 3241 | if (le16_to_cpu(eb->h_suballoc_slot) == 0) { | 5529 | ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); |
| 3242 | /* | 5530 | /* An error here is not fatal. */ |
| 3243 | * This code only understands how to | 5531 | if (ret < 0) |
| 3244 | * lock the suballocator in slot 0, | 5532 | mlog_errno(ret); |
| 3245 | * which is fine because allocation is | ||
| 3246 | * only ever done out of that | ||
| 3247 | * suballocator too. A future version | ||
| 3248 | * might change that however, so avoid | ||
| 3249 | * a free if we don't know how to | ||
| 3250 | * handle it. This way an fs incompat | ||
| 3251 | * bit will not be necessary. | ||
| 3252 | */ | ||
| 3253 | ret = ocfs2_free_extent_block(handle, | ||
| 3254 | tc->tc_ext_alloc_inode, | ||
| 3255 | tc->tc_ext_alloc_bh, | ||
| 3256 | eb); | ||
| 3257 | |||
| 3258 | /* An error here is not fatal. */ | ||
| 3259 | if (ret < 0) | ||
| 3260 | mlog_errno(ret); | ||
| 3261 | } | ||
| 3262 | } else { | 5533 | } else { |
| 3263 | deleted_eb = 0; | 5534 | deleted_eb = 0; |
| 3264 | } | 5535 | } |
| @@ -3397,9 +5668,9 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | |||
| 3397 | return ocfs2_journal_dirty_data(handle, bh); | 5668 | return ocfs2_journal_dirty_data(handle, bh); |
| 3398 | } | 5669 | } |
| 3399 | 5670 | ||
| 3400 | static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | 5671 | static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, |
| 3401 | struct page **pages, int numpages, | 5672 | loff_t end, struct page **pages, |
| 3402 | u64 phys, handle_t *handle) | 5673 | int numpages, u64 phys, handle_t *handle) |
| 3403 | { | 5674 | { |
| 3404 | int i, ret, partial = 0; | 5675 | int i, ret, partial = 0; |
| 3405 | void *kaddr; | 5676 | void *kaddr; |
| @@ -3412,26 +5683,14 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | |||
| 3412 | if (numpages == 0) | 5683 | if (numpages == 0) |
| 3413 | goto out; | 5684 | goto out; |
| 3414 | 5685 | ||
| 3415 | from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */ | 5686 | to = PAGE_CACHE_SIZE; |
| 3416 | if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) { | ||
| 3417 | /* | ||
| 3418 | * Since 'from' has been capped to a value below page | ||
| 3419 | * size, this calculation won't be able to overflow | ||
| 3420 | * 'to' | ||
| 3421 | */ | ||
| 3422 | to = ocfs2_align_bytes_to_clusters(sb, from); | ||
| 3423 | |||
| 3424 | /* | ||
| 3425 | * The truncate tail in this case should never contain | ||
| 3426 | * more than one page at maximum. The loop below also | ||
| 3427 | * assumes this. | ||
| 3428 | */ | ||
| 3429 | BUG_ON(numpages != 1); | ||
| 3430 | } | ||
| 3431 | |||
| 3432 | for(i = 0; i < numpages; i++) { | 5687 | for(i = 0; i < numpages; i++) { |
| 3433 | page = pages[i]; | 5688 | page = pages[i]; |
| 3434 | 5689 | ||
| 5690 | from = start & (PAGE_CACHE_SIZE - 1); | ||
| 5691 | if ((end >> PAGE_CACHE_SHIFT) == page->index) | ||
| 5692 | to = end & (PAGE_CACHE_SIZE - 1); | ||
| 5693 | |||
| 3435 | BUG_ON(from > PAGE_CACHE_SIZE); | 5694 | BUG_ON(from > PAGE_CACHE_SIZE); |
| 3436 | BUG_ON(to > PAGE_CACHE_SIZE); | 5695 | BUG_ON(to > PAGE_CACHE_SIZE); |
| 3437 | 5696 | ||
| @@ -3468,10 +5727,7 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | |||
| 3468 | 5727 | ||
| 3469 | flush_dcache_page(page); | 5728 | flush_dcache_page(page); |
| 3470 | 5729 | ||
| 3471 | /* | 5730 | start = (page->index + 1) << PAGE_CACHE_SHIFT; |
| 3472 | * Every page after the 1st one should be completely zero'd. | ||
| 3473 | */ | ||
| 3474 | from = 0; | ||
| 3475 | } | 5731 | } |
| 3476 | out: | 5732 | out: |
| 3477 | if (pages) { | 5733 | if (pages) { |
| @@ -3484,24 +5740,26 @@ out: | |||
| 3484 | } | 5740 | } |
| 3485 | } | 5741 | } |
| 3486 | 5742 | ||
| 3487 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages, | 5743 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, |
| 3488 | int *num, u64 *phys) | 5744 | struct page **pages, int *num, u64 *phys) |
| 3489 | { | 5745 | { |
| 3490 | int i, numpages = 0, ret = 0; | 5746 | int i, numpages = 0, ret = 0; |
| 3491 | unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize; | ||
| 3492 | unsigned int ext_flags; | 5747 | unsigned int ext_flags; |
| 3493 | struct super_block *sb = inode->i_sb; | 5748 | struct super_block *sb = inode->i_sb; |
| 3494 | struct address_space *mapping = inode->i_mapping; | 5749 | struct address_space *mapping = inode->i_mapping; |
| 3495 | unsigned long index; | 5750 | unsigned long index; |
| 3496 | u64 next_cluster_bytes; | 5751 | loff_t last_page_bytes; |
| 3497 | 5752 | ||
| 3498 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); | 5753 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); |
| 5754 | BUG_ON(start > end); | ||
| 3499 | 5755 | ||
| 3500 | /* Cluster boundary, so we don't need to grab any pages. */ | 5756 | if (start == end) |
| 3501 | if ((isize & (csize - 1)) == 0) | ||
| 3502 | goto out; | 5757 | goto out; |
| 3503 | 5758 | ||
| 3504 | ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits, | 5759 | BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != |
| 5760 | (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); | ||
| 5761 | |||
| 5762 | ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits, | ||
| 3505 | phys, NULL, &ext_flags); | 5763 | phys, NULL, &ext_flags); |
| 3506 | if (ret) { | 5764 | if (ret) { |
| 3507 | mlog_errno(ret); | 5765 | mlog_errno(ret); |
| @@ -3517,8 +5775,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page * | |||
| 3517 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | 5775 | if (ext_flags & OCFS2_EXT_UNWRITTEN) |
| 3518 | goto out; | 5776 | goto out; |
| 3519 | 5777 | ||
| 3520 | next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize); | 5778 | last_page_bytes = PAGE_ALIGN(end); |
| 3521 | index = isize >> PAGE_CACHE_SHIFT; | 5779 | index = start >> PAGE_CACHE_SHIFT; |
| 3522 | do { | 5780 | do { |
| 3523 | pages[numpages] = grab_cache_page(mapping, index); | 5781 | pages[numpages] = grab_cache_page(mapping, index); |
| 3524 | if (!pages[numpages]) { | 5782 | if (!pages[numpages]) { |
| @@ -3529,7 +5787,7 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page * | |||
| 3529 | 5787 | ||
| 3530 | numpages++; | 5788 | numpages++; |
| 3531 | index++; | 5789 | index++; |
| 3532 | } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT)); | 5790 | } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT)); |
| 3533 | 5791 | ||
| 3534 | out: | 5792 | out: |
| 3535 | if (ret != 0) { | 5793 | if (ret != 0) { |
| @@ -3558,11 +5816,10 @@ out: | |||
| 3558 | * otherwise block_write_full_page() will skip writeout of pages past | 5816 | * otherwise block_write_full_page() will skip writeout of pages past |
| 3559 | * i_size. The new_i_size parameter is passed for this reason. | 5817 | * i_size. The new_i_size parameter is passed for this reason. |
| 3560 | */ | 5818 | */ |
| 3561 | int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | 5819 | int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, |
| 3562 | u64 new_i_size) | 5820 | u64 range_start, u64 range_end) |
| 3563 | { | 5821 | { |
| 3564 | int ret, numpages; | 5822 | int ret, numpages; |
| 3565 | loff_t endbyte; | ||
| 3566 | struct page **pages = NULL; | 5823 | struct page **pages = NULL; |
| 3567 | u64 phys; | 5824 | u64 phys; |
| 3568 | 5825 | ||
| @@ -3581,7 +5838,8 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | |||
| 3581 | goto out; | 5838 | goto out; |
| 3582 | } | 5839 | } |
| 3583 | 5840 | ||
| 3584 | ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys); | 5841 | ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, |
| 5842 | &numpages, &phys); | ||
| 3585 | if (ret) { | 5843 | if (ret) { |
| 3586 | mlog_errno(ret); | 5844 | mlog_errno(ret); |
| 3587 | goto out; | 5845 | goto out; |
| @@ -3590,17 +5848,16 @@ int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | |||
| 3590 | if (numpages == 0) | 5848 | if (numpages == 0) |
| 3591 | goto out; | 5849 | goto out; |
| 3592 | 5850 | ||
| 3593 | ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys, | 5851 | ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, |
| 3594 | handle); | 5852 | numpages, phys, handle); |
| 3595 | 5853 | ||
| 3596 | /* | 5854 | /* |
| 3597 | * Initiate writeout of the pages we zero'd here. We don't | 5855 | * Initiate writeout of the pages we zero'd here. We don't |
| 3598 | * wait on them - the truncate_inode_pages() call later will | 5856 | * wait on them - the truncate_inode_pages() call later will |
| 3599 | * do that for us. | 5857 | * do that for us. |
| 3600 | */ | 5858 | */ |
| 3601 | endbyte = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); | 5859 | ret = do_sync_mapping_range(inode->i_mapping, range_start, |
| 3602 | ret = do_sync_mapping_range(inode->i_mapping, new_i_size, | 5860 | range_end - 1, SYNC_FILE_RANGE_WRITE); |
| 3603 | endbyte - 1, SYNC_FILE_RANGE_WRITE); | ||
| 3604 | if (ret) | 5861 | if (ret) |
| 3605 | mlog_errno(ret); | 5862 | mlog_errno(ret); |
| 3606 | 5863 | ||
| @@ -3631,8 +5888,6 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
| 3631 | 5888 | ||
| 3632 | mlog_entry_void(); | 5889 | mlog_entry_void(); |
| 3633 | 5890 | ||
| 3634 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 3635 | |||
| 3636 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, | 5891 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, |
| 3637 | i_size_read(inode)); | 5892 | i_size_read(inode)); |
| 3638 | 5893 | ||
| @@ -3754,7 +6009,6 @@ start: | |||
| 3754 | goto start; | 6009 | goto start; |
| 3755 | 6010 | ||
| 3756 | bail: | 6011 | bail: |
| 3757 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 3758 | 6012 | ||
| 3759 | ocfs2_schedule_truncate_log_flush(osb, 1); | 6013 | ocfs2_schedule_truncate_log_flush(osb, 1); |
| 3760 | 6014 | ||
| @@ -3764,6 +6018,8 @@ bail: | |||
| 3764 | if (handle) | 6018 | if (handle) |
| 3765 | ocfs2_commit_trans(osb, handle); | 6019 | ocfs2_commit_trans(osb, handle); |
| 3766 | 6020 | ||
| 6021 | ocfs2_run_deallocs(osb, &tc->tc_dealloc); | ||
| 6022 | |||
| 3767 | ocfs2_free_path(path); | 6023 | ocfs2_free_path(path); |
| 3768 | 6024 | ||
| 3769 | /* This will drop the ext_alloc cluster lock for us */ | 6025 | /* This will drop the ext_alloc cluster lock for us */ |
| @@ -3774,23 +6030,18 @@ bail: | |||
| 3774 | } | 6030 | } |
| 3775 | 6031 | ||
| 3776 | /* | 6032 | /* |
| 3777 | * Expects the inode to already be locked. This will figure out which | 6033 | * Expects the inode to already be locked. |
| 3778 | * inodes need to be locked and will put them on the returned truncate | ||
| 3779 | * context. | ||
| 3780 | */ | 6034 | */ |
| 3781 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, | 6035 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, |
| 3782 | struct inode *inode, | 6036 | struct inode *inode, |
| 3783 | struct buffer_head *fe_bh, | 6037 | struct buffer_head *fe_bh, |
| 3784 | struct ocfs2_truncate_context **tc) | 6038 | struct ocfs2_truncate_context **tc) |
| 3785 | { | 6039 | { |
| 3786 | int status, metadata_delete, i; | 6040 | int status; |
| 3787 | unsigned int new_i_clusters; | 6041 | unsigned int new_i_clusters; |
| 3788 | struct ocfs2_dinode *fe; | 6042 | struct ocfs2_dinode *fe; |
| 3789 | struct ocfs2_extent_block *eb; | 6043 | struct ocfs2_extent_block *eb; |
| 3790 | struct ocfs2_extent_list *el; | ||
| 3791 | struct buffer_head *last_eb_bh = NULL; | 6044 | struct buffer_head *last_eb_bh = NULL; |
| 3792 | struct inode *ext_alloc_inode = NULL; | ||
| 3793 | struct buffer_head *ext_alloc_bh = NULL; | ||
| 3794 | 6045 | ||
| 3795 | mlog_entry_void(); | 6046 | mlog_entry_void(); |
| 3796 | 6047 | ||
| @@ -3810,12 +6061,9 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
| 3810 | mlog_errno(status); | 6061 | mlog_errno(status); |
| 3811 | goto bail; | 6062 | goto bail; |
| 3812 | } | 6063 | } |
| 6064 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | ||
| 3813 | 6065 | ||
| 3814 | metadata_delete = 0; | ||
| 3815 | if (fe->id2.i_list.l_tree_depth) { | 6066 | if (fe->id2.i_list.l_tree_depth) { |
| 3816 | /* If we have a tree, then the truncate may result in | ||
| 3817 | * metadata deletes. Figure this out from the | ||
| 3818 | * rightmost leaf block.*/ | ||
| 3819 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 6067 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), |
| 3820 | &last_eb_bh, OCFS2_BH_CACHED, inode); | 6068 | &last_eb_bh, OCFS2_BH_CACHED, inode); |
| 3821 | if (status < 0) { | 6069 | if (status < 0) { |
| @@ -3830,43 +6078,10 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
| 3830 | status = -EIO; | 6078 | status = -EIO; |
| 3831 | goto bail; | 6079 | goto bail; |
| 3832 | } | 6080 | } |
| 3833 | el = &(eb->h_list); | ||
| 3834 | |||
| 3835 | i = 0; | ||
| 3836 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 3837 | i = 1; | ||
| 3838 | /* | ||
| 3839 | * XXX: Should we check that next_free_rec contains | ||
| 3840 | * the extent? | ||
| 3841 | */ | ||
| 3842 | if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) | ||
| 3843 | metadata_delete = 1; | ||
| 3844 | } | 6081 | } |
| 3845 | 6082 | ||
| 3846 | (*tc)->tc_last_eb_bh = last_eb_bh; | 6083 | (*tc)->tc_last_eb_bh = last_eb_bh; |
| 3847 | 6084 | ||
| 3848 | if (metadata_delete) { | ||
| 3849 | mlog(0, "Will have to delete metadata for this trunc. " | ||
| 3850 | "locking allocator.\n"); | ||
| 3851 | ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); | ||
| 3852 | if (!ext_alloc_inode) { | ||
| 3853 | status = -ENOMEM; | ||
| 3854 | mlog_errno(status); | ||
| 3855 | goto bail; | ||
| 3856 | } | ||
| 3857 | |||
| 3858 | mutex_lock(&ext_alloc_inode->i_mutex); | ||
| 3859 | (*tc)->tc_ext_alloc_inode = ext_alloc_inode; | ||
| 3860 | |||
| 3861 | status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); | ||
| 3862 | if (status < 0) { | ||
| 3863 | mlog_errno(status); | ||
| 3864 | goto bail; | ||
| 3865 | } | ||
| 3866 | (*tc)->tc_ext_alloc_bh = ext_alloc_bh; | ||
| 3867 | (*tc)->tc_ext_alloc_locked = 1; | ||
| 3868 | } | ||
| 3869 | |||
| 3870 | status = 0; | 6085 | status = 0; |
| 3871 | bail: | 6086 | bail: |
| 3872 | if (status < 0) { | 6087 | if (status < 0) { |
| @@ -3880,16 +6095,13 @@ bail: | |||
| 3880 | 6095 | ||
| 3881 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) | 6096 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) |
| 3882 | { | 6097 | { |
| 3883 | if (tc->tc_ext_alloc_inode) { | 6098 | /* |
| 3884 | if (tc->tc_ext_alloc_locked) | 6099 | * The caller is responsible for completing deallocation |
| 3885 | ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); | 6100 | * before freeing the context. |
| 3886 | 6101 | */ | |
| 3887 | mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); | 6102 | if (tc->tc_dealloc.c_first_suballocator != NULL) |
| 3888 | iput(tc->tc_ext_alloc_inode); | 6103 | mlog(ML_NOTICE, |
| 3889 | } | 6104 | "Truncate completion has non-empty dealloc context\n"); |
| 3890 | |||
| 3891 | if (tc->tc_ext_alloc_bh) | ||
| 3892 | brelse(tc->tc_ext_alloc_bh); | ||
| 3893 | 6105 | ||
| 3894 | if (tc->tc_last_eb_bh) | 6106 | if (tc->tc_last_eb_bh) |
| 3895 | brelse(tc->tc_last_eb_bh); | 6107 | brelse(tc->tc_last_eb_bh); |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index fbcb5934a081..990df48ae8d3 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
| @@ -34,7 +34,17 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 34 | u32 cpos, | 34 | u32 cpos, |
| 35 | u64 start_blk, | 35 | u64 start_blk, |
| 36 | u32 new_clusters, | 36 | u32 new_clusters, |
| 37 | u8 flags, | ||
| 37 | struct ocfs2_alloc_context *meta_ac); | 38 | struct ocfs2_alloc_context *meta_ac); |
| 39 | struct ocfs2_cached_dealloc_ctxt; | ||
| 40 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | ||
| 41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | ||
| 42 | struct ocfs2_alloc_context *meta_ac, | ||
| 43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
| 44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
| 45 | u32 cpos, u32 len, handle_t *handle, | ||
| 46 | struct ocfs2_alloc_context *meta_ac, | ||
| 47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
| 38 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
| 39 | struct inode *inode, | 49 | struct inode *inode, |
| 40 | struct ocfs2_dinode *fe); | 50 | struct ocfs2_dinode *fe); |
| @@ -62,17 +72,41 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
| 62 | struct ocfs2_dinode **tl_copy); | 72 | struct ocfs2_dinode **tl_copy); |
| 63 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | 73 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, |
| 64 | struct ocfs2_dinode *tl_copy); | 74 | struct ocfs2_dinode *tl_copy); |
| 75 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); | ||
| 76 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||
| 77 | handle_t *handle, | ||
| 78 | u64 start_blk, | ||
| 79 | unsigned int num_clusters); | ||
| 80 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Process local structure which describes the block unlinks done | ||
| 84 | * during an operation. This is populated via | ||
| 85 | * ocfs2_cache_block_dealloc(). | ||
| 86 | * | ||
| 87 | * ocfs2_run_deallocs() should be called after the potentially | ||
| 88 | * de-allocating routines. No journal handles should be open, and most | ||
| 89 | * locks should have been dropped. | ||
| 90 | */ | ||
| 91 | struct ocfs2_cached_dealloc_ctxt { | ||
| 92 | struct ocfs2_per_slot_free_list *c_first_suballocator; | ||
| 93 | }; | ||
| 94 | static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) | ||
| 95 | { | ||
| 96 | c->c_first_suballocator = NULL; | ||
| 97 | } | ||
| 98 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
| 99 | struct ocfs2_cached_dealloc_ctxt *ctxt); | ||
| 65 | 100 | ||
| 66 | struct ocfs2_truncate_context { | 101 | struct ocfs2_truncate_context { |
| 67 | struct inode *tc_ext_alloc_inode; | 102 | struct ocfs2_cached_dealloc_ctxt tc_dealloc; |
| 68 | struct buffer_head *tc_ext_alloc_bh; | ||
| 69 | int tc_ext_alloc_locked; /* is it cluster locked? */ | 103 | int tc_ext_alloc_locked; /* is it cluster locked? */ |
| 70 | /* these get destroyed once it's passed to ocfs2_commit_truncate. */ | 104 | /* these get destroyed once it's passed to ocfs2_commit_truncate. */ |
| 71 | struct buffer_head *tc_last_eb_bh; | 105 | struct buffer_head *tc_last_eb_bh; |
| 72 | }; | 106 | }; |
| 73 | 107 | ||
| 74 | int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | 108 | int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, |
| 75 | u64 new_i_size); | 109 | u64 range_start, u64 range_end); |
| 76 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, | 110 | int ocfs2_prepare_truncate(struct ocfs2_super *osb, |
| 77 | struct inode *inode, | 111 | struct inode *inode, |
| 78 | struct buffer_head *fe_bh, | 112 | struct buffer_head *fe_bh, |
| @@ -84,6 +118,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
| 84 | 118 | ||
| 85 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, | 119 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, |
| 86 | u32 cpos, struct buffer_head **leaf_bh); | 120 | u32 cpos, struct buffer_head **leaf_bh); |
| 121 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); | ||
| 87 | 122 | ||
| 88 | /* | 123 | /* |
| 89 | * Helper function to look at the # of clusters in an extent record. | 124 | * Helper function to look at the # of clusters in an extent record. |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index a480b09c79b9..84bf6e79de23 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -684,6 +684,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
| 684 | bh = bh->b_this_page, block_start += bsize) { | 684 | bh = bh->b_this_page, block_start += bsize) { |
| 685 | block_end = block_start + bsize; | 685 | block_end = block_start + bsize; |
| 686 | 686 | ||
| 687 | clear_buffer_new(bh); | ||
| 688 | |||
| 687 | /* | 689 | /* |
| 688 | * Ignore blocks outside of our i/o range - | 690 | * Ignore blocks outside of our i/o range - |
| 689 | * they may belong to unallocated clusters. | 691 | * they may belong to unallocated clusters. |
| @@ -698,9 +700,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
| 698 | * For an allocating write with cluster size >= page | 700 | * For an allocating write with cluster size >= page |
| 699 | * size, we always write the entire page. | 701 | * size, we always write the entire page. |
| 700 | */ | 702 | */ |
| 701 | 703 | if (new) | |
| 702 | if (buffer_new(bh)) | 704 | set_buffer_new(bh); |
| 703 | clear_buffer_new(bh); | ||
| 704 | 705 | ||
| 705 | if (!buffer_mapped(bh)) { | 706 | if (!buffer_mapped(bh)) { |
| 706 | map_bh(bh, inode->i_sb, *p_blkno); | 707 | map_bh(bh, inode->i_sb, *p_blkno); |
| @@ -711,7 +712,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
| 711 | if (!buffer_uptodate(bh)) | 712 | if (!buffer_uptodate(bh)) |
| 712 | set_buffer_uptodate(bh); | 713 | set_buffer_uptodate(bh); |
| 713 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && | 714 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && |
| 714 | (block_start < from || block_end > to)) { | 715 | !buffer_new(bh) && |
| 716 | (block_start < from || block_end > to)) { | ||
| 715 | ll_rw_block(READ, 1, &bh); | 717 | ll_rw_block(READ, 1, &bh); |
| 716 | *wait_bh++=bh; | 718 | *wait_bh++=bh; |
| 717 | } | 719 | } |
| @@ -738,18 +740,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
| 738 | bh = head; | 740 | bh = head; |
| 739 | block_start = 0; | 741 | block_start = 0; |
| 740 | do { | 742 | do { |
| 741 | void *kaddr; | ||
| 742 | |||
| 743 | block_end = block_start + bsize; | 743 | block_end = block_start + bsize; |
| 744 | if (block_end <= from) | 744 | if (block_end <= from) |
| 745 | goto next_bh; | 745 | goto next_bh; |
| 746 | if (block_start >= to) | 746 | if (block_start >= to) |
| 747 | break; | 747 | break; |
| 748 | 748 | ||
| 749 | kaddr = kmap_atomic(page, KM_USER0); | 749 | zero_user_page(page, block_start, bh->b_size, KM_USER0); |
| 750 | memset(kaddr+block_start, 0, bh->b_size); | ||
| 751 | flush_dcache_page(page); | ||
| 752 | kunmap_atomic(kaddr, KM_USER0); | ||
| 753 | set_buffer_uptodate(bh); | 750 | set_buffer_uptodate(bh); |
| 754 | mark_buffer_dirty(bh); | 751 | mark_buffer_dirty(bh); |
| 755 | 752 | ||
| @@ -761,217 +758,240 @@ next_bh: | |||
| 761 | return ret; | 758 | return ret; |
| 762 | } | 759 | } |
| 763 | 760 | ||
| 761 | #if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) | ||
| 762 | #define OCFS2_MAX_CTXT_PAGES 1 | ||
| 763 | #else | ||
| 764 | #define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE) | ||
| 765 | #endif | ||
| 766 | |||
| 767 | #define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) | ||
| 768 | |||
| 764 | /* | 769 | /* |
| 765 | * This will copy user data from the buffer page in the splice | 770 | * Describe the state of a single cluster to be written to. |
| 766 | * context. | ||
| 767 | * | ||
| 768 | * For now, we ignore SPLICE_F_MOVE as that would require some extra | ||
| 769 | * communication out all the way to ocfs2_write(). | ||
| 770 | */ | 771 | */ |
| 771 | int ocfs2_map_and_write_splice_data(struct inode *inode, | 772 | struct ocfs2_write_cluster_desc { |
| 772 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | 773 | u32 c_cpos; |
| 773 | unsigned int *ret_from, unsigned int *ret_to) | 774 | u32 c_phys; |
| 775 | /* | ||
| 776 | * Give this a unique field because c_phys eventually gets | ||
| 777 | * filled. | ||
| 778 | */ | ||
| 779 | unsigned c_new; | ||
| 780 | unsigned c_unwritten; | ||
| 781 | }; | ||
| 782 | |||
| 783 | static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) | ||
| 774 | { | 784 | { |
| 775 | int ret; | 785 | return d->c_new || d->c_unwritten; |
| 776 | unsigned int to, from, cluster_start, cluster_end; | 786 | } |
| 777 | char *src, *dst; | ||
| 778 | struct ocfs2_splice_write_priv *sp = wc->w_private; | ||
| 779 | struct pipe_buffer *buf = sp->s_buf; | ||
| 780 | unsigned long bytes, src_from; | ||
| 781 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 782 | 787 | ||
| 783 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 788 | struct ocfs2_write_ctxt { |
| 784 | &cluster_end); | 789 | /* Logical cluster position / len of write */ |
| 790 | u32 w_cpos; | ||
| 791 | u32 w_clen; | ||
| 785 | 792 | ||
| 786 | from = sp->s_offset; | 793 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; |
| 787 | src_from = sp->s_buf_offset; | ||
| 788 | bytes = wc->w_count; | ||
| 789 | 794 | ||
| 790 | if (wc->w_large_pages) { | 795 | /* |
| 791 | /* | 796 | * This is true if page_size > cluster_size. |
| 792 | * For cluster size < page size, we have to | 797 | * |
| 793 | * calculate pos within the cluster and obey | 798 | * It triggers a set of special cases during write which might |
| 794 | * the rightmost boundary. | 799 | * have to deal with allocating writes to partial pages. |
| 795 | */ | 800 | */ |
| 796 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | 801 | unsigned int w_large_pages; |
| 797 | - (wc->w_pos & (osb->s_clustersize - 1)))); | 802 | |
| 798 | } | 803 | /* |
| 799 | to = from + bytes; | 804 | * Pages involved in this write. |
| 805 | * | ||
| 806 | * w_target_page is the page being written to by the user. | ||
| 807 | * | ||
| 808 | * w_pages is an array of pages which always contains | ||
| 809 | * w_target_page, and in the case of an allocating write with | ||
| 810 | * page_size < cluster size, it will contain zero'd and mapped | ||
| 811 | * pages adjacent to w_target_page which need to be written | ||
| 812 | * out in so that future reads from that region will get | ||
| 813 | * zero's. | ||
| 814 | */ | ||
| 815 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
| 816 | unsigned int w_num_pages; | ||
| 817 | struct page *w_target_page; | ||
| 800 | 818 | ||
| 801 | BUG_ON(from > PAGE_CACHE_SIZE); | 819 | /* |
| 802 | BUG_ON(to > PAGE_CACHE_SIZE); | 820 | * ocfs2_write_end() uses this to know what the real range to |
| 803 | BUG_ON(from < cluster_start); | 821 | * write in the target should be. |
| 804 | BUG_ON(to > cluster_end); | 822 | */ |
| 823 | unsigned int w_target_from; | ||
| 824 | unsigned int w_target_to; | ||
| 805 | 825 | ||
| 806 | if (wc->w_this_page_new) | 826 | /* |
| 807 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 827 | * We could use journal_current_handle() but this is cleaner, |
| 808 | cluster_start, cluster_end, 1); | 828 | * IMHO -Mark |
| 809 | else | 829 | */ |
| 810 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 830 | handle_t *w_handle; |
| 811 | from, to, 0); | 831 | |
| 812 | if (ret) { | 832 | struct buffer_head *w_di_bh; |
| 813 | mlog_errno(ret); | 833 | |
| 814 | goto out; | 834 | struct ocfs2_cached_dealloc_ctxt w_dealloc; |
| 835 | }; | ||
| 836 | |||
| 837 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | ||
| 838 | { | ||
| 839 | int i; | ||
| 840 | |||
| 841 | for(i = 0; i < wc->w_num_pages; i++) { | ||
| 842 | if (wc->w_pages[i] == NULL) | ||
| 843 | continue; | ||
| 844 | |||
| 845 | unlock_page(wc->w_pages[i]); | ||
| 846 | mark_page_accessed(wc->w_pages[i]); | ||
| 847 | page_cache_release(wc->w_pages[i]); | ||
| 815 | } | 848 | } |
| 816 | 849 | ||
| 817 | src = buf->ops->map(sp->s_pipe, buf, 1); | 850 | brelse(wc->w_di_bh); |
| 818 | dst = kmap_atomic(wc->w_this_page, KM_USER1); | 851 | kfree(wc); |
| 819 | memcpy(dst + from, src + src_from, bytes); | 852 | } |
| 820 | kunmap_atomic(wc->w_this_page, KM_USER1); | 853 | |
| 821 | buf->ops->unmap(sp->s_pipe, buf, src); | 854 | static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, |
| 855 | struct ocfs2_super *osb, loff_t pos, | ||
| 856 | unsigned len, struct buffer_head *di_bh) | ||
| 857 | { | ||
| 858 | struct ocfs2_write_ctxt *wc; | ||
| 859 | |||
| 860 | wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); | ||
| 861 | if (!wc) | ||
| 862 | return -ENOMEM; | ||
| 822 | 863 | ||
| 823 | wc->w_finished_copy = 1; | 864 | wc->w_cpos = pos >> osb->s_clustersize_bits; |
| 865 | wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len); | ||
| 866 | get_bh(di_bh); | ||
| 867 | wc->w_di_bh = di_bh; | ||
| 824 | 868 | ||
| 825 | *ret_from = from; | 869 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) |
| 826 | *ret_to = to; | 870 | wc->w_large_pages = 1; |
| 827 | out: | 871 | else |
| 872 | wc->w_large_pages = 0; | ||
| 873 | |||
| 874 | ocfs2_init_dealloc_ctxt(&wc->w_dealloc); | ||
| 875 | |||
| 876 | *wcp = wc; | ||
| 828 | 877 | ||
| 829 | return bytes ? (unsigned int)bytes : ret; | 878 | return 0; |
| 830 | } | 879 | } |
| 831 | 880 | ||
| 832 | /* | 881 | /* |
| 833 | * This will copy user data from the iovec in the buffered write | 882 | * If a page has any new buffers, zero them out here, and mark them uptodate |
| 834 | * context. | 883 | * and dirty so they'll be written out (in order to prevent uninitialised |
| 884 | * block data from leaking). And clear the new bit. | ||
| 835 | */ | 885 | */ |
| 836 | int ocfs2_map_and_write_user_data(struct inode *inode, | 886 | static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) |
| 837 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | ||
| 838 | unsigned int *ret_from, unsigned int *ret_to) | ||
| 839 | { | 887 | { |
| 840 | int ret; | 888 | unsigned int block_start, block_end; |
| 841 | unsigned int to, from, cluster_start, cluster_end; | 889 | struct buffer_head *head, *bh; |
| 842 | unsigned long bytes, src_from; | ||
| 843 | char *dst; | ||
| 844 | struct ocfs2_buffered_write_priv *bp = wc->w_private; | ||
| 845 | const struct iovec *cur_iov = bp->b_cur_iov; | ||
| 846 | char __user *buf; | ||
| 847 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 848 | 890 | ||
| 849 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 891 | BUG_ON(!PageLocked(page)); |
| 850 | &cluster_end); | 892 | if (!page_has_buffers(page)) |
| 893 | return; | ||
| 851 | 894 | ||
| 852 | buf = cur_iov->iov_base + bp->b_cur_off; | 895 | bh = head = page_buffers(page); |
| 853 | src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; | 896 | block_start = 0; |
| 897 | do { | ||
| 898 | block_end = block_start + bh->b_size; | ||
| 854 | 899 | ||
| 855 | from = wc->w_pos & (PAGE_CACHE_SIZE - 1); | 900 | if (buffer_new(bh)) { |
| 901 | if (block_end > from && block_start < to) { | ||
| 902 | if (!PageUptodate(page)) { | ||
| 903 | unsigned start, end; | ||
| 856 | 904 | ||
| 857 | /* | 905 | start = max(from, block_start); |
| 858 | * This is a lot of comparisons, but it reads quite | 906 | end = min(to, block_end); |
| 859 | * easily, which is important here. | ||
| 860 | */ | ||
| 861 | /* Stay within the src page */ | ||
| 862 | bytes = PAGE_SIZE - src_from; | ||
| 863 | /* Stay within the vector */ | ||
| 864 | bytes = min(bytes, | ||
| 865 | (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); | ||
| 866 | /* Stay within count */ | ||
| 867 | bytes = min(bytes, (unsigned long)wc->w_count); | ||
| 868 | /* | ||
| 869 | * For clustersize > page size, just stay within | ||
| 870 | * target page, otherwise we have to calculate pos | ||
| 871 | * within the cluster and obey the rightmost | ||
| 872 | * boundary. | ||
| 873 | */ | ||
| 874 | if (wc->w_large_pages) { | ||
| 875 | /* | ||
| 876 | * For cluster size < page size, we have to | ||
| 877 | * calculate pos within the cluster and obey | ||
| 878 | * the rightmost boundary. | ||
| 879 | */ | ||
| 880 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | ||
| 881 | - (wc->w_pos & (osb->s_clustersize - 1)))); | ||
| 882 | } else { | ||
| 883 | /* | ||
| 884 | * cluster size > page size is the most common | ||
| 885 | * case - we just stay within the target page | ||
| 886 | * boundary. | ||
| 887 | */ | ||
| 888 | bytes = min(bytes, PAGE_CACHE_SIZE - from); | ||
| 889 | } | ||
| 890 | 907 | ||
| 891 | to = from + bytes; | 908 | zero_user_page(page, start, end - start, KM_USER0); |
| 909 | set_buffer_uptodate(bh); | ||
| 910 | } | ||
| 892 | 911 | ||
| 893 | BUG_ON(from > PAGE_CACHE_SIZE); | 912 | clear_buffer_new(bh); |
| 894 | BUG_ON(to > PAGE_CACHE_SIZE); | 913 | mark_buffer_dirty(bh); |
| 895 | BUG_ON(from < cluster_start); | 914 | } |
| 896 | BUG_ON(to > cluster_end); | 915 | } |
| 897 | 916 | ||
| 898 | if (wc->w_this_page_new) | 917 | block_start = block_end; |
| 899 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 918 | bh = bh->b_this_page; |
| 900 | cluster_start, cluster_end, 1); | 919 | } while (bh != head); |
| 901 | else | 920 | } |
| 902 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | ||
| 903 | from, to, 0); | ||
| 904 | if (ret) { | ||
| 905 | mlog_errno(ret); | ||
| 906 | goto out; | ||
| 907 | } | ||
| 908 | 921 | ||
| 909 | dst = kmap(wc->w_this_page); | 922 | /* |
| 910 | memcpy(dst + from, bp->b_src_buf + src_from, bytes); | 923 | * Only called when we have a failure during allocating write to write |
| 911 | kunmap(wc->w_this_page); | 924 | * zero's to the newly allocated region. |
| 925 | */ | ||
| 926 | static void ocfs2_write_failure(struct inode *inode, | ||
| 927 | struct ocfs2_write_ctxt *wc, | ||
| 928 | loff_t user_pos, unsigned user_len) | ||
| 929 | { | ||
| 930 | int i; | ||
| 931 | unsigned from, to; | ||
| 932 | struct page *tmppage; | ||
| 912 | 933 | ||
| 913 | /* | 934 | ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); |
| 914 | * XXX: This is slow, but simple. The caller of | ||
| 915 | * ocfs2_buffered_write_cluster() is responsible for | ||
| 916 | * passing through the iovecs, so it's difficult to | ||
| 917 | * predict what our next step is in here after our | ||
| 918 | * initial write. A future version should be pushing | ||
| 919 | * that iovec manipulation further down. | ||
| 920 | * | ||
| 921 | * By setting this, we indicate that a copy from user | ||
| 922 | * data was done, and subsequent calls for this | ||
| 923 | * cluster will skip copying more data. | ||
| 924 | */ | ||
| 925 | wc->w_finished_copy = 1; | ||
| 926 | 935 | ||
| 927 | *ret_from = from; | 936 | if (wc->w_large_pages) { |
| 928 | *ret_to = to; | 937 | from = wc->w_target_from; |
| 929 | out: | 938 | to = wc->w_target_to; |
| 939 | } else { | ||
| 940 | from = 0; | ||
| 941 | to = PAGE_CACHE_SIZE; | ||
| 942 | } | ||
| 943 | |||
| 944 | for(i = 0; i < wc->w_num_pages; i++) { | ||
| 945 | tmppage = wc->w_pages[i]; | ||
| 930 | 946 | ||
| 931 | return bytes ? (unsigned int)bytes : ret; | 947 | if (ocfs2_should_order_data(inode)) |
| 948 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | ||
| 949 | from, to, NULL, | ||
| 950 | ocfs2_journal_dirty_data); | ||
| 951 | |||
| 952 | block_commit_write(tmppage, from, to); | ||
| 953 | } | ||
| 932 | } | 954 | } |
| 933 | 955 | ||
| 934 | /* | 956 | static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, |
| 935 | * Map, fill and write a page to disk. | 957 | struct ocfs2_write_ctxt *wc, |
| 936 | * | 958 | struct page *page, u32 cpos, |
| 937 | * The work of copying data is done via callback. Newly allocated | 959 | loff_t user_pos, unsigned user_len, |
| 938 | * pages which don't take user data will be zero'd (set 'new' to | 960 | int new) |
| 939 | * indicate an allocating write) | ||
| 940 | * | ||
| 941 | * Returns a negative error code or the number of bytes copied into | ||
| 942 | * the page. | ||
| 943 | */ | ||
| 944 | static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | ||
| 945 | u64 *p_blkno, struct page *page, | ||
| 946 | struct ocfs2_write_ctxt *wc, int new) | ||
| 947 | { | 961 | { |
| 948 | int ret, copied = 0; | 962 | int ret; |
| 949 | unsigned int from = 0, to = 0; | 963 | unsigned int map_from = 0, map_to = 0; |
| 950 | unsigned int cluster_start, cluster_end; | 964 | unsigned int cluster_start, cluster_end; |
| 951 | unsigned int zero_from = 0, zero_to = 0; | 965 | unsigned int user_data_from = 0, user_data_to = 0; |
| 952 | 966 | ||
| 953 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, | 967 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, |
| 954 | &cluster_start, &cluster_end); | 968 | &cluster_start, &cluster_end); |
| 955 | 969 | ||
| 956 | if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index | 970 | if (page == wc->w_target_page) { |
| 957 | && !wc->w_finished_copy) { | 971 | map_from = user_pos & (PAGE_CACHE_SIZE - 1); |
| 958 | 972 | map_to = map_from + user_len; | |
| 959 | wc->w_this_page = page; | 973 | |
| 960 | wc->w_this_page_new = new; | 974 | if (new) |
| 961 | ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); | 975 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, |
| 962 | if (ret < 0) { | 976 | cluster_start, cluster_end, |
| 977 | new); | ||
| 978 | else | ||
| 979 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, | ||
| 980 | map_from, map_to, new); | ||
| 981 | if (ret) { | ||
| 963 | mlog_errno(ret); | 982 | mlog_errno(ret); |
| 964 | goto out; | 983 | goto out; |
| 965 | } | 984 | } |
| 966 | 985 | ||
| 967 | copied = ret; | 986 | user_data_from = map_from; |
| 968 | 987 | user_data_to = map_to; | |
| 969 | zero_from = from; | ||
| 970 | zero_to = to; | ||
| 971 | if (new) { | 988 | if (new) { |
| 972 | from = cluster_start; | 989 | map_from = cluster_start; |
| 973 | to = cluster_end; | 990 | map_to = cluster_end; |
| 974 | } | 991 | } |
| 992 | |||
| 993 | wc->w_target_from = map_from; | ||
| 994 | wc->w_target_to = map_to; | ||
| 975 | } else { | 995 | } else { |
| 976 | /* | 996 | /* |
| 977 | * If we haven't allocated the new page yet, we | 997 | * If we haven't allocated the new page yet, we |
| @@ -980,11 +1000,11 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | |||
| 980 | */ | 1000 | */ |
| 981 | BUG_ON(!new); | 1001 | BUG_ON(!new); |
| 982 | 1002 | ||
| 983 | from = cluster_start; | 1003 | map_from = cluster_start; |
| 984 | to = cluster_end; | 1004 | map_to = cluster_end; |
| 985 | 1005 | ||
| 986 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, | 1006 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, |
| 987 | cluster_start, cluster_end, 1); | 1007 | cluster_start, cluster_end, new); |
| 988 | if (ret) { | 1008 | if (ret) { |
| 989 | mlog_errno(ret); | 1009 | mlog_errno(ret); |
| 990 | goto out; | 1010 | goto out; |
| @@ -1003,108 +1023,113 @@ static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | |||
| 1003 | */ | 1023 | */ |
| 1004 | if (new && !PageUptodate(page)) | 1024 | if (new && !PageUptodate(page)) |
| 1005 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), | 1025 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), |
| 1006 | wc->w_cpos, zero_from, zero_to); | 1026 | cpos, user_data_from, user_data_to); |
| 1007 | 1027 | ||
| 1008 | flush_dcache_page(page); | 1028 | flush_dcache_page(page); |
| 1009 | 1029 | ||
| 1010 | if (ocfs2_should_order_data(inode)) { | ||
| 1011 | ret = walk_page_buffers(handle, | ||
| 1012 | page_buffers(page), | ||
| 1013 | from, to, NULL, | ||
| 1014 | ocfs2_journal_dirty_data); | ||
| 1015 | if (ret < 0) | ||
| 1016 | mlog_errno(ret); | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | /* | ||
| 1020 | * We don't use generic_commit_write() because we need to | ||
| 1021 | * handle our own i_size update. | ||
| 1022 | */ | ||
| 1023 | ret = block_commit_write(page, from, to); | ||
| 1024 | if (ret) | ||
| 1025 | mlog_errno(ret); | ||
| 1026 | out: | 1030 | out: |
| 1027 | 1031 | return ret; | |
| 1028 | return copied ? copied : ret; | ||
| 1029 | } | 1032 | } |
| 1030 | 1033 | ||
| 1031 | /* | 1034 | /* |
| 1032 | * Do the actual write of some data into an inode. Optionally allocate | 1035 | * This function will only grab one clusters worth of pages. |
| 1033 | * in order to fulfill the write. | ||
| 1034 | * | ||
| 1035 | * cpos is the logical cluster offset within the file to write at | ||
| 1036 | * | ||
| 1037 | * 'phys' is the physical mapping of that offset. a 'phys' value of | ||
| 1038 | * zero indicates that allocation is required. In this case, data_ac | ||
| 1039 | * and meta_ac should be valid (meta_ac can be null if metadata | ||
| 1040 | * allocation isn't required). | ||
| 1041 | */ | 1036 | */ |
| 1042 | static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, | 1037 | static int ocfs2_grab_pages_for_write(struct address_space *mapping, |
| 1043 | struct buffer_head *di_bh, | 1038 | struct ocfs2_write_ctxt *wc, |
| 1044 | struct ocfs2_alloc_context *data_ac, | 1039 | u32 cpos, loff_t user_pos, int new, |
| 1045 | struct ocfs2_alloc_context *meta_ac, | 1040 | struct page *mmap_page) |
| 1046 | struct ocfs2_write_ctxt *wc) | ||
| 1047 | { | 1041 | { |
| 1048 | int ret, i, numpages = 1, new; | 1042 | int ret = 0, i; |
| 1049 | unsigned int copied = 0; | 1043 | unsigned long start, target_index, index; |
| 1050 | u32 tmp_pos; | ||
| 1051 | u64 v_blkno, p_blkno; | ||
| 1052 | struct address_space *mapping = file->f_mapping; | ||
| 1053 | struct inode *inode = mapping->host; | 1044 | struct inode *inode = mapping->host; |
| 1054 | unsigned long index, start; | ||
| 1055 | struct page **cpages; | ||
| 1056 | 1045 | ||
| 1057 | new = phys == 0 ? 1 : 0; | 1046 | target_index = user_pos >> PAGE_CACHE_SHIFT; |
| 1058 | 1047 | ||
| 1059 | /* | 1048 | /* |
| 1060 | * Figure out how many pages we'll be manipulating here. For | 1049 | * Figure out how many pages we'll be manipulating here. For |
| 1061 | * non allocating write, we just change the one | 1050 | * non allocating write, we just change the one |
| 1062 | * page. Otherwise, we'll need a whole clusters worth. | 1051 | * page. Otherwise, we'll need a whole clusters worth. |
| 1063 | */ | 1052 | */ |
| 1064 | if (new) | ||
| 1065 | numpages = ocfs2_pages_per_cluster(inode->i_sb); | ||
| 1066 | |||
| 1067 | cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); | ||
| 1068 | if (!cpages) { | ||
| 1069 | ret = -ENOMEM; | ||
| 1070 | mlog_errno(ret); | ||
| 1071 | return ret; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | /* | ||
| 1075 | * Fill our page array first. That way we've grabbed enough so | ||
| 1076 | * that we can zero and flush if we error after adding the | ||
| 1077 | * extent. | ||
| 1078 | */ | ||
| 1079 | if (new) { | 1053 | if (new) { |
| 1080 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, | 1054 | wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); |
| 1081 | wc->w_cpos); | 1055 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); |
| 1082 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); | ||
| 1083 | } else { | 1056 | } else { |
| 1084 | start = wc->w_pos >> PAGE_CACHE_SHIFT; | 1057 | wc->w_num_pages = 1; |
| 1085 | v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; | 1058 | start = target_index; |
| 1086 | } | 1059 | } |
| 1087 | 1060 | ||
| 1088 | for(i = 0; i < numpages; i++) { | 1061 | for(i = 0; i < wc->w_num_pages; i++) { |
| 1089 | index = start + i; | 1062 | index = start + i; |
| 1090 | 1063 | ||
| 1091 | cpages[i] = find_or_create_page(mapping, index, GFP_NOFS); | 1064 | if (index == target_index && mmap_page) { |
| 1092 | if (!cpages[i]) { | 1065 | /* |
| 1093 | ret = -ENOMEM; | 1066 | * ocfs2_pagemkwrite() is a little different |
| 1094 | mlog_errno(ret); | 1067 | * and wants us to directly use the page |
| 1095 | goto out; | 1068 | * passed in. |
| 1069 | */ | ||
| 1070 | lock_page(mmap_page); | ||
| 1071 | |||
| 1072 | if (mmap_page->mapping != mapping) { | ||
| 1073 | unlock_page(mmap_page); | ||
| 1074 | /* | ||
| 1075 | * Sanity check - the locking in | ||
| 1076 | * ocfs2_pagemkwrite() should ensure | ||
| 1077 | * that this code doesn't trigger. | ||
| 1078 | */ | ||
| 1079 | ret = -EINVAL; | ||
| 1080 | mlog_errno(ret); | ||
| 1081 | goto out; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | page_cache_get(mmap_page); | ||
| 1085 | wc->w_pages[i] = mmap_page; | ||
| 1086 | } else { | ||
| 1087 | wc->w_pages[i] = find_or_create_page(mapping, index, | ||
| 1088 | GFP_NOFS); | ||
| 1089 | if (!wc->w_pages[i]) { | ||
| 1090 | ret = -ENOMEM; | ||
| 1091 | mlog_errno(ret); | ||
| 1092 | goto out; | ||
| 1093 | } | ||
| 1096 | } | 1094 | } |
| 1095 | |||
| 1096 | if (index == target_index) | ||
| 1097 | wc->w_target_page = wc->w_pages[i]; | ||
| 1097 | } | 1098 | } |
| 1099 | out: | ||
| 1100 | return ret; | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | /* | ||
| 1104 | * Prepare a single cluster for write one cluster into the file. | ||
| 1105 | */ | ||
| 1106 | static int ocfs2_write_cluster(struct address_space *mapping, | ||
| 1107 | u32 phys, unsigned int unwritten, | ||
| 1108 | struct ocfs2_alloc_context *data_ac, | ||
| 1109 | struct ocfs2_alloc_context *meta_ac, | ||
| 1110 | struct ocfs2_write_ctxt *wc, u32 cpos, | ||
| 1111 | loff_t user_pos, unsigned user_len) | ||
| 1112 | { | ||
| 1113 | int ret, i, new, should_zero = 0; | ||
| 1114 | u64 v_blkno, p_blkno; | ||
| 1115 | struct inode *inode = mapping->host; | ||
| 1116 | |||
| 1117 | new = phys == 0 ? 1 : 0; | ||
| 1118 | if (new || unwritten) | ||
| 1119 | should_zero = 1; | ||
| 1098 | 1120 | ||
| 1099 | if (new) { | 1121 | if (new) { |
| 1122 | u32 tmp_pos; | ||
| 1123 | |||
| 1100 | /* | 1124 | /* |
| 1101 | * This is safe to call with the page locks - it won't take | 1125 | * This is safe to call with the page locks - it won't take |
| 1102 | * any additional semaphores or cluster locks. | 1126 | * any additional semaphores or cluster locks. |
| 1103 | */ | 1127 | */ |
| 1104 | tmp_pos = wc->w_cpos; | 1128 | tmp_pos = cpos; |
| 1105 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1129 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, |
| 1106 | &tmp_pos, 1, di_bh, handle, | 1130 | &tmp_pos, 1, 0, wc->w_di_bh, |
| 1107 | data_ac, meta_ac, NULL); | 1131 | wc->w_handle, data_ac, |
| 1132 | meta_ac, NULL); | ||
| 1108 | /* | 1133 | /* |
| 1109 | * This shouldn't happen because we must have already | 1134 | * This shouldn't happen because we must have already |
| 1110 | * calculated the correct meta data allocation required. The | 1135 | * calculated the correct meta data allocation required. The |
| @@ -1121,159 +1146,433 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, | |||
| 1121 | mlog_errno(ret); | 1146 | mlog_errno(ret); |
| 1122 | goto out; | 1147 | goto out; |
| 1123 | } | 1148 | } |
| 1149 | } else if (unwritten) { | ||
| 1150 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | ||
| 1151 | wc->w_handle, cpos, 1, phys, | ||
| 1152 | meta_ac, &wc->w_dealloc); | ||
| 1153 | if (ret < 0) { | ||
| 1154 | mlog_errno(ret); | ||
| 1155 | goto out; | ||
| 1156 | } | ||
| 1124 | } | 1157 | } |
| 1125 | 1158 | ||
| 1159 | if (should_zero) | ||
| 1160 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); | ||
| 1161 | else | ||
| 1162 | v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; | ||
| 1163 | |||
| 1164 | /* | ||
| 1165 | * The only reason this should fail is due to an inability to | ||
| 1166 | * find the extent added. | ||
| 1167 | */ | ||
| 1126 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, | 1168 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, |
| 1127 | NULL); | 1169 | NULL); |
| 1128 | if (ret < 0) { | 1170 | if (ret < 0) { |
| 1129 | 1171 | ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, " | |
| 1130 | /* | 1172 | "at logical block %llu", |
| 1131 | * XXX: Should we go readonly here? | 1173 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 1132 | */ | 1174 | (unsigned long long)v_blkno); |
| 1133 | |||
| 1134 | mlog_errno(ret); | ||
| 1135 | goto out; | 1175 | goto out; |
| 1136 | } | 1176 | } |
| 1137 | 1177 | ||
| 1138 | BUG_ON(p_blkno == 0); | 1178 | BUG_ON(p_blkno == 0); |
| 1139 | 1179 | ||
| 1140 | for(i = 0; i < numpages; i++) { | 1180 | for(i = 0; i < wc->w_num_pages; i++) { |
| 1141 | ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], | 1181 | int tmpret; |
| 1142 | wc, new); | 1182 | |
| 1143 | if (ret < 0) { | 1183 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, |
| 1144 | mlog_errno(ret); | 1184 | wc->w_pages[i], cpos, |
| 1145 | goto out; | 1185 | user_pos, user_len, |
| 1186 | should_zero); | ||
| 1187 | if (tmpret) { | ||
| 1188 | mlog_errno(tmpret); | ||
| 1189 | if (ret == 0) | ||
| 1190 | tmpret = ret; | ||
| 1146 | } | 1191 | } |
| 1147 | |||
| 1148 | copied += ret; | ||
| 1149 | } | 1192 | } |
| 1150 | 1193 | ||
| 1194 | /* | ||
| 1195 | * We only have cleanup to do in case of allocating write. | ||
| 1196 | */ | ||
| 1197 | if (ret && new) | ||
| 1198 | ocfs2_write_failure(inode, wc, user_pos, user_len); | ||
| 1199 | |||
| 1151 | out: | 1200 | out: |
| 1152 | for(i = 0; i < numpages; i++) { | 1201 | |
| 1153 | unlock_page(cpages[i]); | 1202 | return ret; |
| 1154 | mark_page_accessed(cpages[i]); | 1203 | } |
| 1155 | page_cache_release(cpages[i]); | 1204 | |
| 1205 | static int ocfs2_write_cluster_by_desc(struct address_space *mapping, | ||
| 1206 | struct ocfs2_alloc_context *data_ac, | ||
| 1207 | struct ocfs2_alloc_context *meta_ac, | ||
| 1208 | struct ocfs2_write_ctxt *wc, | ||
| 1209 | loff_t pos, unsigned len) | ||
| 1210 | { | ||
| 1211 | int ret, i; | ||
| 1212 | struct ocfs2_write_cluster_desc *desc; | ||
| 1213 | |||
| 1214 | for (i = 0; i < wc->w_clen; i++) { | ||
| 1215 | desc = &wc->w_desc[i]; | ||
| 1216 | |||
| 1217 | ret = ocfs2_write_cluster(mapping, desc->c_phys, | ||
| 1218 | desc->c_unwritten, data_ac, meta_ac, | ||
| 1219 | wc, desc->c_cpos, pos, len); | ||
| 1220 | if (ret) { | ||
| 1221 | mlog_errno(ret); | ||
| 1222 | goto out; | ||
| 1223 | } | ||
| 1156 | } | 1224 | } |
| 1157 | kfree(cpages); | ||
| 1158 | 1225 | ||
| 1159 | return copied ? copied : ret; | 1226 | ret = 0; |
| 1227 | out: | ||
| 1228 | return ret; | ||
| 1160 | } | 1229 | } |
| 1161 | 1230 | ||
| 1162 | static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, | 1231 | /* |
| 1163 | struct ocfs2_super *osb, loff_t pos, | 1232 | * ocfs2_write_end() wants to know which parts of the target page it |
| 1164 | size_t count, ocfs2_page_writer *cb, | 1233 | * should complete the write on. It's easiest to compute them ahead of |
| 1165 | void *cb_priv) | 1234 | * time when a more complete view of the write is available. |
| 1235 | */ | ||
| 1236 | static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | ||
| 1237 | struct ocfs2_write_ctxt *wc, | ||
| 1238 | loff_t pos, unsigned len, int alloc) | ||
| 1166 | { | 1239 | { |
| 1167 | wc->w_count = count; | 1240 | struct ocfs2_write_cluster_desc *desc; |
| 1168 | wc->w_pos = pos; | ||
| 1169 | wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; | ||
| 1170 | wc->w_finished_copy = 0; | ||
| 1171 | 1241 | ||
| 1172 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) | 1242 | wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1); |
| 1173 | wc->w_large_pages = 1; | 1243 | wc->w_target_to = wc->w_target_from + len; |
| 1174 | else | ||
| 1175 | wc->w_large_pages = 0; | ||
| 1176 | 1244 | ||
| 1177 | wc->w_write_data_page = cb; | 1245 | if (alloc == 0) |
| 1178 | wc->w_private = cb_priv; | 1246 | return; |
| 1247 | |||
| 1248 | /* | ||
| 1249 | * Allocating write - we may have different boundaries based | ||
| 1250 | * on page size and cluster size. | ||
| 1251 | * | ||
| 1252 | * NOTE: We can no longer compute one value from the other as | ||
| 1253 | * the actual write length and user provided length may be | ||
| 1254 | * different. | ||
| 1255 | */ | ||
| 1256 | |||
| 1257 | if (wc->w_large_pages) { | ||
| 1258 | /* | ||
| 1259 | * We only care about the 1st and last cluster within | ||
| 1260 | * our range and whether they should be zero'd or not. Either | ||
| 1261 | * value may be extended out to the start/end of a | ||
| 1262 | * newly allocated cluster. | ||
| 1263 | */ | ||
| 1264 | desc = &wc->w_desc[0]; | ||
| 1265 | if (ocfs2_should_zero_cluster(desc)) | ||
| 1266 | ocfs2_figure_cluster_boundaries(osb, | ||
| 1267 | desc->c_cpos, | ||
| 1268 | &wc->w_target_from, | ||
| 1269 | NULL); | ||
| 1270 | |||
| 1271 | desc = &wc->w_desc[wc->w_clen - 1]; | ||
| 1272 | if (ocfs2_should_zero_cluster(desc)) | ||
| 1273 | ocfs2_figure_cluster_boundaries(osb, | ||
| 1274 | desc->c_cpos, | ||
| 1275 | NULL, | ||
| 1276 | &wc->w_target_to); | ||
| 1277 | } else { | ||
| 1278 | wc->w_target_from = 0; | ||
| 1279 | wc->w_target_to = PAGE_CACHE_SIZE; | ||
| 1280 | } | ||
| 1179 | } | 1281 | } |
| 1180 | 1282 | ||
| 1181 | /* | 1283 | /* |
| 1182 | * Write a cluster to an inode. The cluster may not be allocated yet, | 1284 | * Populate each single-cluster write descriptor in the write context |
| 1183 | * in which case it will be. This only exists for buffered writes - | 1285 | * with information about the i/o to be done. |
| 1184 | * O_DIRECT takes a more "traditional" path through the kernel. | ||
| 1185 | * | ||
| 1186 | * The caller is responsible for incrementing pos, written counts, etc | ||
| 1187 | * | 1286 | * |
| 1188 | * For file systems that don't support sparse files, pre-allocation | 1287 | * Returns the number of clusters that will have to be allocated, as |
| 1189 | * and page zeroing up until cpos should be done prior to this | 1288 | * well as a worst case estimate of the number of extent records that |
| 1190 | * function call. | 1289 | * would have to be created during a write to an unwritten region. |
| 1191 | * | ||
| 1192 | * Callers should be holding i_sem, and the rw cluster lock. | ||
| 1193 | * | ||
| 1194 | * Returns the number of user bytes written, or less than zero for | ||
| 1195 | * error. | ||
| 1196 | */ | 1290 | */ |
| 1197 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | 1291 | static int ocfs2_populate_write_desc(struct inode *inode, |
| 1198 | size_t count, ocfs2_page_writer *actor, | 1292 | struct ocfs2_write_ctxt *wc, |
| 1199 | void *priv) | 1293 | unsigned int *clusters_to_alloc, |
| 1294 | unsigned int *extents_to_split) | ||
| 1295 | { | ||
| 1296 | int ret; | ||
| 1297 | struct ocfs2_write_cluster_desc *desc; | ||
| 1298 | unsigned int num_clusters = 0; | ||
| 1299 | unsigned int ext_flags = 0; | ||
| 1300 | u32 phys = 0; | ||
| 1301 | int i; | ||
| 1302 | |||
| 1303 | *clusters_to_alloc = 0; | ||
| 1304 | *extents_to_split = 0; | ||
| 1305 | |||
| 1306 | for (i = 0; i < wc->w_clen; i++) { | ||
| 1307 | desc = &wc->w_desc[i]; | ||
| 1308 | desc->c_cpos = wc->w_cpos + i; | ||
| 1309 | |||
| 1310 | if (num_clusters == 0) { | ||
| 1311 | /* | ||
| 1312 | * Need to look up the next extent record. | ||
| 1313 | */ | ||
| 1314 | ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, | ||
| 1315 | &num_clusters, &ext_flags); | ||
| 1316 | if (ret) { | ||
| 1317 | mlog_errno(ret); | ||
| 1318 | goto out; | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | /* | ||
| 1322 | * Assume worst case - that we're writing in | ||
| 1323 | * the middle of the extent. | ||
| 1324 | * | ||
| 1325 | * We can assume that the write proceeds from | ||
| 1326 | * left to right, in which case the extent | ||
| 1327 | * insert code is smart enough to coalesce the | ||
| 1328 | * next splits into the previous records created. | ||
| 1329 | */ | ||
| 1330 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
| 1331 | *extents_to_split = *extents_to_split + 2; | ||
| 1332 | } else if (phys) { | ||
| 1333 | /* | ||
| 1334 | * Only increment phys if it doesn't describe | ||
| 1335 | * a hole. | ||
| 1336 | */ | ||
| 1337 | phys++; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | desc->c_phys = phys; | ||
| 1341 | if (phys == 0) { | ||
| 1342 | desc->c_new = 1; | ||
| 1343 | *clusters_to_alloc = *clusters_to_alloc + 1; | ||
| 1344 | } | ||
| 1345 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
| 1346 | desc->c_unwritten = 1; | ||
| 1347 | |||
| 1348 | num_clusters--; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | ret = 0; | ||
| 1352 | out: | ||
| 1353 | return ret; | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | int ocfs2_write_begin_nolock(struct address_space *mapping, | ||
| 1357 | loff_t pos, unsigned len, unsigned flags, | ||
| 1358 | struct page **pagep, void **fsdata, | ||
| 1359 | struct buffer_head *di_bh, struct page *mmap_page) | ||
| 1200 | { | 1360 | { |
| 1201 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; | 1361 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; |
| 1202 | ssize_t written = 0; | 1362 | unsigned int clusters_to_alloc, extents_to_split; |
| 1203 | u32 phys; | 1363 | struct ocfs2_write_ctxt *wc; |
| 1204 | struct inode *inode = file->f_mapping->host; | 1364 | struct inode *inode = mapping->host; |
| 1205 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1206 | struct buffer_head *di_bh = NULL; | ||
| 1207 | struct ocfs2_dinode *di; | 1366 | struct ocfs2_dinode *di; |
| 1208 | struct ocfs2_alloc_context *data_ac = NULL; | 1367 | struct ocfs2_alloc_context *data_ac = NULL; |
| 1209 | struct ocfs2_alloc_context *meta_ac = NULL; | 1368 | struct ocfs2_alloc_context *meta_ac = NULL; |
| 1210 | handle_t *handle; | 1369 | handle_t *handle; |
| 1211 | struct ocfs2_write_ctxt wc; | ||
| 1212 | |||
| 1213 | ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); | ||
| 1214 | 1370 | ||
| 1215 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1371 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); |
| 1216 | if (ret) { | 1372 | if (ret) { |
| 1217 | mlog_errno(ret); | 1373 | mlog_errno(ret); |
| 1218 | goto out; | 1374 | return ret; |
| 1219 | } | 1375 | } |
| 1220 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1221 | |||
| 1222 | /* | ||
| 1223 | * Take alloc sem here to prevent concurrent lookups. That way | ||
| 1224 | * the mapping, zeroing and tree manipulation within | ||
| 1225 | * ocfs2_write() will be safe against ->readpage(). This | ||
| 1226 | * should also serve to lock out allocation from a shared | ||
| 1227 | * writeable region. | ||
| 1228 | */ | ||
| 1229 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1230 | 1376 | ||
| 1231 | ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); | 1377 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, |
| 1378 | &extents_to_split); | ||
| 1232 | if (ret) { | 1379 | if (ret) { |
| 1233 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
| 1234 | goto out_meta; | 1381 | goto out; |
| 1235 | } | 1382 | } |
| 1236 | 1383 | ||
| 1237 | /* phys == 0 means that allocation is required. */ | 1384 | di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; |
| 1238 | if (phys == 0) { | 1385 | |
| 1239 | ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); | 1386 | /* |
| 1387 | * We set w_target_from, w_target_to here so that | ||
| 1388 | * ocfs2_write_end() knows which range in the target page to | ||
| 1389 | * write out. An allocation requires that we write the entire | ||
| 1390 | * cluster range. | ||
| 1391 | */ | ||
| 1392 | if (clusters_to_alloc || extents_to_split) { | ||
| 1393 | /* | ||
| 1394 | * XXX: We are stretching the limits of | ||
| 1395 | * ocfs2_lock_allocators(). It greatly over-estimates | ||
| 1396 | * the work to be done. | ||
| 1397 | */ | ||
| 1398 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | ||
| 1399 | extents_to_split, &data_ac, &meta_ac); | ||
| 1240 | if (ret) { | 1400 | if (ret) { |
| 1241 | mlog_errno(ret); | 1401 | mlog_errno(ret); |
| 1242 | goto out_meta; | 1402 | goto out; |
| 1243 | } | 1403 | } |
| 1244 | 1404 | ||
| 1245 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); | 1405 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, |
| 1246 | } | 1406 | clusters_to_alloc); |
| 1247 | 1407 | ||
| 1248 | ret = ocfs2_data_lock(inode, 1); | ||
| 1249 | if (ret) { | ||
| 1250 | mlog_errno(ret); | ||
| 1251 | goto out_meta; | ||
| 1252 | } | 1408 | } |
| 1253 | 1409 | ||
| 1410 | ocfs2_set_target_boundaries(osb, wc, pos, len, | ||
| 1411 | clusters_to_alloc + extents_to_split); | ||
| 1412 | |||
| 1254 | handle = ocfs2_start_trans(osb, credits); | 1413 | handle = ocfs2_start_trans(osb, credits); |
| 1255 | if (IS_ERR(handle)) { | 1414 | if (IS_ERR(handle)) { |
| 1256 | ret = PTR_ERR(handle); | 1415 | ret = PTR_ERR(handle); |
| 1257 | mlog_errno(ret); | 1416 | mlog_errno(ret); |
| 1258 | goto out_data; | 1417 | goto out; |
| 1259 | } | 1418 | } |
| 1260 | 1419 | ||
| 1261 | written = ocfs2_write(file, phys, handle, di_bh, data_ac, | 1420 | wc->w_handle = handle; |
| 1262 | meta_ac, &wc); | 1421 | |
| 1263 | if (written < 0) { | 1422 | /* |
| 1264 | ret = written; | 1423 | * We don't want this to fail in ocfs2_write_end(), so do it |
| 1424 | * here. | ||
| 1425 | */ | ||
| 1426 | ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, | ||
| 1427 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1428 | if (ret) { | ||
| 1265 | mlog_errno(ret); | 1429 | mlog_errno(ret); |
| 1266 | goto out_commit; | 1430 | goto out_commit; |
| 1267 | } | 1431 | } |
| 1268 | 1432 | ||
| 1269 | ret = ocfs2_journal_access(handle, inode, di_bh, | 1433 | /* |
| 1270 | OCFS2_JOURNAL_ACCESS_WRITE); | 1434 | * Fill our page array first. That way we've grabbed enough so |
| 1435 | * that we can zero and flush if we error after adding the | ||
| 1436 | * extent. | ||
| 1437 | */ | ||
| 1438 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, | ||
| 1439 | clusters_to_alloc + extents_to_split, | ||
| 1440 | mmap_page); | ||
| 1271 | if (ret) { | 1441 | if (ret) { |
| 1272 | mlog_errno(ret); | 1442 | mlog_errno(ret); |
| 1273 | goto out_commit; | 1443 | goto out_commit; |
| 1274 | } | 1444 | } |
| 1275 | 1445 | ||
| 1276 | pos += written; | 1446 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, |
| 1447 | len); | ||
| 1448 | if (ret) { | ||
| 1449 | mlog_errno(ret); | ||
| 1450 | goto out_commit; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | if (data_ac) | ||
| 1454 | ocfs2_free_alloc_context(data_ac); | ||
| 1455 | if (meta_ac) | ||
| 1456 | ocfs2_free_alloc_context(meta_ac); | ||
| 1457 | |||
| 1458 | *pagep = wc->w_target_page; | ||
| 1459 | *fsdata = wc; | ||
| 1460 | return 0; | ||
| 1461 | out_commit: | ||
| 1462 | ocfs2_commit_trans(osb, handle); | ||
| 1463 | |||
| 1464 | out: | ||
| 1465 | ocfs2_free_write_ctxt(wc); | ||
| 1466 | |||
| 1467 | if (data_ac) | ||
| 1468 | ocfs2_free_alloc_context(data_ac); | ||
| 1469 | if (meta_ac) | ||
| 1470 | ocfs2_free_alloc_context(meta_ac); | ||
| 1471 | return ret; | ||
| 1472 | } | ||
| 1473 | |||
| 1474 | int ocfs2_write_begin(struct file *file, struct address_space *mapping, | ||
| 1475 | loff_t pos, unsigned len, unsigned flags, | ||
| 1476 | struct page **pagep, void **fsdata) | ||
| 1477 | { | ||
| 1478 | int ret; | ||
| 1479 | struct buffer_head *di_bh = NULL; | ||
| 1480 | struct inode *inode = mapping->host; | ||
| 1481 | |||
| 1482 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
| 1483 | if (ret) { | ||
| 1484 | mlog_errno(ret); | ||
| 1485 | return ret; | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | /* | ||
| 1489 | * Take alloc sem here to prevent concurrent lookups. That way | ||
| 1490 | * the mapping, zeroing and tree manipulation within | ||
| 1491 | * ocfs2_write() will be safe against ->readpage(). This | ||
| 1492 | * should also serve to lock out allocation from a shared | ||
| 1493 | * writeable region. | ||
| 1494 | */ | ||
| 1495 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1496 | |||
| 1497 | ret = ocfs2_data_lock(inode, 1); | ||
| 1498 | if (ret) { | ||
| 1499 | mlog_errno(ret); | ||
| 1500 | goto out_fail; | ||
| 1501 | } | ||
| 1502 | |||
| 1503 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | ||
| 1504 | fsdata, di_bh, NULL); | ||
| 1505 | if (ret) { | ||
| 1506 | mlog_errno(ret); | ||
| 1507 | goto out_fail_data; | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | brelse(di_bh); | ||
| 1511 | |||
| 1512 | return 0; | ||
| 1513 | |||
| 1514 | out_fail_data: | ||
| 1515 | ocfs2_data_unlock(inode, 1); | ||
| 1516 | out_fail: | ||
| 1517 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1518 | |||
| 1519 | brelse(di_bh); | ||
| 1520 | ocfs2_meta_unlock(inode, 1); | ||
| 1521 | |||
| 1522 | return ret; | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | int ocfs2_write_end_nolock(struct address_space *mapping, | ||
| 1526 | loff_t pos, unsigned len, unsigned copied, | ||
| 1527 | struct page *page, void *fsdata) | ||
| 1528 | { | ||
| 1529 | int i; | ||
| 1530 | unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); | ||
| 1531 | struct inode *inode = mapping->host; | ||
| 1532 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1533 | struct ocfs2_write_ctxt *wc = fsdata; | ||
| 1534 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; | ||
| 1535 | handle_t *handle = wc->w_handle; | ||
| 1536 | struct page *tmppage; | ||
| 1537 | |||
| 1538 | if (unlikely(copied < len)) { | ||
| 1539 | if (!PageUptodate(wc->w_target_page)) | ||
| 1540 | copied = 0; | ||
| 1541 | |||
| 1542 | ocfs2_zero_new_buffers(wc->w_target_page, start+copied, | ||
| 1543 | start+len); | ||
| 1544 | } | ||
| 1545 | flush_dcache_page(wc->w_target_page); | ||
| 1546 | |||
| 1547 | for(i = 0; i < wc->w_num_pages; i++) { | ||
| 1548 | tmppage = wc->w_pages[i]; | ||
| 1549 | |||
| 1550 | if (tmppage == wc->w_target_page) { | ||
| 1551 | from = wc->w_target_from; | ||
| 1552 | to = wc->w_target_to; | ||
| 1553 | |||
| 1554 | BUG_ON(from > PAGE_CACHE_SIZE || | ||
| 1555 | to > PAGE_CACHE_SIZE || | ||
| 1556 | to < from); | ||
| 1557 | } else { | ||
| 1558 | /* | ||
| 1559 | * Pages adjacent to the target (if any) imply | ||
| 1560 | * a hole-filling write in which case we want | ||
| 1561 | * to flush their entire range. | ||
| 1562 | */ | ||
| 1563 | from = 0; | ||
| 1564 | to = PAGE_CACHE_SIZE; | ||
| 1565 | } | ||
| 1566 | |||
| 1567 | if (ocfs2_should_order_data(inode)) | ||
| 1568 | walk_page_buffers(wc->w_handle, page_buffers(tmppage), | ||
| 1569 | from, to, NULL, | ||
| 1570 | ocfs2_journal_dirty_data); | ||
| 1571 | |||
| 1572 | block_commit_write(tmppage, from, to); | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | pos += copied; | ||
| 1277 | if (pos > inode->i_size) { | 1576 | if (pos > inode->i_size) { |
| 1278 | i_size_write(inode, pos); | 1577 | i_size_write(inode, pos); |
| 1279 | mark_inode_dirty(inode); | 1578 | mark_inode_dirty(inode); |
| @@ -1283,29 +1582,31 @@ ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | |||
| 1283 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1582 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 1284 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 1583 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
| 1285 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1584 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
| 1585 | ocfs2_journal_dirty(handle, wc->w_di_bh); | ||
| 1286 | 1586 | ||
| 1287 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
| 1288 | if (ret) | ||
| 1289 | mlog_errno(ret); | ||
| 1290 | |||
| 1291 | out_commit: | ||
| 1292 | ocfs2_commit_trans(osb, handle); | 1587 | ocfs2_commit_trans(osb, handle); |
| 1293 | 1588 | ||
| 1294 | out_data: | 1589 | ocfs2_run_deallocs(osb, &wc->w_dealloc); |
| 1295 | ocfs2_data_unlock(inode, 1); | 1590 | |
| 1591 | ocfs2_free_write_ctxt(wc); | ||
| 1592 | |||
| 1593 | return copied; | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | int ocfs2_write_end(struct file *file, struct address_space *mapping, | ||
| 1597 | loff_t pos, unsigned len, unsigned copied, | ||
| 1598 | struct page *page, void *fsdata) | ||
| 1599 | { | ||
| 1600 | int ret; | ||
| 1601 | struct inode *inode = mapping->host; | ||
| 1296 | 1602 | ||
| 1297 | out_meta: | 1603 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
| 1604 | |||
| 1605 | ocfs2_data_unlock(inode, 1); | ||
| 1298 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1606 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 1299 | ocfs2_meta_unlock(inode, 1); | 1607 | ocfs2_meta_unlock(inode, 1); |
| 1300 | 1608 | ||
| 1301 | out: | 1609 | return ret; |
| 1302 | brelse(di_bh); | ||
| 1303 | if (data_ac) | ||
| 1304 | ocfs2_free_alloc_context(data_ac); | ||
| 1305 | if (meta_ac) | ||
| 1306 | ocfs2_free_alloc_context(meta_ac); | ||
| 1307 | |||
| 1308 | return written ? written : ret; | ||
| 1309 | } | 1610 | } |
| 1310 | 1611 | ||
| 1311 | const struct address_space_operations ocfs2_aops = { | 1612 | const struct address_space_operations ocfs2_aops = { |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 45821d479b5a..389579bd64e3 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -42,57 +42,22 @@ int walk_page_buffers( handle_t *handle, | |||
| 42 | int (*fn)( handle_t *handle, | 42 | int (*fn)( handle_t *handle, |
| 43 | struct buffer_head *bh)); | 43 | struct buffer_head *bh)); |
| 44 | 44 | ||
| 45 | struct ocfs2_write_ctxt; | 45 | int ocfs2_write_begin(struct file *file, struct address_space *mapping, |
| 46 | typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, | 46 | loff_t pos, unsigned len, unsigned flags, |
| 47 | u64 *, unsigned int *, unsigned int *); | 47 | struct page **pagep, void **fsdata); |
| 48 | 48 | ||
| 49 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | 49 | int ocfs2_write_end(struct file *file, struct address_space *mapping, |
| 50 | size_t count, ocfs2_page_writer *actor, | 50 | loff_t pos, unsigned len, unsigned copied, |
| 51 | void *priv); | 51 | struct page *page, void *fsdata); |
| 52 | 52 | ||
| 53 | struct ocfs2_write_ctxt { | 53 | int ocfs2_write_end_nolock(struct address_space *mapping, |
| 54 | size_t w_count; | 54 | loff_t pos, unsigned len, unsigned copied, |
| 55 | loff_t w_pos; | 55 | struct page *page, void *fsdata); |
| 56 | u32 w_cpos; | ||
| 57 | unsigned int w_finished_copy; | ||
| 58 | 56 | ||
| 59 | /* This is true if page_size > cluster_size */ | 57 | int ocfs2_write_begin_nolock(struct address_space *mapping, |
| 60 | unsigned int w_large_pages; | 58 | loff_t pos, unsigned len, unsigned flags, |
| 61 | 59 | struct page **pagep, void **fsdata, | |
| 62 | /* Filler callback and private data */ | 60 | struct buffer_head *di_bh, struct page *mmap_page); |
| 63 | ocfs2_page_writer *w_write_data_page; | ||
| 64 | void *w_private; | ||
| 65 | |||
| 66 | /* Only valid for the filler callback */ | ||
| 67 | struct page *w_this_page; | ||
| 68 | unsigned int w_this_page_new; | ||
| 69 | }; | ||
| 70 | |||
| 71 | struct ocfs2_buffered_write_priv { | ||
| 72 | char *b_src_buf; | ||
| 73 | const struct iovec *b_cur_iov; /* Current iovec */ | ||
| 74 | size_t b_cur_off; /* Offset in the | ||
| 75 | * current iovec */ | ||
| 76 | }; | ||
| 77 | int ocfs2_map_and_write_user_data(struct inode *inode, | ||
| 78 | struct ocfs2_write_ctxt *wc, | ||
| 79 | u64 *p_blkno, | ||
| 80 | unsigned int *ret_from, | ||
| 81 | unsigned int *ret_to); | ||
| 82 | |||
| 83 | struct ocfs2_splice_write_priv { | ||
| 84 | struct splice_desc *s_sd; | ||
| 85 | struct pipe_buffer *s_buf; | ||
| 86 | struct pipe_inode_info *s_pipe; | ||
| 87 | /* Neither offset value is ever larger than one page */ | ||
| 88 | unsigned int s_offset; | ||
| 89 | unsigned int s_buf_offset; | ||
| 90 | }; | ||
| 91 | int ocfs2_map_and_write_splice_data(struct inode *inode, | ||
| 92 | struct ocfs2_write_ctxt *wc, | ||
| 93 | u64 *p_blkno, | ||
| 94 | unsigned int *ret_from, | ||
| 95 | unsigned int *ret_to); | ||
| 96 | 61 | ||
| 97 | /* all ocfs2_dio_end_io()'s fault */ | 62 | /* all ocfs2_dio_end_io()'s fault */ |
| 98 | #define ocfs2_iocb_is_rw_locked(iocb) \ | 63 | #define ocfs2_iocb_is_rw_locked(iocb) \ |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 979113479c66..2bd7f788cf34 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1335 | ret = wait_event_interruptible(o2hb_steady_queue, | 1335 | ret = wait_event_interruptible(o2hb_steady_queue, |
| 1336 | atomic_read(®->hr_steady_iterations) == 0); | 1336 | atomic_read(®->hr_steady_iterations) == 0); |
| 1337 | if (ret) { | 1337 | if (ret) { |
| 1338 | /* We got interrupted (hello ptrace!). Clean up */ | ||
| 1338 | spin_lock(&o2hb_live_lock); | 1339 | spin_lock(&o2hb_live_lock); |
| 1339 | hb_task = reg->hr_task; | 1340 | hb_task = reg->hr_task; |
| 1340 | reg->hr_task = NULL; | 1341 | reg->hr_task = NULL; |
| @@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1345 | goto out; | 1346 | goto out; |
| 1346 | } | 1347 | } |
| 1347 | 1348 | ||
| 1348 | ret = count; | 1349 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
| 1350 | spin_lock(&o2hb_live_lock); | ||
| 1351 | hb_task = reg->hr_task; | ||
| 1352 | spin_unlock(&o2hb_live_lock); | ||
| 1353 | |||
| 1354 | if (hb_task) | ||
| 1355 | ret = count; | ||
| 1356 | else | ||
| 1357 | ret = -EIO; | ||
| 1358 | |||
| 1349 | out: | 1359 | out: |
| 1350 | if (filp) | 1360 | if (filp) |
| 1351 | fput(filp); | 1361 | fput(filp); |
| @@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
| 1523 | if (hb_task) | 1533 | if (hb_task) |
| 1524 | kthread_stop(hb_task); | 1534 | kthread_stop(hb_task); |
| 1525 | 1535 | ||
| 1536 | /* | ||
| 1537 | * If we're racing a dev_write(), we need to wake them. They will | ||
| 1538 | * check reg->hr_task | ||
| 1539 | */ | ||
| 1540 | if (atomic_read(®->hr_steady_iterations) != 0) { | ||
| 1541 | atomic_set(®->hr_steady_iterations, 0); | ||
| 1542 | wake_up(&o2hb_steady_queue); | ||
| 1543 | } | ||
| 1544 | |||
| 1526 | config_item_put(item); | 1545 | config_item_put(item); |
| 1527 | } | 1546 | } |
| 1528 | 1547 | ||
| @@ -1665,7 +1684,67 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, | |||
| 1665 | } | 1684 | } |
| 1666 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); | 1685 | EXPORT_SYMBOL_GPL(o2hb_setup_callback); |
| 1667 | 1686 | ||
| 1668 | int o2hb_register_callback(struct o2hb_callback_func *hc) | 1687 | static struct o2hb_region *o2hb_find_region(const char *region_uuid) |
| 1688 | { | ||
| 1689 | struct o2hb_region *p, *reg = NULL; | ||
| 1690 | |||
| 1691 | assert_spin_locked(&o2hb_live_lock); | ||
| 1692 | |||
| 1693 | list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { | ||
| 1694 | if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { | ||
| 1695 | reg = p; | ||
| 1696 | break; | ||
| 1697 | } | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | return reg; | ||
| 1701 | } | ||
| 1702 | |||
| 1703 | static int o2hb_region_get(const char *region_uuid) | ||
| 1704 | { | ||
| 1705 | int ret = 0; | ||
| 1706 | struct o2hb_region *reg; | ||
| 1707 | |||
| 1708 | spin_lock(&o2hb_live_lock); | ||
| 1709 | |||
| 1710 | reg = o2hb_find_region(region_uuid); | ||
| 1711 | if (!reg) | ||
| 1712 | ret = -ENOENT; | ||
| 1713 | spin_unlock(&o2hb_live_lock); | ||
| 1714 | |||
| 1715 | if (ret) | ||
| 1716 | goto out; | ||
| 1717 | |||
| 1718 | ret = o2nm_depend_this_node(); | ||
| 1719 | if (ret) | ||
| 1720 | goto out; | ||
| 1721 | |||
| 1722 | ret = o2nm_depend_item(®->hr_item); | ||
| 1723 | if (ret) | ||
| 1724 | o2nm_undepend_this_node(); | ||
| 1725 | |||
| 1726 | out: | ||
| 1727 | return ret; | ||
| 1728 | } | ||
| 1729 | |||
| 1730 | static void o2hb_region_put(const char *region_uuid) | ||
| 1731 | { | ||
| 1732 | struct o2hb_region *reg; | ||
| 1733 | |||
| 1734 | spin_lock(&o2hb_live_lock); | ||
| 1735 | |||
| 1736 | reg = o2hb_find_region(region_uuid); | ||
| 1737 | |||
| 1738 | spin_unlock(&o2hb_live_lock); | ||
| 1739 | |||
| 1740 | if (reg) { | ||
| 1741 | o2nm_undepend_item(®->hr_item); | ||
| 1742 | o2nm_undepend_this_node(); | ||
| 1743 | } | ||
| 1744 | } | ||
| 1745 | |||
| 1746 | int o2hb_register_callback(const char *region_uuid, | ||
| 1747 | struct o2hb_callback_func *hc) | ||
| 1669 | { | 1748 | { |
| 1670 | struct o2hb_callback_func *tmp; | 1749 | struct o2hb_callback_func *tmp; |
| 1671 | struct list_head *iter; | 1750 | struct list_head *iter; |
| @@ -1681,6 +1760,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc) | |||
| 1681 | goto out; | 1760 | goto out; |
| 1682 | } | 1761 | } |
| 1683 | 1762 | ||
| 1763 | if (region_uuid) { | ||
| 1764 | ret = o2hb_region_get(region_uuid); | ||
| 1765 | if (ret) | ||
| 1766 | goto out; | ||
| 1767 | } | ||
| 1768 | |||
| 1684 | down_write(&o2hb_callback_sem); | 1769 | down_write(&o2hb_callback_sem); |
| 1685 | 1770 | ||
| 1686 | list_for_each(iter, &hbcall->list) { | 1771 | list_for_each(iter, &hbcall->list) { |
| @@ -1702,16 +1787,21 @@ out: | |||
| 1702 | } | 1787 | } |
| 1703 | EXPORT_SYMBOL_GPL(o2hb_register_callback); | 1788 | EXPORT_SYMBOL_GPL(o2hb_register_callback); |
| 1704 | 1789 | ||
| 1705 | void o2hb_unregister_callback(struct o2hb_callback_func *hc) | 1790 | void o2hb_unregister_callback(const char *region_uuid, |
| 1791 | struct o2hb_callback_func *hc) | ||
| 1706 | { | 1792 | { |
| 1707 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); | 1793 | BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); |
| 1708 | 1794 | ||
| 1709 | mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", | 1795 | mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n", |
| 1710 | __builtin_return_address(0), hc); | 1796 | __builtin_return_address(0), hc); |
| 1711 | 1797 | ||
| 1798 | /* XXX Can this happen _with_ a region reference? */ | ||
| 1712 | if (list_empty(&hc->hc_item)) | 1799 | if (list_empty(&hc->hc_item)) |
| 1713 | return; | 1800 | return; |
| 1714 | 1801 | ||
| 1802 | if (region_uuid) | ||
| 1803 | o2hb_region_put(region_uuid); | ||
| 1804 | |||
| 1715 | down_write(&o2hb_callback_sem); | 1805 | down_write(&o2hb_callback_sem); |
| 1716 | 1806 | ||
| 1717 | list_del_init(&hc->hc_item); | 1807 | list_del_init(&hc->hc_item); |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index cc6d40b39771..35397dd5ecdb 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
| @@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, | |||
| 69 | o2hb_cb_func *func, | 69 | o2hb_cb_func *func, |
| 70 | void *data, | 70 | void *data, |
| 71 | int priority); | 71 | int priority); |
| 72 | int o2hb_register_callback(struct o2hb_callback_func *hc); | 72 | int o2hb_register_callback(const char *region_uuid, |
| 73 | void o2hb_unregister_callback(struct o2hb_callback_func *hc); | 73 | struct o2hb_callback_func *hc); |
| 74 | void o2hb_unregister_callback(const char *region_uuid, | ||
| 75 | struct o2hb_callback_func *hc); | ||
| 74 | void o2hb_fill_node_map(unsigned long *map, | 76 | void o2hb_fill_node_map(unsigned long *map, |
| 75 | unsigned bytes); | 77 | unsigned bytes); |
| 76 | void o2hb_init(void); | 78 | void o2hb_init(void); |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 9f5ad0f01ce0..af2070da308b 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -900,6 +900,46 @@ static struct o2nm_cluster_group o2nm_cluster_group = { | |||
| 900 | }, | 900 | }, |
| 901 | }; | 901 | }; |
| 902 | 902 | ||
| 903 | int o2nm_depend_item(struct config_item *item) | ||
| 904 | { | ||
| 905 | return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); | ||
| 906 | } | ||
| 907 | |||
| 908 | void o2nm_undepend_item(struct config_item *item) | ||
| 909 | { | ||
| 910 | configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item); | ||
| 911 | } | ||
| 912 | |||
| 913 | int o2nm_depend_this_node(void) | ||
| 914 | { | ||
| 915 | int ret = 0; | ||
| 916 | struct o2nm_node *local_node; | ||
| 917 | |||
| 918 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | ||
| 919 | if (!local_node) { | ||
| 920 | ret = -EINVAL; | ||
| 921 | goto out; | ||
| 922 | } | ||
| 923 | |||
| 924 | ret = o2nm_depend_item(&local_node->nd_item); | ||
| 925 | o2nm_node_put(local_node); | ||
| 926 | |||
| 927 | out: | ||
| 928 | return ret; | ||
| 929 | } | ||
| 930 | |||
| 931 | void o2nm_undepend_this_node(void) | ||
| 932 | { | ||
| 933 | struct o2nm_node *local_node; | ||
| 934 | |||
| 935 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | ||
| 936 | BUG_ON(!local_node); | ||
| 937 | |||
| 938 | o2nm_undepend_item(&local_node->nd_item); | ||
| 939 | o2nm_node_put(local_node); | ||
| 940 | } | ||
| 941 | |||
| 942 | |||
| 903 | static void __exit exit_o2nm(void) | 943 | static void __exit exit_o2nm(void) |
| 904 | { | 944 | { |
| 905 | if (ocfs2_table_header) | 945 | if (ocfs2_table_header) |
| @@ -934,7 +974,7 @@ static int __init init_o2nm(void) | |||
| 934 | goto out_sysctl; | 974 | goto out_sysctl; |
| 935 | 975 | ||
| 936 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); | 976 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); |
| 937 | init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem); | 977 | mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); |
| 938 | ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); | 978 | ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys); |
| 939 | if (ret) { | 979 | if (ret) { |
| 940 | printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); | 980 | printk(KERN_ERR "nodemanager: Registration returned %d\n", ret); |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 070522138ae2..7c860361b8dd 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
| @@ -77,4 +77,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr); | |||
| 77 | void o2nm_node_get(struct o2nm_node *node); | 77 | void o2nm_node_get(struct o2nm_node *node); |
| 78 | void o2nm_node_put(struct o2nm_node *node); | 78 | void o2nm_node_put(struct o2nm_node *node); |
| 79 | 79 | ||
| 80 | int o2nm_depend_item(struct config_item *item); | ||
| 81 | void o2nm_undepend_item(struct config_item *item); | ||
| 82 | int o2nm_depend_this_node(void); | ||
| 83 | void o2nm_undepend_this_node(void); | ||
| 84 | |||
| 80 | #endif /* O2CLUSTER_NODEMANAGER_H */ | 85 | #endif /* O2CLUSTER_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0b229a9c7952..f0bdfd944c44 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -261,14 +261,12 @@ out: | |||
| 261 | 261 | ||
| 262 | static void o2net_complete_nodes_nsw(struct o2net_node *nn) | 262 | static void o2net_complete_nodes_nsw(struct o2net_node *nn) |
| 263 | { | 263 | { |
| 264 | struct list_head *iter, *tmp; | 264 | struct o2net_status_wait *nsw, *tmp; |
| 265 | unsigned int num_kills = 0; | 265 | unsigned int num_kills = 0; |
| 266 | struct o2net_status_wait *nsw; | ||
| 267 | 266 | ||
| 268 | assert_spin_locked(&nn->nn_lock); | 267 | assert_spin_locked(&nn->nn_lock); |
| 269 | 268 | ||
| 270 | list_for_each_safe(iter, tmp, &nn->nn_status_list) { | 269 | list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) { |
| 271 | nsw = list_entry(iter, struct o2net_status_wait, ns_node_item); | ||
| 272 | o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); | 270 | o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); |
| 273 | num_kills++; | 271 | num_kills++; |
| 274 | } | 272 | } |
| @@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler); | |||
| 764 | 762 | ||
| 765 | void o2net_unregister_handler_list(struct list_head *list) | 763 | void o2net_unregister_handler_list(struct list_head *list) |
| 766 | { | 764 | { |
| 767 | struct list_head *pos, *n; | 765 | struct o2net_msg_handler *nmh, *n; |
| 768 | struct o2net_msg_handler *nmh; | ||
| 769 | 766 | ||
| 770 | write_lock(&o2net_handler_lock); | 767 | write_lock(&o2net_handler_lock); |
| 771 | list_for_each_safe(pos, n, list) { | 768 | list_for_each_entry_safe(nmh, n, list, nh_unregister_item) { |
| 772 | nmh = list_entry(pos, struct o2net_msg_handler, | ||
| 773 | nh_unregister_item); | ||
| 774 | mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", | 769 | mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", |
| 775 | nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); | 770 | nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); |
| 776 | rb_erase(&nmh->nh_node, &o2net_handler_tree); | 771 | rb_erase(&nmh->nh_node, &o2net_handler_tree); |
| @@ -1638,8 +1633,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
| 1638 | 1633 | ||
| 1639 | void o2net_unregister_hb_callbacks(void) | 1634 | void o2net_unregister_hb_callbacks(void) |
| 1640 | { | 1635 | { |
| 1641 | o2hb_unregister_callback(&o2net_hb_up); | 1636 | o2hb_unregister_callback(NULL, &o2net_hb_up); |
| 1642 | o2hb_unregister_callback(&o2net_hb_down); | 1637 | o2hb_unregister_callback(NULL, &o2net_hb_down); |
| 1643 | } | 1638 | } |
| 1644 | 1639 | ||
| 1645 | int o2net_register_hb_callbacks(void) | 1640 | int o2net_register_hb_callbacks(void) |
| @@ -1651,9 +1646,9 @@ int o2net_register_hb_callbacks(void) | |||
| 1651 | o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, | 1646 | o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, |
| 1652 | o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); | 1647 | o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); |
| 1653 | 1648 | ||
| 1654 | ret = o2hb_register_callback(&o2net_hb_up); | 1649 | ret = o2hb_register_callback(NULL, &o2net_hb_up); |
| 1655 | if (ret == 0) | 1650 | if (ret == 0) |
| 1656 | ret = o2hb_register_callback(&o2net_hb_down); | 1651 | ret = o2hb_register_callback(NULL, &o2net_hb_down); |
| 1657 | 1652 | ||
| 1658 | if (ret) | 1653 | if (ret) |
| 1659 | o2net_unregister_hb_callbacks(); | 1654 | o2net_unregister_hb_callbacks(); |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c441ef1f2bad..0d5fdde959c8 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -368,7 +368,7 @@ int ocfs2_do_extend_dir(struct super_block *sb, | |||
| 368 | u32 offset = OCFS2_I(dir)->ip_clusters; | 368 | u32 offset = OCFS2_I(dir)->ip_clusters; |
| 369 | 369 | ||
| 370 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, | 370 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, |
| 371 | 1, parent_fe_bh, handle, | 371 | 1, 0, parent_fe_bh, handle, |
| 372 | data_ac, meta_ac, NULL); | 372 | data_ac, meta_ac, NULL); |
| 373 | BUG_ON(status == -EAGAIN); | 373 | BUG_ON(status == -EAGAIN); |
| 374 | if (status < 0) { | 374 | if (status < 0) { |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index d836b98dd99a..6954565b8ccb 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -1128,8 +1128,8 @@ bail: | |||
| 1128 | 1128 | ||
| 1129 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) | 1129 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) |
| 1130 | { | 1130 | { |
| 1131 | o2hb_unregister_callback(&dlm->dlm_hb_up); | 1131 | o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); |
| 1132 | o2hb_unregister_callback(&dlm->dlm_hb_down); | 1132 | o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); |
| 1133 | o2net_unregister_handler_list(&dlm->dlm_domain_handlers); | 1133 | o2net_unregister_handler_list(&dlm->dlm_domain_handlers); |
| 1134 | } | 1134 | } |
| 1135 | 1135 | ||
| @@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
| 1141 | 1141 | ||
| 1142 | o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, | 1142 | o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, |
| 1143 | dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); | 1143 | dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); |
| 1144 | status = o2hb_register_callback(&dlm->dlm_hb_down); | 1144 | status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); |
| 1145 | if (status) | 1145 | if (status) |
| 1146 | goto bail; | 1146 | goto bail; |
| 1147 | 1147 | ||
| 1148 | o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, | 1148 | o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, |
| 1149 | dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); | 1149 | dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); |
| 1150 | status = o2hb_register_callback(&dlm->dlm_hb_up); | 1150 | status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); |
| 1151 | if (status) | 1151 | if (status) |
| 1152 | goto bail; | 1152 | goto bail; |
| 1153 | 1153 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 6edffca99d98..65b2b9b92688 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle) | |||
| 192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) | 192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) |
| 193 | { | 193 | { |
| 194 | struct dlm_master_list_entry *mle; | 194 | struct dlm_master_list_entry *mle; |
| 195 | struct list_head *iter; | ||
| 196 | 195 | ||
| 197 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); | 196 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); |
| 198 | spin_lock(&dlm->master_lock); | 197 | spin_lock(&dlm->master_lock); |
| 199 | list_for_each(iter, &dlm->master_list) { | 198 | list_for_each_entry(mle, &dlm->master_list, list) |
| 200 | mle = list_entry(iter, struct dlm_master_list_entry, list); | ||
| 201 | dlm_print_one_mle(mle); | 199 | dlm_print_one_mle(mle); |
| 202 | } | ||
| 203 | spin_unlock(&dlm->master_lock); | 200 | spin_unlock(&dlm->master_lock); |
| 204 | } | 201 | } |
| 205 | 202 | ||
| 206 | int dlm_dump_all_mles(const char __user *data, unsigned int len) | 203 | int dlm_dump_all_mles(const char __user *data, unsigned int len) |
| 207 | { | 204 | { |
| 208 | struct list_head *iter; | ||
| 209 | struct dlm_ctxt *dlm; | 205 | struct dlm_ctxt *dlm; |
| 210 | 206 | ||
| 211 | spin_lock(&dlm_domain_lock); | 207 | spin_lock(&dlm_domain_lock); |
| 212 | list_for_each(iter, &dlm_domains) { | 208 | list_for_each_entry(dlm, &dlm_domains, list) { |
| 213 | dlm = list_entry (iter, struct dlm_ctxt, list); | ||
| 214 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); | 209 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); |
| 215 | dlm_dump_mles(dlm); | 210 | dlm_dump_mles(dlm); |
| 216 | } | 211 | } |
| @@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
| 454 | char *name, unsigned int namelen) | 449 | char *name, unsigned int namelen) |
| 455 | { | 450 | { |
| 456 | struct dlm_master_list_entry *tmpmle; | 451 | struct dlm_master_list_entry *tmpmle; |
| 457 | struct list_head *iter; | ||
| 458 | 452 | ||
| 459 | assert_spin_locked(&dlm->master_lock); | 453 | assert_spin_locked(&dlm->master_lock); |
| 460 | 454 | ||
| 461 | list_for_each(iter, &dlm->master_list) { | 455 | list_for_each_entry(tmpmle, &dlm->master_list, list) { |
| 462 | tmpmle = list_entry(iter, struct dlm_master_list_entry, list); | ||
| 463 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) | 456 | if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) |
| 464 | continue; | 457 | continue; |
| 465 | dlm_get_mle(tmpmle); | 458 | dlm_get_mle(tmpmle); |
| @@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
| 472 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) | 465 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) |
| 473 | { | 466 | { |
| 474 | struct dlm_master_list_entry *mle; | 467 | struct dlm_master_list_entry *mle; |
| 475 | struct list_head *iter; | ||
| 476 | 468 | ||
| 477 | assert_spin_locked(&dlm->spinlock); | 469 | assert_spin_locked(&dlm->spinlock); |
| 478 | 470 | ||
| 479 | list_for_each(iter, &dlm->mle_hb_events) { | 471 | list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { |
| 480 | mle = list_entry(iter, struct dlm_master_list_entry, | ||
| 481 | hb_events); | ||
| 482 | if (node_up) | 472 | if (node_up) |
| 483 | dlm_mle_node_up(dlm, mle, NULL, idx); | 473 | dlm_mle_node_up(dlm, mle, NULL, idx); |
| 484 | else | 474 | else |
| @@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
| 2434 | int ret; | 2424 | int ret; |
| 2435 | int i; | 2425 | int i; |
| 2436 | int count = 0; | 2426 | int count = 0; |
| 2437 | struct list_head *queue, *iter; | 2427 | struct list_head *queue; |
| 2438 | struct dlm_lock *lock; | 2428 | struct dlm_lock *lock; |
| 2439 | 2429 | ||
| 2440 | assert_spin_locked(&res->spinlock); | 2430 | assert_spin_locked(&res->spinlock); |
| @@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
| 2453 | ret = 0; | 2443 | ret = 0; |
| 2454 | queue = &res->granted; | 2444 | queue = &res->granted; |
| 2455 | for (i = 0; i < 3; i++) { | 2445 | for (i = 0; i < 3; i++) { |
| 2456 | list_for_each(iter, queue) { | 2446 | list_for_each_entry(lock, queue, list) { |
| 2457 | lock = list_entry(iter, struct dlm_lock, list); | ||
| 2458 | ++count; | 2447 | ++count; |
| 2459 | if (lock->ml.node == dlm->node_num) { | 2448 | if (lock->ml.node == dlm->node_num) { |
| 2460 | mlog(0, "found a lock owned by this node still " | 2449 | mlog(0, "found a lock owned by this node still " |
| @@ -2923,18 +2912,16 @@ again: | |||
| 2923 | static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | 2912 | static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, |
| 2924 | struct dlm_lock_resource *res) | 2913 | struct dlm_lock_resource *res) |
| 2925 | { | 2914 | { |
| 2926 | struct list_head *iter, *iter2; | ||
| 2927 | struct list_head *queue = &res->granted; | 2915 | struct list_head *queue = &res->granted; |
| 2928 | int i, bit; | 2916 | int i, bit; |
| 2929 | struct dlm_lock *lock; | 2917 | struct dlm_lock *lock, *next; |
| 2930 | 2918 | ||
| 2931 | assert_spin_locked(&res->spinlock); | 2919 | assert_spin_locked(&res->spinlock); |
| 2932 | 2920 | ||
| 2933 | BUG_ON(res->owner == dlm->node_num); | 2921 | BUG_ON(res->owner == dlm->node_num); |
| 2934 | 2922 | ||
| 2935 | for (i=0; i<3; i++) { | 2923 | for (i=0; i<3; i++) { |
| 2936 | list_for_each_safe(iter, iter2, queue) { | 2924 | list_for_each_entry_safe(lock, next, queue, list) { |
| 2937 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2938 | if (lock->ml.node != dlm->node_num) { | 2925 | if (lock->ml.node != dlm->node_num) { |
| 2939 | mlog(0, "putting lock for node %u\n", | 2926 | mlog(0, "putting lock for node %u\n", |
| 2940 | lock->ml.node); | 2927 | lock->ml.node); |
| @@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | |||
| 2976 | { | 2963 | { |
| 2977 | int i; | 2964 | int i; |
| 2978 | struct list_head *queue = &res->granted; | 2965 | struct list_head *queue = &res->granted; |
| 2979 | struct list_head *iter; | ||
| 2980 | struct dlm_lock *lock; | 2966 | struct dlm_lock *lock; |
| 2981 | int nodenum; | 2967 | int nodenum; |
| 2982 | 2968 | ||
| @@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | |||
| 2984 | 2970 | ||
| 2985 | spin_lock(&res->spinlock); | 2971 | spin_lock(&res->spinlock); |
| 2986 | for (i=0; i<3; i++) { | 2972 | for (i=0; i<3; i++) { |
| 2987 | list_for_each(iter, queue) { | 2973 | list_for_each_entry(lock, queue, list) { |
| 2988 | /* up to the caller to make sure this node | 2974 | /* up to the caller to make sure this node |
| 2989 | * is alive */ | 2975 | * is alive */ |
| 2990 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2991 | if (lock->ml.node != dlm->node_num) { | 2976 | if (lock->ml.node != dlm->node_num) { |
| 2992 | spin_unlock(&res->spinlock); | 2977 | spin_unlock(&res->spinlock); |
| 2993 | return lock->ml.node; | 2978 | return lock->ml.node; |
| @@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
| 3234 | 3219 | ||
| 3235 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) | 3220 | void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) |
| 3236 | { | 3221 | { |
| 3237 | struct list_head *iter, *iter2; | 3222 | struct dlm_master_list_entry *mle, *next; |
| 3238 | struct dlm_master_list_entry *mle; | ||
| 3239 | struct dlm_lock_resource *res; | 3223 | struct dlm_lock_resource *res; |
| 3240 | unsigned int hash; | 3224 | unsigned int hash; |
| 3241 | 3225 | ||
| @@ -3245,9 +3229,7 @@ top: | |||
| 3245 | 3229 | ||
| 3246 | /* clean the master list */ | 3230 | /* clean the master list */ |
| 3247 | spin_lock(&dlm->master_lock); | 3231 | spin_lock(&dlm->master_lock); |
| 3248 | list_for_each_safe(iter, iter2, &dlm->master_list) { | 3232 | list_for_each_entry_safe(mle, next, &dlm->master_list, list) { |
| 3249 | mle = list_entry(iter, struct dlm_master_list_entry, list); | ||
| 3250 | |||
| 3251 | BUG_ON(mle->type != DLM_MLE_BLOCK && | 3233 | BUG_ON(mle->type != DLM_MLE_BLOCK && |
| 3252 | mle->type != DLM_MLE_MASTER && | 3234 | mle->type != DLM_MLE_MASTER && |
| 3253 | mle->type != DLM_MLE_MIGRATION); | 3235 | mle->type != DLM_MLE_MIGRATION); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 671c4ed58ee2..a2c33160bfd6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work) | |||
| 158 | struct dlm_ctxt *dlm = | 158 | struct dlm_ctxt *dlm = |
| 159 | container_of(work, struct dlm_ctxt, dispatched_work); | 159 | container_of(work, struct dlm_ctxt, dispatched_work); |
| 160 | LIST_HEAD(tmp_list); | 160 | LIST_HEAD(tmp_list); |
| 161 | struct list_head *iter, *iter2; | 161 | struct dlm_work_item *item, *next; |
| 162 | struct dlm_work_item *item; | ||
| 163 | dlm_workfunc_t *workfunc; | 162 | dlm_workfunc_t *workfunc; |
| 164 | int tot=0; | 163 | int tot=0; |
| 165 | 164 | ||
| @@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work) | |||
| 167 | list_splice_init(&dlm->work_list, &tmp_list); | 166 | list_splice_init(&dlm->work_list, &tmp_list); |
| 168 | spin_unlock(&dlm->work_lock); | 167 | spin_unlock(&dlm->work_lock); |
| 169 | 168 | ||
| 170 | list_for_each_safe(iter, iter2, &tmp_list) { | 169 | list_for_each_entry(item, &tmp_list, list) { |
| 171 | tot++; | 170 | tot++; |
| 172 | } | 171 | } |
| 173 | mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); | 172 | mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); |
| 174 | 173 | ||
| 175 | list_for_each_safe(iter, iter2, &tmp_list) { | 174 | list_for_each_entry_safe(item, next, &tmp_list, list) { |
| 176 | item = list_entry(iter, struct dlm_work_item, list); | ||
| 177 | workfunc = item->func; | 175 | workfunc = item->func; |
| 178 | list_del_init(&item->list); | 176 | list_del_init(&item->list); |
| 179 | 177 | ||
| @@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 549 | { | 547 | { |
| 550 | int status = 0; | 548 | int status = 0; |
| 551 | struct dlm_reco_node_data *ndata; | 549 | struct dlm_reco_node_data *ndata; |
| 552 | struct list_head *iter; | ||
| 553 | int all_nodes_done; | 550 | int all_nodes_done; |
| 554 | int destroy = 0; | 551 | int destroy = 0; |
| 555 | int pass = 0; | 552 | int pass = 0; |
| @@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 567 | 564 | ||
| 568 | /* safe to access the node data list without a lock, since this | 565 | /* safe to access the node data list without a lock, since this |
| 569 | * process is the only one to change the list */ | 566 | * process is the only one to change the list */ |
| 570 | list_for_each(iter, &dlm->reco.node_data) { | 567 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
| 571 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
| 572 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); | 568 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); |
| 573 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; | 569 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; |
| 574 | 570 | ||
| @@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 655 | * done, or if anyone died */ | 651 | * done, or if anyone died */ |
| 656 | all_nodes_done = 1; | 652 | all_nodes_done = 1; |
| 657 | spin_lock(&dlm_reco_state_lock); | 653 | spin_lock(&dlm_reco_state_lock); |
| 658 | list_for_each(iter, &dlm->reco.node_data) { | 654 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
| 659 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
| 660 | |||
| 661 | mlog(0, "checking recovery state of node %u\n", | 655 | mlog(0, "checking recovery state of node %u\n", |
| 662 | ndata->node_num); | 656 | ndata->node_num); |
| 663 | switch (ndata->state) { | 657 | switch (ndata->state) { |
| @@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 774 | 768 | ||
| 775 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | 769 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) |
| 776 | { | 770 | { |
| 777 | struct list_head *iter, *iter2; | 771 | struct dlm_reco_node_data *ndata, *next; |
| 778 | struct dlm_reco_node_data *ndata; | ||
| 779 | LIST_HEAD(tmplist); | 772 | LIST_HEAD(tmplist); |
| 780 | 773 | ||
| 781 | spin_lock(&dlm_reco_state_lock); | 774 | spin_lock(&dlm_reco_state_lock); |
| 782 | list_splice_init(&dlm->reco.node_data, &tmplist); | 775 | list_splice_init(&dlm->reco.node_data, &tmplist); |
| 783 | spin_unlock(&dlm_reco_state_lock); | 776 | spin_unlock(&dlm_reco_state_lock); |
| 784 | 777 | ||
| 785 | list_for_each_safe(iter, iter2, &tmplist) { | 778 | list_for_each_entry_safe(ndata, next, &tmplist, list) { |
| 786 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
| 787 | list_del_init(&ndata->list); | 779 | list_del_init(&ndata->list); |
| 788 | kfree(ndata); | 780 | kfree(ndata); |
| 789 | } | 781 | } |
| @@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) | |||
| 876 | struct dlm_lock_resource *res; | 868 | struct dlm_lock_resource *res; |
| 877 | struct dlm_ctxt *dlm; | 869 | struct dlm_ctxt *dlm; |
| 878 | LIST_HEAD(resources); | 870 | LIST_HEAD(resources); |
| 879 | struct list_head *iter; | ||
| 880 | int ret; | 871 | int ret; |
| 881 | u8 dead_node, reco_master; | 872 | u8 dead_node, reco_master; |
| 882 | int skip_all_done = 0; | 873 | int skip_all_done = 0; |
| @@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) | |||
| 920 | 911 | ||
| 921 | /* any errors returned will be due to the new_master dying, | 912 | /* any errors returned will be due to the new_master dying, |
| 922 | * the dlm_reco_thread should detect this */ | 913 | * the dlm_reco_thread should detect this */ |
| 923 | list_for_each(iter, &resources) { | 914 | list_for_each_entry(res, &resources, recovering) { |
| 924 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
| 925 | ret = dlm_send_one_lockres(dlm, res, mres, reco_master, | 915 | ret = dlm_send_one_lockres(dlm, res, mres, reco_master, |
| 926 | DLM_MRES_RECOVERY); | 916 | DLM_MRES_RECOVERY); |
| 927 | if (ret < 0) { | 917 | if (ret < 0) { |
| @@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 983 | { | 973 | { |
| 984 | struct dlm_ctxt *dlm = data; | 974 | struct dlm_ctxt *dlm = data; |
| 985 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; | 975 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; |
| 986 | struct list_head *iter; | ||
| 987 | struct dlm_reco_node_data *ndata = NULL; | 976 | struct dlm_reco_node_data *ndata = NULL; |
| 988 | int ret = -EINVAL; | 977 | int ret = -EINVAL; |
| 989 | 978 | ||
| @@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 1000 | dlm->reco.dead_node, done->node_idx, dlm->node_num); | 989 | dlm->reco.dead_node, done->node_idx, dlm->node_num); |
| 1001 | 990 | ||
| 1002 | spin_lock(&dlm_reco_state_lock); | 991 | spin_lock(&dlm_reco_state_lock); |
| 1003 | list_for_each(iter, &dlm->reco.node_data) { | 992 | list_for_each_entry(ndata, &dlm->reco.node_data, list) { |
| 1004 | ndata = list_entry (iter, struct dlm_reco_node_data, list); | ||
| 1005 | if (ndata->node_num != done->node_idx) | 993 | if (ndata->node_num != done->node_idx) |
| 1006 | continue; | 994 | continue; |
| 1007 | 995 | ||
| @@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, | |||
| 1049 | struct list_head *list, | 1037 | struct list_head *list, |
| 1050 | u8 dead_node) | 1038 | u8 dead_node) |
| 1051 | { | 1039 | { |
| 1052 | struct dlm_lock_resource *res; | 1040 | struct dlm_lock_resource *res, *next; |
| 1053 | struct list_head *iter, *iter2; | ||
| 1054 | struct dlm_lock *lock; | 1041 | struct dlm_lock *lock; |
| 1055 | 1042 | ||
| 1056 | spin_lock(&dlm->spinlock); | 1043 | spin_lock(&dlm->spinlock); |
| 1057 | list_for_each_safe(iter, iter2, &dlm->reco.resources) { | 1044 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { |
| 1058 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
| 1059 | /* always prune any $RECOVERY entries for dead nodes, | 1045 | /* always prune any $RECOVERY entries for dead nodes, |
| 1060 | * otherwise hangs can occur during later recovery */ | 1046 | * otherwise hangs can occur during later recovery */ |
| 1061 | if (dlm_is_recovery_lock(res->lockname.name, | 1047 | if (dlm_is_recovery_lock(res->lockname.name, |
| @@ -1169,7 +1155,7 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, | |||
| 1169 | u8 flags, u8 master) | 1155 | u8 flags, u8 master) |
| 1170 | { | 1156 | { |
| 1171 | /* mres here is one full page */ | 1157 | /* mres here is one full page */ |
| 1172 | memset(mres, 0, PAGE_SIZE); | 1158 | clear_page(mres); |
| 1173 | mres->lockname_len = namelen; | 1159 | mres->lockname_len = namelen; |
| 1174 | memcpy(mres->lockname, lockname, namelen); | 1160 | memcpy(mres->lockname, lockname, namelen); |
| 1175 | mres->num_locks = 0; | 1161 | mres->num_locks = 0; |
| @@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
| 1252 | struct dlm_migratable_lockres *mres, | 1238 | struct dlm_migratable_lockres *mres, |
| 1253 | u8 send_to, u8 flags) | 1239 | u8 send_to, u8 flags) |
| 1254 | { | 1240 | { |
| 1255 | struct list_head *queue, *iter; | 1241 | struct list_head *queue; |
| 1256 | int total_locks, i; | 1242 | int total_locks, i; |
| 1257 | u64 mig_cookie = 0; | 1243 | u64 mig_cookie = 0; |
| 1258 | struct dlm_lock *lock; | 1244 | struct dlm_lock *lock; |
| @@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
| 1278 | total_locks = 0; | 1264 | total_locks = 0; |
| 1279 | for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { | 1265 | for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { |
| 1280 | queue = dlm_list_idx_to_ptr(res, i); | 1266 | queue = dlm_list_idx_to_ptr(res, i); |
| 1281 | list_for_each(iter, queue) { | 1267 | list_for_each_entry(lock, queue, list) { |
| 1282 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 1283 | |||
| 1284 | /* add another lock. */ | 1268 | /* add another lock. */ |
| 1285 | total_locks++; | 1269 | total_locks++; |
| 1286 | if (!dlm_add_lock_to_array(lock, mres, i)) | 1270 | if (!dlm_add_lock_to_array(lock, mres, i)) |
| @@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
| 1717 | struct dlm_lockstatus *lksb = NULL; | 1701 | struct dlm_lockstatus *lksb = NULL; |
| 1718 | int ret = 0; | 1702 | int ret = 0; |
| 1719 | int i, j, bad; | 1703 | int i, j, bad; |
| 1720 | struct list_head *iter; | ||
| 1721 | struct dlm_lock *lock = NULL; | 1704 | struct dlm_lock *lock = NULL; |
| 1722 | u8 from = O2NM_MAX_NODES; | 1705 | u8 from = O2NM_MAX_NODES; |
| 1723 | unsigned int added = 0; | 1706 | unsigned int added = 0; |
| @@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
| 1755 | spin_lock(&res->spinlock); | 1738 | spin_lock(&res->spinlock); |
| 1756 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { | 1739 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
| 1757 | tmpq = dlm_list_idx_to_ptr(res, j); | 1740 | tmpq = dlm_list_idx_to_ptr(res, j); |
| 1758 | list_for_each(iter, tmpq) { | 1741 | list_for_each_entry(lock, tmpq, list) { |
| 1759 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 1760 | if (lock->ml.cookie != ml->cookie) | 1742 | if (lock->ml.cookie != ml->cookie) |
| 1761 | lock = NULL; | 1743 | lock = NULL; |
| 1762 | else | 1744 | else |
| @@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
| 1930 | struct dlm_lock_resource *res) | 1912 | struct dlm_lock_resource *res) |
| 1931 | { | 1913 | { |
| 1932 | int i; | 1914 | int i; |
| 1933 | struct list_head *queue, *iter, *iter2; | 1915 | struct list_head *queue; |
| 1934 | struct dlm_lock *lock; | 1916 | struct dlm_lock *lock, *next; |
| 1935 | 1917 | ||
| 1936 | res->state |= DLM_LOCK_RES_RECOVERING; | 1918 | res->state |= DLM_LOCK_RES_RECOVERING; |
| 1937 | if (!list_empty(&res->recovering)) { | 1919 | if (!list_empty(&res->recovering)) { |
| @@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
| 1947 | /* find any pending locks and put them back on proper list */ | 1929 | /* find any pending locks and put them back on proper list */ |
| 1948 | for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { | 1930 | for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { |
| 1949 | queue = dlm_list_idx_to_ptr(res, i); | 1931 | queue = dlm_list_idx_to_ptr(res, i); |
| 1950 | list_for_each_safe(iter, iter2, queue) { | 1932 | list_for_each_entry_safe(lock, next, queue, list) { |
| 1951 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 1952 | dlm_lock_get(lock); | 1933 | dlm_lock_get(lock); |
| 1953 | if (lock->convert_pending) { | 1934 | if (lock->convert_pending) { |
| 1954 | /* move converting lock back to granted */ | 1935 | /* move converting lock back to granted */ |
| @@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 2013 | u8 dead_node, u8 new_master) | 1994 | u8 dead_node, u8 new_master) |
| 2014 | { | 1995 | { |
| 2015 | int i; | 1996 | int i; |
| 2016 | struct list_head *iter, *iter2; | ||
| 2017 | struct hlist_node *hash_iter; | 1997 | struct hlist_node *hash_iter; |
| 2018 | struct hlist_head *bucket; | 1998 | struct hlist_head *bucket; |
| 2019 | 1999 | struct dlm_lock_resource *res, *next; | |
| 2020 | struct dlm_lock_resource *res; | ||
| 2021 | 2000 | ||
| 2022 | mlog_entry_void(); | 2001 | mlog_entry_void(); |
| 2023 | 2002 | ||
| 2024 | assert_spin_locked(&dlm->spinlock); | 2003 | assert_spin_locked(&dlm->spinlock); |
| 2025 | 2004 | ||
| 2026 | list_for_each_safe(iter, iter2, &dlm->reco.resources) { | 2005 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { |
| 2027 | res = list_entry (iter, struct dlm_lock_resource, recovering); | ||
| 2028 | if (res->owner == dead_node) { | 2006 | if (res->owner == dead_node) { |
| 2029 | list_del_init(&res->recovering); | 2007 | list_del_init(&res->recovering); |
| 2030 | spin_lock(&res->spinlock); | 2008 | spin_lock(&res->spinlock); |
| @@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local) | |||
| 2099 | static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | 2077 | static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, |
| 2100 | struct dlm_lock_resource *res, u8 dead_node) | 2078 | struct dlm_lock_resource *res, u8 dead_node) |
| 2101 | { | 2079 | { |
| 2102 | struct list_head *iter, *queue; | 2080 | struct list_head *queue; |
| 2103 | struct dlm_lock *lock; | 2081 | struct dlm_lock *lock; |
| 2104 | int blank_lvb = 0, local = 0; | 2082 | int blank_lvb = 0, local = 0; |
| 2105 | int i; | 2083 | int i; |
| @@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | |||
| 2121 | 2099 | ||
| 2122 | for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { | 2100 | for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { |
| 2123 | queue = dlm_list_idx_to_ptr(res, i); | 2101 | queue = dlm_list_idx_to_ptr(res, i); |
| 2124 | list_for_each(iter, queue) { | 2102 | list_for_each_entry(lock, queue, list) { |
| 2125 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2126 | if (lock->ml.node == search_node) { | 2103 | if (lock->ml.node == search_node) { |
| 2127 | if (dlm_lvb_needs_invalidation(lock, local)) { | 2104 | if (dlm_lvb_needs_invalidation(lock, local)) { |
| 2128 | /* zero the lksb lvb and lockres lvb */ | 2105 | /* zero the lksb lvb and lockres lvb */ |
| @@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, | |||
| 2143 | static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | 2120 | static void dlm_free_dead_locks(struct dlm_ctxt *dlm, |
| 2144 | struct dlm_lock_resource *res, u8 dead_node) | 2121 | struct dlm_lock_resource *res, u8 dead_node) |
| 2145 | { | 2122 | { |
| 2146 | struct list_head *iter, *tmpiter; | 2123 | struct dlm_lock *lock, *next; |
| 2147 | struct dlm_lock *lock; | ||
| 2148 | unsigned int freed = 0; | 2124 | unsigned int freed = 0; |
| 2149 | 2125 | ||
| 2150 | /* this node is the lockres master: | 2126 | /* this node is the lockres master: |
| @@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
| 2155 | assert_spin_locked(&res->spinlock); | 2131 | assert_spin_locked(&res->spinlock); |
| 2156 | 2132 | ||
| 2157 | /* TODO: check pending_asts, pending_basts here */ | 2133 | /* TODO: check pending_asts, pending_basts here */ |
| 2158 | list_for_each_safe(iter, tmpiter, &res->granted) { | 2134 | list_for_each_entry_safe(lock, next, &res->granted, list) { |
| 2159 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2160 | if (lock->ml.node == dead_node) { | 2135 | if (lock->ml.node == dead_node) { |
| 2161 | list_del_init(&lock->list); | 2136 | list_del_init(&lock->list); |
| 2162 | dlm_lock_put(lock); | 2137 | dlm_lock_put(lock); |
| 2163 | freed++; | 2138 | freed++; |
| 2164 | } | 2139 | } |
| 2165 | } | 2140 | } |
| 2166 | list_for_each_safe(iter, tmpiter, &res->converting) { | 2141 | list_for_each_entry_safe(lock, next, &res->converting, list) { |
| 2167 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2168 | if (lock->ml.node == dead_node) { | 2142 | if (lock->ml.node == dead_node) { |
| 2169 | list_del_init(&lock->list); | 2143 | list_del_init(&lock->list); |
| 2170 | dlm_lock_put(lock); | 2144 | dlm_lock_put(lock); |
| 2171 | freed++; | 2145 | freed++; |
| 2172 | } | 2146 | } |
| 2173 | } | 2147 | } |
| 2174 | list_for_each_safe(iter, tmpiter, &res->blocked) { | 2148 | list_for_each_entry_safe(lock, next, &res->blocked, list) { |
| 2175 | lock = list_entry (iter, struct dlm_lock, list); | ||
| 2176 | if (lock->ml.node == dead_node) { | 2149 | if (lock->ml.node == dead_node) { |
| 2177 | list_del_init(&lock->list); | 2150 | list_del_init(&lock->list); |
| 2178 | dlm_lock_put(lock); | 2151 | dlm_lock_put(lock); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index d1bd305ef0d7..f71250ed166f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level) | |||
| 600 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, | 600 | static void lockres_set_flags(struct ocfs2_lock_res *lockres, |
| 601 | unsigned long newflags) | 601 | unsigned long newflags) |
| 602 | { | 602 | { |
| 603 | struct list_head *pos, *tmp; | 603 | struct ocfs2_mask_waiter *mw, *tmp; |
| 604 | struct ocfs2_mask_waiter *mw; | ||
| 605 | 604 | ||
| 606 | assert_spin_locked(&lockres->l_lock); | 605 | assert_spin_locked(&lockres->l_lock); |
| 607 | 606 | ||
| 608 | lockres->l_flags = newflags; | 607 | lockres->l_flags = newflags; |
| 609 | 608 | ||
| 610 | list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) { | 609 | list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { |
| 611 | mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item); | ||
| 612 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) | 610 | if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) |
| 613 | continue; | 611 | continue; |
| 614 | 612 | ||
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h index f226b2207628..ff257628af16 100644 --- a/fs/ocfs2/endian.h +++ b/fs/ocfs2/endian.h | |||
| @@ -32,6 +32,11 @@ static inline void le32_add_cpu(__le32 *var, u32 val) | |||
| 32 | *var = cpu_to_le32(le32_to_cpu(*var) + val); | 32 | *var = cpu_to_le32(le32_to_cpu(*var) + val); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | static inline void le64_add_cpu(__le64 *var, u64 val) | ||
| 36 | { | ||
| 37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | ||
| 38 | } | ||
| 39 | |||
| 35 | static inline void le32_and_cpu(__le32 *var, u32 val) | 40 | static inline void le32_and_cpu(__le32 *var, u32 val) |
| 36 | { | 41 | { |
| 37 | *var = cpu_to_le32(le32_to_cpu(*var) & val); | 42 | *var = cpu_to_le32(le32_to_cpu(*var) & val); |
diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index 5b77ee7866ef..e08bed9e45a0 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | #ifndef OCFS2_EXPORT_H | 26 | #ifndef OCFS2_EXPORT_H |
| 27 | #define OCFS2_EXPORT_H | 27 | #define OCFS2_EXPORT_H |
| 28 | 28 | ||
| 29 | #include <linux/exportfs.h> | ||
| 30 | |||
| 29 | extern struct export_operations ocfs2_export_ops; | 31 | extern struct export_operations ocfs2_export_ops; |
| 30 | 32 | ||
| 31 | #endif /* OCFS2_EXPORT_H */ | 33 | #endif /* OCFS2_EXPORT_H */ |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index ba2b2ab1c6e4..03c1d365c78b 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
| @@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, | |||
| 109 | */ | 109 | */ |
| 110 | void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) | 110 | void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) |
| 111 | { | 111 | { |
| 112 | struct list_head *p, *n; | 112 | struct ocfs2_extent_map_item *emi, *n; |
| 113 | struct ocfs2_extent_map_item *emi; | ||
| 114 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 113 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| 115 | struct ocfs2_extent_map *em = &oi->ip_extent_map; | 114 | struct ocfs2_extent_map *em = &oi->ip_extent_map; |
| 116 | LIST_HEAD(tmp_list); | 115 | LIST_HEAD(tmp_list); |
| 117 | unsigned int range; | 116 | unsigned int range; |
| 118 | 117 | ||
| 119 | spin_lock(&oi->ip_lock); | 118 | spin_lock(&oi->ip_lock); |
| 120 | list_for_each_safe(p, n, &em->em_list) { | 119 | list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { |
| 121 | emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); | ||
| 122 | |||
| 123 | if (emi->ei_cpos >= cpos) { | 120 | if (emi->ei_cpos >= cpos) { |
| 124 | /* Full truncate of this record. */ | 121 | /* Full truncate of this record. */ |
| 125 | list_move(&emi->ei_list, &tmp_list); | 122 | list_move(&emi->ei_list, &tmp_list); |
| @@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) | |||
| 136 | } | 133 | } |
| 137 | spin_unlock(&oi->ip_lock); | 134 | spin_unlock(&oi->ip_lock); |
| 138 | 135 | ||
| 139 | list_for_each_safe(p, n, &tmp_list) { | 136 | list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { |
| 140 | emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); | ||
| 141 | list_del(&emi->ei_list); | 137 | list_del(&emi->ei_list); |
| 142 | kfree(emi); | 138 | kfree(emi); |
| 143 | } | 139 | } |
| @@ -377,37 +373,6 @@ out: | |||
| 377 | return ret; | 373 | return ret; |
| 378 | } | 374 | } |
| 379 | 375 | ||
| 380 | /* | ||
| 381 | * Return the index of the extent record which contains cluster #v_cluster. | ||
| 382 | * -1 is returned if it was not found. | ||
| 383 | * | ||
| 384 | * Should work fine on interior and exterior nodes. | ||
| 385 | */ | ||
| 386 | static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, | ||
| 387 | u32 v_cluster) | ||
| 388 | { | ||
| 389 | int ret = -1; | ||
| 390 | int i; | ||
| 391 | struct ocfs2_extent_rec *rec; | ||
| 392 | u32 rec_end, rec_start, clusters; | ||
| 393 | |||
| 394 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
| 395 | rec = &el->l_recs[i]; | ||
| 396 | |||
| 397 | rec_start = le32_to_cpu(rec->e_cpos); | ||
| 398 | clusters = ocfs2_rec_clusters(el, rec); | ||
| 399 | |||
| 400 | rec_end = rec_start + clusters; | ||
| 401 | |||
| 402 | if (v_cluster >= rec_start && v_cluster < rec_end) { | ||
| 403 | ret = i; | ||
| 404 | break; | ||
| 405 | } | ||
| 406 | } | ||
| 407 | |||
| 408 | return ret; | ||
| 409 | } | ||
| 410 | |||
| 411 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, |
| 412 | u32 *p_cluster, u32 *num_clusters, | 377 | u32 *p_cluster, u32 *num_clusters, |
| 413 | unsigned int *extent_flags) | 378 | unsigned int *extent_flags) |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4979b6675717..f04c7aa834cb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -263,6 +263,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
| 263 | int status; | 263 | int status; |
| 264 | handle_t *handle; | 264 | handle_t *handle; |
| 265 | struct ocfs2_dinode *di; | 265 | struct ocfs2_dinode *di; |
| 266 | u64 cluster_bytes; | ||
| 266 | 267 | ||
| 267 | mlog_entry_void(); | 268 | mlog_entry_void(); |
| 268 | 269 | ||
| @@ -286,7 +287,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
| 286 | /* | 287 | /* |
| 287 | * Do this before setting i_size. | 288 | * Do this before setting i_size. |
| 288 | */ | 289 | */ |
| 289 | status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size); | 290 | cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); |
| 291 | status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size, | ||
| 292 | cluster_bytes); | ||
| 290 | if (status) { | 293 | if (status) { |
| 291 | mlog_errno(status); | 294 | mlog_errno(status); |
| 292 | goto out_commit; | 295 | goto out_commit; |
| @@ -326,9 +329,6 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 326 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 329 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 327 | (unsigned long long)new_i_size); | 330 | (unsigned long long)new_i_size); |
| 328 | 331 | ||
| 329 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
| 330 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
| 331 | |||
| 332 | fe = (struct ocfs2_dinode *) di_bh->b_data; | 332 | fe = (struct ocfs2_dinode *) di_bh->b_data; |
| 333 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 333 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
| 334 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 334 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); |
| @@ -363,16 +363,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 363 | if (new_i_size == le64_to_cpu(fe->i_size)) | 363 | if (new_i_size == le64_to_cpu(fe->i_size)) |
| 364 | goto bail; | 364 | goto bail; |
| 365 | 365 | ||
| 366 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 367 | |||
| 366 | /* This forces other nodes to sync and drop their pages. Do | 368 | /* This forces other nodes to sync and drop their pages. Do |
| 367 | * this even if we have a truncate without allocation change - | 369 | * this even if we have a truncate without allocation change - |
| 368 | * ocfs2 cluster sizes can be much greater than page size, so | 370 | * ocfs2 cluster sizes can be much greater than page size, so |
| 369 | * we have to truncate them anyway. */ | 371 | * we have to truncate them anyway. */ |
| 370 | status = ocfs2_data_lock(inode, 1); | 372 | status = ocfs2_data_lock(inode, 1); |
| 371 | if (status < 0) { | 373 | if (status < 0) { |
| 374 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 375 | |||
| 372 | mlog_errno(status); | 376 | mlog_errno(status); |
| 373 | goto bail; | 377 | goto bail; |
| 374 | } | 378 | } |
| 375 | 379 | ||
| 380 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
| 381 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
| 382 | |||
| 376 | /* alright, we're going to need to do a full blown alloc size | 383 | /* alright, we're going to need to do a full blown alloc size |
| 377 | * change. Orphan the inode so that recovery can complete the | 384 | * change. Orphan the inode so that recovery can complete the |
| 378 | * truncate if necessary. This does the task of marking | 385 | * truncate if necessary. This does the task of marking |
| @@ -399,6 +406,8 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
| 399 | bail_unlock_data: | 406 | bail_unlock_data: |
| 400 | ocfs2_data_unlock(inode, 1); | 407 | ocfs2_data_unlock(inode, 1); |
| 401 | 408 | ||
| 409 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 410 | |||
| 402 | bail: | 411 | bail: |
| 403 | 412 | ||
| 404 | mlog_exit(status); | 413 | mlog_exit(status); |
| @@ -419,6 +428,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
| 419 | struct inode *inode, | 428 | struct inode *inode, |
| 420 | u32 *logical_offset, | 429 | u32 *logical_offset, |
| 421 | u32 clusters_to_add, | 430 | u32 clusters_to_add, |
| 431 | int mark_unwritten, | ||
| 422 | struct buffer_head *fe_bh, | 432 | struct buffer_head *fe_bh, |
| 423 | handle_t *handle, | 433 | handle_t *handle, |
| 424 | struct ocfs2_alloc_context *data_ac, | 434 | struct ocfs2_alloc_context *data_ac, |
| @@ -431,9 +441,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
| 431 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | 441 | enum ocfs2_alloc_restarted reason = RESTART_NONE; |
| 432 | u32 bit_off, num_bits; | 442 | u32 bit_off, num_bits; |
| 433 | u64 block; | 443 | u64 block; |
| 444 | u8 flags = 0; | ||
| 434 | 445 | ||
| 435 | BUG_ON(!clusters_to_add); | 446 | BUG_ON(!clusters_to_add); |
| 436 | 447 | ||
| 448 | if (mark_unwritten) | ||
| 449 | flags = OCFS2_EXT_UNWRITTEN; | ||
| 450 | |||
| 437 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | 451 | free_extents = ocfs2_num_free_extents(osb, inode, fe); |
| 438 | if (free_extents < 0) { | 452 | if (free_extents < 0) { |
| 439 | status = free_extents; | 453 | status = free_extents; |
| @@ -483,7 +497,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
| 483 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 497 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 484 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | 498 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, |
| 485 | *logical_offset, block, num_bits, | 499 | *logical_offset, block, num_bits, |
| 486 | meta_ac); | 500 | flags, meta_ac); |
| 487 | if (status < 0) { | 501 | if (status < 0) { |
| 488 | mlog_errno(status); | 502 | mlog_errno(status); |
| 489 | goto leave; | 503 | goto leave; |
| @@ -516,25 +530,31 @@ leave: | |||
| 516 | * For a given allocation, determine which allocators will need to be | 530 | * For a given allocation, determine which allocators will need to be |
| 517 | * accessed, and lock them, reserving the appropriate number of bits. | 531 | * accessed, and lock them, reserving the appropriate number of bits. |
| 518 | * | 532 | * |
| 519 | * Called from ocfs2_extend_allocation() for file systems which don't | 533 | * Sparse file systems call this from ocfs2_write_begin_nolock() |
| 520 | * support holes, and from ocfs2_write() for file systems which | 534 | * and ocfs2_allocate_unwritten_extents(). |
| 521 | * understand sparse inodes. | 535 | * |
| 536 | * File systems which don't support holes call this from | ||
| 537 | * ocfs2_extend_allocation(). | ||
| 522 | */ | 538 | */ |
| 523 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | 539 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, |
| 524 | u32 clusters_to_add, | 540 | u32 clusters_to_add, u32 extents_to_split, |
| 525 | struct ocfs2_alloc_context **data_ac, | 541 | struct ocfs2_alloc_context **data_ac, |
| 526 | struct ocfs2_alloc_context **meta_ac) | 542 | struct ocfs2_alloc_context **meta_ac) |
| 527 | { | 543 | { |
| 528 | int ret, num_free_extents; | 544 | int ret = 0, num_free_extents; |
| 545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
| 529 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 530 | 547 | ||
| 531 | *meta_ac = NULL; | 548 | *meta_ac = NULL; |
| 532 | *data_ac = NULL; | 549 | if (data_ac) |
| 550 | *data_ac = NULL; | ||
| 551 | |||
| 552 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
| 533 | 553 | ||
| 534 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 554 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
| 535 | "clusters_to_add = %u\n", | 555 | "clusters_to_add = %u, extents_to_split = %u\n", |
| 536 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 556 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), |
| 537 | le32_to_cpu(di->i_clusters), clusters_to_add); | 557 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); |
| 538 | 558 | ||
| 539 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 559 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); |
| 540 | if (num_free_extents < 0) { | 560 | if (num_free_extents < 0) { |
| @@ -552,9 +572,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
| 552 | * | 572 | * |
| 553 | * Most of the time we'll only be seeing this 1 cluster at a time | 573 | * Most of the time we'll only be seeing this 1 cluster at a time |
| 554 | * anyway. | 574 | * anyway. |
| 575 | * | ||
| 576 | * Always lock for any unwritten extents - we might want to | ||
| 577 | * add blocks during a split. | ||
| 555 | */ | 578 | */ |
| 556 | if (!num_free_extents || | 579 | if (!num_free_extents || |
| 557 | (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { | 580 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { |
| 558 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | 581 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); |
| 559 | if (ret < 0) { | 582 | if (ret < 0) { |
| 560 | if (ret != -ENOSPC) | 583 | if (ret != -ENOSPC) |
| @@ -563,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
| 563 | } | 586 | } |
| 564 | } | 587 | } |
| 565 | 588 | ||
| 589 | if (clusters_to_add == 0) | ||
| 590 | goto out; | ||
| 591 | |||
| 566 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | 592 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); |
| 567 | if (ret < 0) { | 593 | if (ret < 0) { |
| 568 | if (ret != -ENOSPC) | 594 | if (ret != -ENOSPC) |
| @@ -585,14 +611,13 @@ out: | |||
| 585 | return ret; | 611 | return ret; |
| 586 | } | 612 | } |
| 587 | 613 | ||
| 588 | static int ocfs2_extend_allocation(struct inode *inode, | 614 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, |
| 589 | u32 clusters_to_add) | 615 | u32 clusters_to_add, int mark_unwritten) |
| 590 | { | 616 | { |
| 591 | int status = 0; | 617 | int status = 0; |
| 592 | int restart_func = 0; | 618 | int restart_func = 0; |
| 593 | int drop_alloc_sem = 0; | ||
| 594 | int credits; | 619 | int credits; |
| 595 | u32 prev_clusters, logical_start; | 620 | u32 prev_clusters; |
| 596 | struct buffer_head *bh = NULL; | 621 | struct buffer_head *bh = NULL; |
| 597 | struct ocfs2_dinode *fe = NULL; | 622 | struct ocfs2_dinode *fe = NULL; |
| 598 | handle_t *handle = NULL; | 623 | handle_t *handle = NULL; |
| @@ -607,7 +632,7 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
| 607 | * This function only exists for file systems which don't | 632 | * This function only exists for file systems which don't |
| 608 | * support holes. | 633 | * support holes. |
| 609 | */ | 634 | */ |
| 610 | BUG_ON(ocfs2_sparse_alloc(osb)); | 635 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
| 611 | 636 | ||
| 612 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 637 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, |
| 613 | OCFS2_BH_CACHED, inode); | 638 | OCFS2_BH_CACHED, inode); |
| @@ -623,19 +648,10 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
| 623 | goto leave; | 648 | goto leave; |
| 624 | } | 649 | } |
| 625 | 650 | ||
| 626 | logical_start = OCFS2_I(inode)->ip_clusters; | ||
| 627 | |||
| 628 | restart_all: | 651 | restart_all: |
| 629 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 652 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
| 630 | 653 | ||
| 631 | /* blocks peope in read/write from reading our allocation | 654 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, |
| 632 | * until we're done changing it. We depend on i_mutex to block | ||
| 633 | * other extend/truncate calls while we're here. Ordering wrt | ||
| 634 | * start_trans is important here -- always do it before! */ | ||
| 635 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 636 | drop_alloc_sem = 1; | ||
| 637 | |||
| 638 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, | ||
| 639 | &meta_ac); | 655 | &meta_ac); |
| 640 | if (status) { | 656 | if (status) { |
| 641 | mlog_errno(status); | 657 | mlog_errno(status); |
| @@ -668,6 +684,7 @@ restarted_transaction: | |||
| 668 | inode, | 684 | inode, |
| 669 | &logical_start, | 685 | &logical_start, |
| 670 | clusters_to_add, | 686 | clusters_to_add, |
| 687 | mark_unwritten, | ||
| 671 | bh, | 688 | bh, |
| 672 | handle, | 689 | handle, |
| 673 | data_ac, | 690 | data_ac, |
| @@ -720,10 +737,6 @@ restarted_transaction: | |||
| 720 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 737 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); |
| 721 | 738 | ||
| 722 | leave: | 739 | leave: |
| 723 | if (drop_alloc_sem) { | ||
| 724 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 725 | drop_alloc_sem = 0; | ||
| 726 | } | ||
| 727 | if (handle) { | 740 | if (handle) { |
| 728 | ocfs2_commit_trans(osb, handle); | 741 | ocfs2_commit_trans(osb, handle); |
| 729 | handle = NULL; | 742 | handle = NULL; |
| @@ -749,6 +762,25 @@ leave: | |||
| 749 | return status; | 762 | return status; |
| 750 | } | 763 | } |
| 751 | 764 | ||
| 765 | static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
| 766 | u32 clusters_to_add, int mark_unwritten) | ||
| 767 | { | ||
| 768 | int ret; | ||
| 769 | |||
| 770 | /* | ||
| 771 | * The alloc sem blocks peope in read/write from reading our | ||
| 772 | * allocation until we're done changing it. We depend on | ||
| 773 | * i_mutex to block other extend/truncate calls while we're | ||
| 774 | * here. | ||
| 775 | */ | ||
| 776 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 777 | ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add, | ||
| 778 | mark_unwritten); | ||
| 779 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 780 | |||
| 781 | return ret; | ||
| 782 | } | ||
| 783 | |||
| 752 | /* Some parts of this taken from generic_cont_expand, which turned out | 784 | /* Some parts of this taken from generic_cont_expand, which turned out |
| 753 | * to be too fragile to do exactly what we need without us having to | 785 | * to be too fragile to do exactly what we need without us having to |
| 754 | * worry about recursive locking in ->prepare_write() and | 786 | * worry about recursive locking in ->prepare_write() and |
| @@ -890,7 +922,9 @@ static int ocfs2_extend_file(struct inode *inode, | |||
| 890 | } | 922 | } |
| 891 | 923 | ||
| 892 | if (clusters_to_add) { | 924 | if (clusters_to_add) { |
| 893 | ret = ocfs2_extend_allocation(inode, clusters_to_add); | 925 | ret = ocfs2_extend_allocation(inode, |
| 926 | OCFS2_I(inode)->ip_clusters, | ||
| 927 | clusters_to_add, 0); | ||
| 894 | if (ret < 0) { | 928 | if (ret < 0) { |
| 895 | mlog_errno(ret); | 929 | mlog_errno(ret); |
| 896 | goto out_unlock; | 930 | goto out_unlock; |
| @@ -995,6 +1029,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 995 | goto bail_unlock; | 1029 | goto bail_unlock; |
| 996 | } | 1030 | } |
| 997 | 1031 | ||
| 1032 | /* | ||
| 1033 | * This will intentionally not wind up calling vmtruncate(), | ||
| 1034 | * since all the work for a size change has been done above. | ||
| 1035 | * Otherwise, we could get into problems with truncate as | ||
| 1036 | * ip_alloc_sem is used there to protect against i_size | ||
| 1037 | * changes. | ||
| 1038 | */ | ||
| 998 | status = inode_setattr(inode, attr); | 1039 | status = inode_setattr(inode, attr); |
| 999 | if (status < 0) { | 1040 | if (status < 0) { |
| 1000 | mlog_errno(status); | 1041 | mlog_errno(status); |
| @@ -1070,17 +1111,16 @@ out: | |||
| 1070 | return ret; | 1111 | return ret; |
| 1071 | } | 1112 | } |
| 1072 | 1113 | ||
| 1073 | static int ocfs2_write_remove_suid(struct inode *inode) | 1114 | static int __ocfs2_write_remove_suid(struct inode *inode, |
| 1115 | struct buffer_head *bh) | ||
| 1074 | { | 1116 | { |
| 1075 | int ret; | 1117 | int ret; |
| 1076 | struct buffer_head *bh = NULL; | ||
| 1077 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1078 | handle_t *handle; | 1118 | handle_t *handle; |
| 1079 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1119 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1080 | struct ocfs2_dinode *di; | 1120 | struct ocfs2_dinode *di; |
| 1081 | 1121 | ||
| 1082 | mlog_entry("(Inode %llu, mode 0%o)\n", | 1122 | mlog_entry("(Inode %llu, mode 0%o)\n", |
| 1083 | (unsigned long long)oi->ip_blkno, inode->i_mode); | 1123 | (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); |
| 1084 | 1124 | ||
| 1085 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 1125 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
| 1086 | if (handle == NULL) { | 1126 | if (handle == NULL) { |
| @@ -1089,17 +1129,11 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
| 1089 | goto out; | 1129 | goto out; |
| 1090 | } | 1130 | } |
| 1091 | 1131 | ||
| 1092 | ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
| 1093 | if (ret < 0) { | ||
| 1094 | mlog_errno(ret); | ||
| 1095 | goto out_trans; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | ret = ocfs2_journal_access(handle, inode, bh, | 1132 | ret = ocfs2_journal_access(handle, inode, bh, |
| 1099 | OCFS2_JOURNAL_ACCESS_WRITE); | 1133 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 1100 | if (ret < 0) { | 1134 | if (ret < 0) { |
| 1101 | mlog_errno(ret); | 1135 | mlog_errno(ret); |
| 1102 | goto out_bh; | 1136 | goto out_trans; |
| 1103 | } | 1137 | } |
| 1104 | 1138 | ||
| 1105 | inode->i_mode &= ~S_ISUID; | 1139 | inode->i_mode &= ~S_ISUID; |
| @@ -1112,8 +1146,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
| 1112 | ret = ocfs2_journal_dirty(handle, bh); | 1146 | ret = ocfs2_journal_dirty(handle, bh); |
| 1113 | if (ret < 0) | 1147 | if (ret < 0) |
| 1114 | mlog_errno(ret); | 1148 | mlog_errno(ret); |
| 1115 | out_bh: | 1149 | |
| 1116 | brelse(bh); | ||
| 1117 | out_trans: | 1150 | out_trans: |
| 1118 | ocfs2_commit_trans(osb, handle); | 1151 | ocfs2_commit_trans(osb, handle); |
| 1119 | out: | 1152 | out: |
| @@ -1159,6 +1192,460 @@ out: | |||
| 1159 | return ret; | 1192 | return ret; |
| 1160 | } | 1193 | } |
| 1161 | 1194 | ||
| 1195 | static int ocfs2_write_remove_suid(struct inode *inode) | ||
| 1196 | { | ||
| 1197 | int ret; | ||
| 1198 | struct buffer_head *bh = NULL; | ||
| 1199 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1200 | |||
| 1201 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
| 1202 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
| 1203 | if (ret < 0) { | ||
| 1204 | mlog_errno(ret); | ||
| 1205 | goto out; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | ret = __ocfs2_write_remove_suid(inode, bh); | ||
| 1209 | out: | ||
| 1210 | brelse(bh); | ||
| 1211 | return ret; | ||
| 1212 | } | ||
| 1213 | |||
| 1214 | /* | ||
| 1215 | * Allocate enough extents to cover the region starting at byte offset | ||
| 1216 | * start for len bytes. Existing extents are skipped, any extents | ||
| 1217 | * added are marked as "unwritten". | ||
| 1218 | */ | ||
| 1219 | static int ocfs2_allocate_unwritten_extents(struct inode *inode, | ||
| 1220 | u64 start, u64 len) | ||
| 1221 | { | ||
| 1222 | int ret; | ||
| 1223 | u32 cpos, phys_cpos, clusters, alloc_size; | ||
| 1224 | |||
| 1225 | /* | ||
| 1226 | * We consider both start and len to be inclusive. | ||
| 1227 | */ | ||
| 1228 | cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
| 1229 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len); | ||
| 1230 | clusters -= cpos; | ||
| 1231 | |||
| 1232 | while (clusters) { | ||
| 1233 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
| 1234 | &alloc_size, NULL); | ||
| 1235 | if (ret) { | ||
| 1236 | mlog_errno(ret); | ||
| 1237 | goto out; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | /* | ||
| 1241 | * Hole or existing extent len can be arbitrary, so | ||
| 1242 | * cap it to our own allocation request. | ||
| 1243 | */ | ||
| 1244 | if (alloc_size > clusters) | ||
| 1245 | alloc_size = clusters; | ||
| 1246 | |||
| 1247 | if (phys_cpos) { | ||
| 1248 | /* | ||
| 1249 | * We already have an allocation at this | ||
| 1250 | * region so we can safely skip it. | ||
| 1251 | */ | ||
| 1252 | goto next; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); | ||
| 1256 | if (ret) { | ||
| 1257 | if (ret != -ENOSPC) | ||
| 1258 | mlog_errno(ret); | ||
| 1259 | goto out; | ||
| 1260 | } | ||
| 1261 | |||
| 1262 | next: | ||
| 1263 | cpos += alloc_size; | ||
| 1264 | clusters -= alloc_size; | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | ret = 0; | ||
| 1268 | out: | ||
| 1269 | return ret; | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | static int __ocfs2_remove_inode_range(struct inode *inode, | ||
| 1273 | struct buffer_head *di_bh, | ||
| 1274 | u32 cpos, u32 phys_cpos, u32 len, | ||
| 1275 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 1276 | { | ||
| 1277 | int ret; | ||
| 1278 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 1279 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1280 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 1281 | handle_t *handle; | ||
| 1282 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 1283 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1284 | |||
| 1285 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||
| 1286 | if (ret) { | ||
| 1287 | mlog_errno(ret); | ||
| 1288 | return ret; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | mutex_lock(&tl_inode->i_mutex); | ||
| 1292 | |||
| 1293 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 1294 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 1295 | if (ret < 0) { | ||
| 1296 | mlog_errno(ret); | ||
| 1297 | goto out; | ||
| 1298 | } | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
| 1302 | if (handle == NULL) { | ||
| 1303 | ret = -ENOMEM; | ||
| 1304 | mlog_errno(ret); | ||
| 1305 | goto out; | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
| 1309 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1310 | if (ret) { | ||
| 1311 | mlog_errno(ret); | ||
| 1312 | goto out; | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||
| 1316 | dealloc); | ||
| 1317 | if (ret) { | ||
| 1318 | mlog_errno(ret); | ||
| 1319 | goto out_commit; | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | OCFS2_I(inode)->ip_clusters -= len; | ||
| 1323 | di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||
| 1324 | |||
| 1325 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
| 1326 | if (ret) { | ||
| 1327 | mlog_errno(ret); | ||
| 1328 | goto out_commit; | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
| 1332 | if (ret) | ||
| 1333 | mlog_errno(ret); | ||
| 1334 | |||
| 1335 | out_commit: | ||
| 1336 | ocfs2_commit_trans(osb, handle); | ||
| 1337 | out: | ||
| 1338 | mutex_unlock(&tl_inode->i_mutex); | ||
| 1339 | |||
| 1340 | if (meta_ac) | ||
| 1341 | ocfs2_free_alloc_context(meta_ac); | ||
| 1342 | |||
| 1343 | return ret; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | /* | ||
| 1347 | * Truncate a byte range, avoiding pages within partial clusters. This | ||
| 1348 | * preserves those pages for the zeroing code to write to. | ||
| 1349 | */ | ||
| 1350 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||
| 1351 | u64 byte_len) | ||
| 1352 | { | ||
| 1353 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1354 | loff_t start, end; | ||
| 1355 | struct address_space *mapping = inode->i_mapping; | ||
| 1356 | |||
| 1357 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||
| 1358 | end = byte_start + byte_len; | ||
| 1359 | end = end & ~(osb->s_clustersize - 1); | ||
| 1360 | |||
| 1361 | if (start < end) { | ||
| 1362 | unmap_mapping_range(mapping, start, end - start, 0); | ||
| 1363 | truncate_inode_pages_range(mapping, start, end - 1); | ||
| 1364 | } | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | static int ocfs2_zero_partial_clusters(struct inode *inode, | ||
| 1368 | u64 start, u64 len) | ||
| 1369 | { | ||
| 1370 | int ret = 0; | ||
| 1371 | u64 tmpend, end = start + len; | ||
| 1372 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1373 | unsigned int csize = osb->s_clustersize; | ||
| 1374 | handle_t *handle; | ||
| 1375 | |||
| 1376 | /* | ||
| 1377 | * The "start" and "end" values are NOT necessarily part of | ||
| 1378 | * the range whose allocation is being deleted. Rather, this | ||
| 1379 | * is what the user passed in with the request. We must zero | ||
| 1380 | * partial clusters here. There's no need to worry about | ||
| 1381 | * physical allocation - the zeroing code knows to skip holes. | ||
| 1382 | */ | ||
| 1383 | mlog(0, "byte start: %llu, end: %llu\n", | ||
| 1384 | (unsigned long long)start, (unsigned long long)end); | ||
| 1385 | |||
| 1386 | /* | ||
| 1387 | * If both edges are on a cluster boundary then there's no | ||
| 1388 | * zeroing required as the region is part of the allocation to | ||
| 1389 | * be truncated. | ||
| 1390 | */ | ||
| 1391 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||
| 1392 | goto out; | ||
| 1393 | |||
| 1394 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1395 | if (handle == NULL) { | ||
| 1396 | ret = -ENOMEM; | ||
| 1397 | mlog_errno(ret); | ||
| 1398 | goto out; | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | /* | ||
| 1402 | * We want to get the byte offset of the end of the 1st cluster. | ||
| 1403 | */ | ||
| 1404 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||
| 1405 | if (tmpend > end) | ||
| 1406 | tmpend = end; | ||
| 1407 | |||
| 1408 | mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||
| 1409 | (unsigned long long)start, (unsigned long long)tmpend); | ||
| 1410 | |||
| 1411 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||
| 1412 | if (ret) | ||
| 1413 | mlog_errno(ret); | ||
| 1414 | |||
| 1415 | if (tmpend < end) { | ||
| 1416 | /* | ||
| 1417 | * This may make start and end equal, but the zeroing | ||
| 1418 | * code will skip any work in that case so there's no | ||
| 1419 | * need to catch it up here. | ||
| 1420 | */ | ||
| 1421 | start = end & ~(osb->s_clustersize - 1); | ||
| 1422 | |||
| 1423 | mlog(0, "2nd range: start: %llu, end: %llu\n", | ||
| 1424 | (unsigned long long)start, (unsigned long long)end); | ||
| 1425 | |||
| 1426 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||
| 1427 | if (ret) | ||
| 1428 | mlog_errno(ret); | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | ocfs2_commit_trans(osb, handle); | ||
| 1432 | out: | ||
| 1433 | return ret; | ||
| 1434 | } | ||
| 1435 | |||
| 1436 | static int ocfs2_remove_inode_range(struct inode *inode, | ||
| 1437 | struct buffer_head *di_bh, u64 byte_start, | ||
| 1438 | u64 byte_len) | ||
| 1439 | { | ||
| 1440 | int ret = 0; | ||
| 1441 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||
| 1442 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1443 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 1444 | |||
| 1445 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
| 1446 | |||
| 1447 | if (byte_len == 0) | ||
| 1448 | return 0; | ||
| 1449 | |||
| 1450 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||
| 1451 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||
| 1452 | if (trunc_len >= trunc_start) | ||
| 1453 | trunc_len -= trunc_start; | ||
| 1454 | else | ||
| 1455 | trunc_len = 0; | ||
| 1456 | |||
| 1457 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||
| 1458 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 1459 | (unsigned long long)byte_start, | ||
| 1460 | (unsigned long long)byte_len, trunc_start, trunc_len); | ||
| 1461 | |||
| 1462 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||
| 1463 | if (ret) { | ||
| 1464 | mlog_errno(ret); | ||
| 1465 | goto out; | ||
| 1466 | } | ||
| 1467 | |||
| 1468 | cpos = trunc_start; | ||
| 1469 | while (trunc_len) { | ||
| 1470 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
| 1471 | &alloc_size, NULL); | ||
| 1472 | if (ret) { | ||
| 1473 | mlog_errno(ret); | ||
| 1474 | goto out; | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | if (alloc_size > trunc_len) | ||
| 1478 | alloc_size = trunc_len; | ||
| 1479 | |||
| 1480 | /* Only do work for non-holes */ | ||
| 1481 | if (phys_cpos != 0) { | ||
| 1482 | ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||
| 1483 | phys_cpos, alloc_size, | ||
| 1484 | &dealloc); | ||
| 1485 | if (ret) { | ||
| 1486 | mlog_errno(ret); | ||
| 1487 | goto out; | ||
| 1488 | } | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | cpos += alloc_size; | ||
| 1492 | trunc_len -= alloc_size; | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||
| 1496 | |||
| 1497 | out: | ||
| 1498 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 1499 | ocfs2_run_deallocs(osb, &dealloc); | ||
| 1500 | |||
| 1501 | return ret; | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | /* | ||
| 1505 | * Parts of this function taken from xfs_change_file_space() | ||
| 1506 | */ | ||
| 1507 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | ||
| 1508 | struct ocfs2_space_resv *sr) | ||
| 1509 | { | ||
| 1510 | int ret; | ||
| 1511 | s64 llen; | ||
| 1512 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 1513 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1514 | struct buffer_head *di_bh = NULL; | ||
| 1515 | handle_t *handle; | ||
| 1516 | unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits); | ||
| 1517 | |||
| 1518 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && | ||
| 1519 | !ocfs2_writes_unwritten_extents(osb)) | ||
| 1520 | return -ENOTTY; | ||
| 1521 | else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) && | ||
| 1522 | !ocfs2_sparse_alloc(osb)) | ||
| 1523 | return -ENOTTY; | ||
| 1524 | |||
| 1525 | if (!S_ISREG(inode->i_mode)) | ||
| 1526 | return -EINVAL; | ||
| 1527 | |||
| 1528 | if (!(file->f_mode & FMODE_WRITE)) | ||
| 1529 | return -EBADF; | ||
| 1530 | |||
| 1531 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 1532 | return -EROFS; | ||
| 1533 | |||
| 1534 | mutex_lock(&inode->i_mutex); | ||
| 1535 | |||
| 1536 | /* | ||
| 1537 | * This prevents concurrent writes on other nodes | ||
| 1538 | */ | ||
| 1539 | ret = ocfs2_rw_lock(inode, 1); | ||
| 1540 | if (ret) { | ||
| 1541 | mlog_errno(ret); | ||
| 1542 | goto out; | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
| 1546 | if (ret) { | ||
| 1547 | mlog_errno(ret); | ||
| 1548 | goto out_rw_unlock; | ||
| 1549 | } | ||
| 1550 | |||
| 1551 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
| 1552 | ret = -EPERM; | ||
| 1553 | goto out_meta_unlock; | ||
| 1554 | } | ||
| 1555 | |||
| 1556 | switch (sr->l_whence) { | ||
| 1557 | case 0: /*SEEK_SET*/ | ||
| 1558 | break; | ||
| 1559 | case 1: /*SEEK_CUR*/ | ||
| 1560 | sr->l_start += file->f_pos; | ||
| 1561 | break; | ||
| 1562 | case 2: /*SEEK_END*/ | ||
| 1563 | sr->l_start += i_size_read(inode); | ||
| 1564 | break; | ||
| 1565 | default: | ||
| 1566 | ret = -EINVAL; | ||
| 1567 | goto out_meta_unlock; | ||
| 1568 | } | ||
| 1569 | sr->l_whence = 0; | ||
| 1570 | |||
| 1571 | llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len; | ||
| 1572 | |||
| 1573 | if (sr->l_start < 0 | ||
| 1574 | || sr->l_start > max_off | ||
| 1575 | || (sr->l_start + llen) < 0 | ||
| 1576 | || (sr->l_start + llen) > max_off) { | ||
| 1577 | ret = -EINVAL; | ||
| 1578 | goto out_meta_unlock; | ||
| 1579 | } | ||
| 1580 | |||
| 1581 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | ||
| 1582 | if (sr->l_len <= 0) { | ||
| 1583 | ret = -EINVAL; | ||
| 1584 | goto out_meta_unlock; | ||
| 1585 | } | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | if (should_remove_suid(file->f_path.dentry)) { | ||
| 1589 | ret = __ocfs2_write_remove_suid(inode, di_bh); | ||
| 1590 | if (ret) { | ||
| 1591 | mlog_errno(ret); | ||
| 1592 | goto out_meta_unlock; | ||
| 1593 | } | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1597 | switch (cmd) { | ||
| 1598 | case OCFS2_IOC_RESVSP: | ||
| 1599 | case OCFS2_IOC_RESVSP64: | ||
| 1600 | /* | ||
| 1601 | * This takes unsigned offsets, but the signed ones we | ||
| 1602 | * pass have been checked against overflow above. | ||
| 1603 | */ | ||
| 1604 | ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, | ||
| 1605 | sr->l_len); | ||
| 1606 | break; | ||
| 1607 | case OCFS2_IOC_UNRESVSP: | ||
| 1608 | case OCFS2_IOC_UNRESVSP64: | ||
| 1609 | ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start, | ||
| 1610 | sr->l_len); | ||
| 1611 | break; | ||
| 1612 | default: | ||
| 1613 | ret = -EINVAL; | ||
| 1614 | } | ||
| 1615 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1616 | if (ret) { | ||
| 1617 | mlog_errno(ret); | ||
| 1618 | goto out_meta_unlock; | ||
| 1619 | } | ||
| 1620 | |||
| 1621 | /* | ||
| 1622 | * We update c/mtime for these changes | ||
| 1623 | */ | ||
| 1624 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1625 | if (IS_ERR(handle)) { | ||
| 1626 | ret = PTR_ERR(handle); | ||
| 1627 | mlog_errno(ret); | ||
| 1628 | goto out_meta_unlock; | ||
| 1629 | } | ||
| 1630 | |||
| 1631 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | ||
| 1632 | ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); | ||
| 1633 | if (ret < 0) | ||
| 1634 | mlog_errno(ret); | ||
| 1635 | |||
| 1636 | ocfs2_commit_trans(osb, handle); | ||
| 1637 | |||
| 1638 | out_meta_unlock: | ||
| 1639 | brelse(di_bh); | ||
| 1640 | ocfs2_meta_unlock(inode, 1); | ||
| 1641 | out_rw_unlock: | ||
| 1642 | ocfs2_rw_unlock(inode, 1); | ||
| 1643 | |||
| 1644 | mutex_unlock(&inode->i_mutex); | ||
| 1645 | out: | ||
| 1646 | return ret; | ||
| 1647 | } | ||
| 1648 | |||
| 1162 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1649 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
| 1163 | loff_t *ppos, | 1650 | loff_t *ppos, |
| 1164 | size_t count, | 1651 | size_t count, |
| @@ -1329,15 +1816,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) | |||
| 1329 | *basep = base; | 1816 | *basep = base; |
| 1330 | } | 1817 | } |
| 1331 | 1818 | ||
| 1332 | static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, | 1819 | static struct page * ocfs2_get_write_source(char **ret_src_buf, |
| 1333 | const struct iovec *cur_iov, | 1820 | const struct iovec *cur_iov, |
| 1334 | size_t iov_offset) | 1821 | size_t iov_offset) |
| 1335 | { | 1822 | { |
| 1336 | int ret; | 1823 | int ret; |
| 1337 | char *buf; | 1824 | char *buf = cur_iov->iov_base + iov_offset; |
| 1338 | struct page *src_page = NULL; | 1825 | struct page *src_page = NULL; |
| 1826 | unsigned long off; | ||
| 1339 | 1827 | ||
| 1340 | buf = cur_iov->iov_base + iov_offset; | 1828 | off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; |
| 1341 | 1829 | ||
| 1342 | if (!segment_eq(get_fs(), KERNEL_DS)) { | 1830 | if (!segment_eq(get_fs(), KERNEL_DS)) { |
| 1343 | /* | 1831 | /* |
| @@ -1349,18 +1837,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp | |||
| 1349 | (unsigned long)buf & PAGE_CACHE_MASK, 1, | 1837 | (unsigned long)buf & PAGE_CACHE_MASK, 1, |
| 1350 | 0, 0, &src_page, NULL); | 1838 | 0, 0, &src_page, NULL); |
| 1351 | if (ret == 1) | 1839 | if (ret == 1) |
| 1352 | bp->b_src_buf = kmap(src_page); | 1840 | *ret_src_buf = kmap(src_page) + off; |
| 1353 | else | 1841 | else |
| 1354 | src_page = ERR_PTR(-EFAULT); | 1842 | src_page = ERR_PTR(-EFAULT); |
| 1355 | } else { | 1843 | } else { |
| 1356 | bp->b_src_buf = buf; | 1844 | *ret_src_buf = buf; |
| 1357 | } | 1845 | } |
| 1358 | 1846 | ||
| 1359 | return src_page; | 1847 | return src_page; |
| 1360 | } | 1848 | } |
| 1361 | 1849 | ||
| 1362 | static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp, | 1850 | static void ocfs2_put_write_source(struct page *page) |
| 1363 | struct page *page) | ||
| 1364 | { | 1851 | { |
| 1365 | if (page) { | 1852 | if (page) { |
| 1366 | kunmap(page); | 1853 | kunmap(page); |
| @@ -1376,10 +1863,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
| 1376 | { | 1863 | { |
| 1377 | int ret = 0; | 1864 | int ret = 0; |
| 1378 | ssize_t copied, total = 0; | 1865 | ssize_t copied, total = 0; |
| 1379 | size_t iov_offset = 0; | 1866 | size_t iov_offset = 0, bytes; |
| 1867 | loff_t pos; | ||
| 1380 | const struct iovec *cur_iov = iov; | 1868 | const struct iovec *cur_iov = iov; |
| 1381 | struct ocfs2_buffered_write_priv bp; | 1869 | struct page *user_page, *page; |
| 1382 | struct page *page; | 1870 | char *buf, *dst; |
| 1871 | void *fsdata; | ||
| 1383 | 1872 | ||
| 1384 | /* | 1873 | /* |
| 1385 | * handle partial DIO write. Adjust cur_iov if needed. | 1874 | * handle partial DIO write. Adjust cur_iov if needed. |
| @@ -1387,21 +1876,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
| 1387 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); | 1876 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); |
| 1388 | 1877 | ||
| 1389 | do { | 1878 | do { |
| 1390 | bp.b_cur_off = iov_offset; | 1879 | pos = *ppos; |
| 1391 | bp.b_cur_iov = cur_iov; | ||
| 1392 | 1880 | ||
| 1393 | page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); | 1881 | user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); |
| 1394 | if (IS_ERR(page)) { | 1882 | if (IS_ERR(user_page)) { |
| 1395 | ret = PTR_ERR(page); | 1883 | ret = PTR_ERR(user_page); |
| 1396 | goto out; | 1884 | goto out; |
| 1397 | } | 1885 | } |
| 1398 | 1886 | ||
| 1399 | copied = ocfs2_buffered_write_cluster(file, *ppos, count, | 1887 | /* Stay within our page boundaries */ |
| 1400 | ocfs2_map_and_write_user_data, | 1888 | bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), |
| 1401 | &bp); | 1889 | (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); |
| 1890 | /* Stay within the vector boundary */ | ||
| 1891 | bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); | ||
| 1892 | /* Stay within count */ | ||
| 1893 | bytes = min(bytes, count); | ||
| 1894 | |||
| 1895 | page = NULL; | ||
| 1896 | ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, | ||
| 1897 | &page, &fsdata); | ||
| 1898 | if (ret) { | ||
| 1899 | mlog_errno(ret); | ||
| 1900 | goto out; | ||
| 1901 | } | ||
| 1402 | 1902 | ||
| 1403 | ocfs2_put_write_source(&bp, page); | 1903 | dst = kmap_atomic(page, KM_USER0); |
| 1904 | memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes); | ||
| 1905 | kunmap_atomic(dst, KM_USER0); | ||
| 1906 | flush_dcache_page(page); | ||
| 1907 | ocfs2_put_write_source(user_page); | ||
| 1404 | 1908 | ||
| 1909 | copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, | ||
| 1910 | bytes, page, fsdata); | ||
| 1405 | if (copied < 0) { | 1911 | if (copied < 0) { |
| 1406 | mlog_errno(copied); | 1912 | mlog_errno(copied); |
| 1407 | ret = copied; | 1913 | ret = copied; |
| @@ -1409,7 +1915,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
| 1409 | } | 1915 | } |
| 1410 | 1916 | ||
| 1411 | total += copied; | 1917 | total += copied; |
| 1412 | *ppos = *ppos + copied; | 1918 | *ppos = pos + copied; |
| 1413 | count -= copied; | 1919 | count -= copied; |
| 1414 | 1920 | ||
| 1415 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); | 1921 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); |
| @@ -1579,52 +2085,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, | |||
| 1579 | struct pipe_buffer *buf, | 2085 | struct pipe_buffer *buf, |
| 1580 | struct splice_desc *sd) | 2086 | struct splice_desc *sd) |
| 1581 | { | 2087 | { |
| 1582 | int ret, count, total = 0; | 2088 | int ret, count; |
| 1583 | ssize_t copied = 0; | 2089 | ssize_t copied = 0; |
| 1584 | struct ocfs2_splice_write_priv sp; | 2090 | struct file *file = sd->u.file; |
| 2091 | unsigned int offset; | ||
| 2092 | struct page *page = NULL; | ||
| 2093 | void *fsdata; | ||
| 2094 | char *src, *dst; | ||
| 1585 | 2095 | ||
| 1586 | ret = buf->ops->confirm(pipe, buf); | 2096 | ret = buf->ops->confirm(pipe, buf); |
| 1587 | if (ret) | 2097 | if (ret) |
| 1588 | goto out; | 2098 | goto out; |
| 1589 | 2099 | ||
| 1590 | sp.s_sd = sd; | 2100 | offset = sd->pos & ~PAGE_CACHE_MASK; |
| 1591 | sp.s_buf = buf; | ||
| 1592 | sp.s_pipe = pipe; | ||
| 1593 | sp.s_offset = sd->pos & ~PAGE_CACHE_MASK; | ||
| 1594 | sp.s_buf_offset = buf->offset; | ||
| 1595 | |||
| 1596 | count = sd->len; | 2101 | count = sd->len; |
| 1597 | if (count + sp.s_offset > PAGE_CACHE_SIZE) | 2102 | if (count + offset > PAGE_CACHE_SIZE) |
| 1598 | count = PAGE_CACHE_SIZE - sp.s_offset; | 2103 | count = PAGE_CACHE_SIZE - offset; |
| 1599 | 2104 | ||
| 1600 | do { | 2105 | ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, |
| 1601 | /* | 2106 | &page, &fsdata); |
| 1602 | * splice wants us to copy up to one page at a | 2107 | if (ret) { |
| 1603 | * time. For pagesize > cluster size, this means we | 2108 | mlog_errno(ret); |
| 1604 | * might enter ocfs2_buffered_write_cluster() more | 2109 | goto out; |
| 1605 | * than once, so keep track of our progress here. | 2110 | } |
| 1606 | */ | ||
| 1607 | copied = ocfs2_buffered_write_cluster(sd->u.file, | ||
| 1608 | (loff_t)sd->pos + total, | ||
| 1609 | count, | ||
| 1610 | ocfs2_map_and_write_splice_data, | ||
| 1611 | &sp); | ||
| 1612 | if (copied < 0) { | ||
| 1613 | mlog_errno(copied); | ||
| 1614 | ret = copied; | ||
| 1615 | goto out; | ||
| 1616 | } | ||
| 1617 | 2111 | ||
| 1618 | count -= copied; | 2112 | src = buf->ops->map(pipe, buf, 1); |
| 1619 | sp.s_offset += copied; | 2113 | dst = kmap_atomic(page, KM_USER1); |
| 1620 | sp.s_buf_offset += copied; | 2114 | memcpy(dst + offset, src + buf->offset, count); |
| 1621 | total += copied; | 2115 | kunmap_atomic(page, KM_USER1); |
| 1622 | } while (count); | 2116 | buf->ops->unmap(pipe, buf, src); |
| 1623 | 2117 | ||
| 1624 | ret = 0; | 2118 | copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, |
| 2119 | page, fsdata); | ||
| 2120 | if (copied < 0) { | ||
| 2121 | mlog_errno(copied); | ||
| 2122 | ret = copied; | ||
| 2123 | goto out; | ||
| 2124 | } | ||
| 1625 | out: | 2125 | out: |
| 1626 | 2126 | ||
| 1627 | return total ? total : ret; | 2127 | return copied ? copied : ret; |
| 1628 | } | 2128 | } |
| 1629 | 2129 | ||
| 1630 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 2130 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index a4dd1fa1822b..36fe27f268ee 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
| @@ -39,15 +39,16 @@ enum ocfs2_alloc_restarted { | |||
| 39 | }; | 39 | }; |
| 40 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 40 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, |
| 41 | struct inode *inode, | 41 | struct inode *inode, |
| 42 | u32 *cluster_start, | 42 | u32 *logical_offset, |
| 43 | u32 clusters_to_add, | 43 | u32 clusters_to_add, |
| 44 | int mark_unwritten, | ||
| 44 | struct buffer_head *fe_bh, | 45 | struct buffer_head *fe_bh, |
| 45 | handle_t *handle, | 46 | handle_t *handle, |
| 46 | struct ocfs2_alloc_context *data_ac, | 47 | struct ocfs2_alloc_context *data_ac, |
| 47 | struct ocfs2_alloc_context *meta_ac, | 48 | struct ocfs2_alloc_context *meta_ac, |
| 48 | enum ocfs2_alloc_restarted *reason); | 49 | enum ocfs2_alloc_restarted *reason_ret); |
| 49 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | 50 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, |
| 50 | u32 clusters_to_add, | 51 | u32 clusters_to_add, u32 extents_to_split, |
| 51 | struct ocfs2_alloc_context **data_ac, | 52 | struct ocfs2_alloc_context **data_ac, |
| 52 | struct ocfs2_alloc_context **meta_ac); | 53 | struct ocfs2_alloc_context **meta_ac); |
| 53 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 54 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
| @@ -61,4 +62,7 @@ int ocfs2_should_update_atime(struct inode *inode, | |||
| 61 | int ocfs2_update_inode_atime(struct inode *inode, | 62 | int ocfs2_update_inode_atime(struct inode *inode, |
| 62 | struct buffer_head *bh); | 63 | struct buffer_head *bh); |
| 63 | 64 | ||
| 65 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | ||
| 66 | struct ocfs2_space_resv *sr); | ||
| 67 | |||
| 64 | #endif /* OCFS2_FILE_H */ | 68 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index b25ef63781ba..352eb4a13f98 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) | |||
| 157 | if (ocfs2_mount_local(osb)) | 157 | if (ocfs2_mount_local(osb)) |
| 158 | return 0; | 158 | return 0; |
| 159 | 159 | ||
| 160 | status = o2hb_register_callback(&osb->osb_hb_down); | 160 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); |
| 161 | if (status < 0) { | 161 | if (status < 0) { |
| 162 | mlog_errno(status); | 162 | mlog_errno(status); |
| 163 | goto bail; | 163 | goto bail; |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | status = o2hb_register_callback(&osb->osb_hb_up); | 166 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); |
| 167 | if (status < 0) { | 167 | if (status < 0) { |
| 168 | mlog_errno(status); | 168 | mlog_errno(status); |
| 169 | o2hb_unregister_callback(&osb->osb_hb_down); | 169 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); |
| 170 | } | 170 | } |
| 171 | 171 | ||
| 172 | bail: | 172 | bail: |
| @@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) | |||
| 178 | if (ocfs2_mount_local(osb)) | 178 | if (ocfs2_mount_local(osb)) |
| 179 | return; | 179 | return; |
| 180 | 180 | ||
| 181 | o2hb_unregister_callback(&osb->osb_hb_down); | 181 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); |
| 182 | o2hb_unregister_callback(&osb->osb_hb_up); | 182 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index f3ad21ad9aed..bd68c3f2afbe 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "ocfs2.h" | 14 | #include "ocfs2.h" |
| 15 | #include "alloc.h" | 15 | #include "alloc.h" |
| 16 | #include "dlmglue.h" | 16 | #include "dlmglue.h" |
| 17 | #include "file.h" | ||
| 17 | #include "inode.h" | 18 | #include "inode.h" |
| 18 | #include "journal.h" | 19 | #include "journal.h" |
| 19 | 20 | ||
| @@ -115,6 +116,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 115 | { | 116 | { |
| 116 | unsigned int flags; | 117 | unsigned int flags; |
| 117 | int status; | 118 | int status; |
| 119 | struct ocfs2_space_resv sr; | ||
| 118 | 120 | ||
| 119 | switch (cmd) { | 121 | switch (cmd) { |
| 120 | case OCFS2_IOC_GETFLAGS: | 122 | case OCFS2_IOC_GETFLAGS: |
| @@ -130,6 +132,14 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 130 | 132 | ||
| 131 | return ocfs2_set_inode_attr(inode, flags, | 133 | return ocfs2_set_inode_attr(inode, flags, |
| 132 | OCFS2_FL_MODIFIABLE); | 134 | OCFS2_FL_MODIFIABLE); |
| 135 | case OCFS2_IOC_RESVSP: | ||
| 136 | case OCFS2_IOC_RESVSP64: | ||
| 137 | case OCFS2_IOC_UNRESVSP: | ||
| 138 | case OCFS2_IOC_UNRESVSP64: | ||
| 139 | if (copy_from_user(&sr, (int __user *) arg, sizeof(sr))) | ||
| 140 | return -EFAULT; | ||
| 141 | |||
| 142 | return ocfs2_change_file_space(filp, cmd, &sr); | ||
| 133 | default: | 143 | default: |
| 134 | return -ENOTTY; | 144 | return -ENOTTY; |
| 135 | } | 145 | } |
| @@ -148,6 +158,11 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 148 | case OCFS2_IOC32_SETFLAGS: | 158 | case OCFS2_IOC32_SETFLAGS: |
| 149 | cmd = OCFS2_IOC_SETFLAGS; | 159 | cmd = OCFS2_IOC_SETFLAGS; |
| 150 | break; | 160 | break; |
| 161 | case OCFS2_IOC_RESVSP: | ||
| 162 | case OCFS2_IOC_RESVSP64: | ||
| 163 | case OCFS2_IOC_UNRESVSP: | ||
| 164 | case OCFS2_IOC_UNRESVSP64: | ||
| 165 | break; | ||
| 151 | default: | 166 | default: |
| 152 | return -ENOIOCTLCMD; | 167 | return -ENOIOCTLCMD; |
| 153 | } | 168 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index dc1188081720..dbfb20bb27ea 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
| 722 | container_of(work, struct ocfs2_journal, j_recovery_work); | 722 | container_of(work, struct ocfs2_journal, j_recovery_work); |
| 723 | struct ocfs2_super *osb = journal->j_osb; | 723 | struct ocfs2_super *osb = journal->j_osb; |
| 724 | struct ocfs2_dinode *la_dinode, *tl_dinode; | 724 | struct ocfs2_dinode *la_dinode, *tl_dinode; |
| 725 | struct ocfs2_la_recovery_item *item; | 725 | struct ocfs2_la_recovery_item *item, *n; |
| 726 | struct list_head *p, *n; | ||
| 727 | LIST_HEAD(tmp_la_list); | 726 | LIST_HEAD(tmp_la_list); |
| 728 | 727 | ||
| 729 | mlog_entry_void(); | 728 | mlog_entry_void(); |
| @@ -734,8 +733,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
| 734 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); | 733 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); |
| 735 | spin_unlock(&journal->j_lock); | 734 | spin_unlock(&journal->j_lock); |
| 736 | 735 | ||
| 737 | list_for_each_safe(p, n, &tmp_la_list) { | 736 | list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { |
| 738 | item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); | ||
| 739 | list_del_init(&item->lri_list); | 737 | list_del_init(&item->lri_list); |
| 740 | 738 | ||
| 741 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); | 739 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3db5de4506da..ce60aab013aa 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ | 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ |
| 290 | + OCFS2_TRUNCATE_LOG_UPDATE) | 290 | + OCFS2_TRUNCATE_LOG_UPDATE) |
| 291 | 291 | ||
| 292 | #define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) | ||
| 293 | |||
| 292 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 294 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
| 293 | * bitmap block for the new bit) */ | 295 | * bitmap block for the new bit) */ |
| 294 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 296 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af01158b39f5..d79aa12137d2 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
| @@ -37,11 +37,29 @@ | |||
| 37 | 37 | ||
| 38 | #include "ocfs2.h" | 38 | #include "ocfs2.h" |
| 39 | 39 | ||
| 40 | #include "aops.h" | ||
| 40 | #include "dlmglue.h" | 41 | #include "dlmglue.h" |
| 41 | #include "file.h" | 42 | #include "file.h" |
| 42 | #include "inode.h" | 43 | #include "inode.h" |
| 43 | #include "mmap.h" | 44 | #include "mmap.h" |
| 44 | 45 | ||
| 46 | static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) | ||
| 47 | { | ||
| 48 | /* The best way to deal with signals in the vm path is | ||
| 49 | * to block them upfront, rather than allowing the | ||
| 50 | * locking paths to return -ERESTARTSYS. */ | ||
| 51 | sigfillset(blocked); | ||
| 52 | |||
| 53 | /* We should technically never get a bad return value | ||
| 54 | * from sigprocmask */ | ||
| 55 | return sigprocmask(SIG_BLOCK, blocked, oldset); | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) | ||
| 59 | { | ||
| 60 | return sigprocmask(SIG_SETMASK, oldset, NULL); | ||
| 61 | } | ||
| 62 | |||
| 45 | static struct page *ocfs2_nopage(struct vm_area_struct * area, | 63 | static struct page *ocfs2_nopage(struct vm_area_struct * area, |
| 46 | unsigned long address, | 64 | unsigned long address, |
| 47 | int *type) | 65 | int *type) |
| @@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
| 53 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, | 71 | mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, |
| 54 | type); | 72 | type); |
| 55 | 73 | ||
| 56 | /* The best way to deal with signals in this path is | 74 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); |
| 57 | * to block them upfront, rather than allowing the | ||
| 58 | * locking paths to return -ERESTARTSYS. */ | ||
| 59 | sigfillset(&blocked); | ||
| 60 | |||
| 61 | /* We should technically never get a bad ret return | ||
| 62 | * from sigprocmask */ | ||
| 63 | ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); | ||
| 64 | if (ret < 0) { | 75 | if (ret < 0) { |
| 65 | mlog_errno(ret); | 76 | mlog_errno(ret); |
| 66 | goto out; | 77 | goto out; |
| @@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, | |||
| 68 | 79 | ||
| 69 | page = filemap_nopage(area, address, type); | 80 | page = filemap_nopage(area, address, type); |
| 70 | 81 | ||
| 71 | ret = sigprocmask(SIG_SETMASK, &oldset, NULL); | 82 | ret = ocfs2_vm_op_unblock_sigs(&oldset); |
| 72 | if (ret < 0) | 83 | if (ret < 0) |
| 73 | mlog_errno(ret); | 84 | mlog_errno(ret); |
| 74 | out: | 85 | out: |
| @@ -76,28 +87,136 @@ out: | |||
| 76 | return page; | 87 | return page; |
| 77 | } | 88 | } |
| 78 | 89 | ||
| 79 | static struct vm_operations_struct ocfs2_file_vm_ops = { | 90 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, |
| 80 | .nopage = ocfs2_nopage, | 91 | struct page *page) |
| 81 | }; | 92 | { |
| 93 | int ret; | ||
| 94 | struct address_space *mapping = inode->i_mapping; | ||
| 95 | loff_t pos = page->index << PAGE_CACHE_SHIFT; | ||
| 96 | unsigned int len = PAGE_CACHE_SIZE; | ||
| 97 | pgoff_t last_index; | ||
| 98 | struct page *locked_page = NULL; | ||
| 99 | void *fsdata; | ||
| 100 | loff_t size = i_size_read(inode); | ||
| 82 | 101 | ||
| 83 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | 102 | /* |
| 103 | * Another node might have truncated while we were waiting on | ||
| 104 | * cluster locks. | ||
| 105 | */ | ||
| 106 | last_index = size >> PAGE_CACHE_SHIFT; | ||
| 107 | if (page->index > last_index) { | ||
| 108 | ret = -EINVAL; | ||
| 109 | goto out; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * The i_size check above doesn't catch the case where nodes | ||
| 114 | * truncated and then re-extended the file. We'll re-check the | ||
| 115 | * page mapping after taking the page lock inside of | ||
| 116 | * ocfs2_write_begin_nolock(). | ||
| 117 | */ | ||
| 118 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | ||
| 119 | ret = -EINVAL; | ||
| 120 | goto out; | ||
| 121 | } | ||
| 122 | |||
| 123 | /* | ||
| 124 | * Call ocfs2_write_begin() and ocfs2_write_end() to take | ||
| 125 | * advantage of the allocation code there. We pass a write | ||
| 126 | * length of the whole page (chopped to i_size) to make sure | ||
| 127 | * the whole thing is allocated. | ||
| 128 | * | ||
| 129 | * Since we know the page is up to date, we don't have to | ||
| 130 | * worry about ocfs2_write_begin() skipping some buffer reads | ||
| 131 | * because the "write" would invalidate their data. | ||
| 132 | */ | ||
| 133 | if (page->index == last_index) | ||
| 134 | len = size & ~PAGE_CACHE_MASK; | ||
| 135 | |||
| 136 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | ||
| 137 | &fsdata, di_bh, page); | ||
| 138 | if (ret) { | ||
| 139 | if (ret != -ENOSPC) | ||
| 140 | mlog_errno(ret); | ||
| 141 | goto out; | ||
| 142 | } | ||
| 143 | |||
| 144 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | ||
| 145 | fsdata); | ||
| 146 | if (ret < 0) { | ||
| 147 | mlog_errno(ret); | ||
| 148 | goto out; | ||
| 149 | } | ||
| 150 | BUG_ON(ret != len); | ||
| 151 | ret = 0; | ||
| 152 | out: | ||
| 153 | return ret; | ||
| 154 | } | ||
| 155 | |||
| 156 | static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
| 84 | { | 157 | { |
| 85 | int ret = 0, lock_level = 0; | 158 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
| 86 | struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); | 159 | struct buffer_head *di_bh = NULL; |
| 160 | sigset_t blocked, oldset; | ||
| 161 | int ret, ret2; | ||
| 162 | |||
| 163 | ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); | ||
| 164 | if (ret < 0) { | ||
| 165 | mlog_errno(ret); | ||
| 166 | return ret; | ||
| 167 | } | ||
| 168 | |||
| 169 | /* | ||
| 170 | * The cluster locks taken will block a truncate from another | ||
| 171 | * node. Taking the data lock will also ensure that we don't | ||
| 172 | * attempt page truncation as part of a downconvert. | ||
| 173 | */ | ||
| 174 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
| 175 | if (ret < 0) { | ||
| 176 | mlog_errno(ret); | ||
| 177 | goto out; | ||
| 178 | } | ||
| 87 | 179 | ||
| 88 | /* | 180 | /* |
| 89 | * Only support shared writeable mmap for local mounts which | 181 | * The alloc sem should be enough to serialize with |
| 90 | * don't know about holes. | 182 | * ocfs2_truncate_file() changing i_size as well as any thread |
| 183 | * modifying the inode btree. | ||
| 91 | */ | 184 | */ |
| 92 | if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && | 185 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 93 | ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && | 186 | |
| 94 | ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { | 187 | ret = ocfs2_data_lock(inode, 1); |
| 95 | mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); | 188 | if (ret < 0) { |
| 96 | /* This is -EINVAL because generic_file_readonly_mmap | 189 | mlog_errno(ret); |
| 97 | * returns it in a similar situation. */ | 190 | goto out_meta_unlock; |
| 98 | return -EINVAL; | ||
| 99 | } | 191 | } |
| 100 | 192 | ||
| 193 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | ||
| 194 | |||
| 195 | ocfs2_data_unlock(inode, 1); | ||
| 196 | |||
| 197 | out_meta_unlock: | ||
| 198 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 199 | |||
| 200 | brelse(di_bh); | ||
| 201 | ocfs2_meta_unlock(inode, 1); | ||
| 202 | |||
| 203 | out: | ||
| 204 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | ||
| 205 | if (ret2 < 0) | ||
| 206 | mlog_errno(ret2); | ||
| 207 | |||
| 208 | return ret; | ||
| 209 | } | ||
| 210 | |||
| 211 | static struct vm_operations_struct ocfs2_file_vm_ops = { | ||
| 212 | .nopage = ocfs2_nopage, | ||
| 213 | .page_mkwrite = ocfs2_page_mkwrite, | ||
| 214 | }; | ||
| 215 | |||
| 216 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 217 | { | ||
| 218 | int ret = 0, lock_level = 0; | ||
| 219 | |||
| 101 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 220 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, |
| 102 | file->f_vfsmnt, &lock_level); | 221 | file->f_vfsmnt, &lock_level); |
| 103 | if (ret < 0) { | 222 | if (ret < 0) { |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 36289e6295ce..d430fdab16e9 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -1674,7 +1674,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1674 | u32 offset = 0; | 1674 | u32 offset = 0; |
| 1675 | 1675 | ||
| 1676 | inode->i_op = &ocfs2_symlink_inode_operations; | 1676 | inode->i_op = &ocfs2_symlink_inode_operations; |
| 1677 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, | 1677 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, |
| 1678 | new_fe_bh, | 1678 | new_fe_bh, |
| 1679 | handle, data_ac, NULL, | 1679 | handle, data_ac, NULL, |
| 1680 | NULL); | 1680 | NULL); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a860633e833f..5cc90a40b3c5 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -219,6 +219,7 @@ struct ocfs2_super | |||
| 219 | u16 max_slots; | 219 | u16 max_slots; |
| 220 | s16 node_num; | 220 | s16 node_num; |
| 221 | s16 slot_num; | 221 | s16 slot_num; |
| 222 | s16 preferred_slot; | ||
| 222 | int s_sectsize_bits; | 223 | int s_sectsize_bits; |
| 223 | int s_clustersize; | 224 | int s_clustersize; |
| 224 | int s_clustersize_bits; | 225 | int s_clustersize_bits; |
| @@ -305,6 +306,19 @@ static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb) | |||
| 305 | return 0; | 306 | return 0; |
| 306 | } | 307 | } |
| 307 | 308 | ||
| 309 | static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) | ||
| 310 | { | ||
| 311 | /* | ||
| 312 | * Support for sparse files is a pre-requisite | ||
| 313 | */ | ||
| 314 | if (!ocfs2_sparse_alloc(osb)) | ||
| 315 | return 0; | ||
| 316 | |||
| 317 | if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) | ||
| 318 | return 1; | ||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 308 | /* set / clear functions because cluster events can make these happen | 322 | /* set / clear functions because cluster events can make these happen |
| 309 | * in parallel so we want the transitions to be atomic. this also | 323 | * in parallel so we want the transitions to be atomic. this also |
| 310 | * means that any future flags osb_flags must be protected by spinlock | 324 | * means that any future flags osb_flags must be protected by spinlock |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f0d9eb08547a..82f8a75b207e 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -88,7 +88,7 @@ | |||
| 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB | 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB |
| 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ |
| 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) | 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) |
| 91 | #define OCFS2_FEATURE_RO_COMPAT_SUPP 0 | 91 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
| 92 | 92 | ||
| 93 | /* | 93 | /* |
| 94 | * Heartbeat-only devices are missing journals and other files. The | 94 | * Heartbeat-only devices are missing journals and other files. The |
| @@ -116,6 +116,11 @@ | |||
| 116 | */ | 116 | */ |
| 117 | #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 | 117 | #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 |
| 118 | 118 | ||
| 119 | /* | ||
| 120 | * Unwritten extents support. | ||
| 121 | */ | ||
| 122 | #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 | ||
| 123 | |||
| 119 | /* The byte offset of the first backup block will be 1G. | 124 | /* The byte offset of the first backup block will be 1G. |
| 120 | * The following will be 4G, 16G, 64G, 256G and 1T. | 125 | * The following will be 4G, 16G, 64G, 256G and 1T. |
| 121 | */ | 126 | */ |
| @@ -170,6 +175,32 @@ | |||
| 170 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 175 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) |
| 171 | 176 | ||
| 172 | /* | 177 | /* |
| 178 | * Space reservation / allocation / free ioctls and argument structure | ||
| 179 | * are designed to be compatible with XFS. | ||
| 180 | * | ||
| 181 | * ALLOCSP* and FREESP* are not and will never be supported, but are | ||
| 182 | * included here for completeness. | ||
| 183 | */ | ||
| 184 | struct ocfs2_space_resv { | ||
| 185 | __s16 l_type; | ||
| 186 | __s16 l_whence; | ||
| 187 | __s64 l_start; | ||
| 188 | __s64 l_len; /* len == 0 means until end of file */ | ||
| 189 | __s32 l_sysid; | ||
| 190 | __u32 l_pid; | ||
| 191 | __s32 l_pad[4]; /* reserve area */ | ||
| 192 | }; | ||
| 193 | |||
| 194 | #define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) | ||
| 195 | #define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) | ||
| 196 | #define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) | ||
| 197 | #define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) | ||
| 198 | #define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) | ||
| 199 | #define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) | ||
| 200 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | ||
| 201 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | ||
| 202 | |||
| 203 | /* | ||
| 173 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 204 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
| 174 | */ | 205 | */ |
| 175 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ | 206 | #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d8b79067dc14..af4882b62cfa 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -121,17 +121,25 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
| 121 | return ret; | 121 | return ret; |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si) | 124 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) |
| 125 | { | 125 | { |
| 126 | int i; | 126 | int i; |
| 127 | s16 ret = OCFS2_INVALID_SLOT; | 127 | s16 ret = OCFS2_INVALID_SLOT; |
| 128 | 128 | ||
| 129 | if (preferred >= 0 && preferred < si->si_num_slots) { | ||
| 130 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { | ||
| 131 | ret = preferred; | ||
| 132 | goto out; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 129 | for(i = 0; i < si->si_num_slots; i++) { | 136 | for(i = 0; i < si->si_num_slots; i++) { |
| 130 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { | 137 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { |
| 131 | ret = (s16) i; | 138 | ret = (s16) i; |
| 132 | break; | 139 | break; |
| 133 | } | 140 | } |
| 134 | } | 141 | } |
| 142 | out: | ||
| 135 | return ret; | 143 | return ret; |
| 136 | } | 144 | } |
| 137 | 145 | ||
| @@ -248,7 +256,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
| 248 | if (slot == OCFS2_INVALID_SLOT) { | 256 | if (slot == OCFS2_INVALID_SLOT) { |
| 249 | /* if no slot yet, then just take 1st available | 257 | /* if no slot yet, then just take 1st available |
| 250 | * one. */ | 258 | * one. */ |
| 251 | slot = __ocfs2_find_empty_slot(si); | 259 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); |
| 252 | if (slot == OCFS2_INVALID_SLOT) { | 260 | if (slot == OCFS2_INVALID_SLOT) { |
| 253 | spin_unlock(&si->si_lock); | 261 | spin_unlock(&si->si_lock); |
| 254 | mlog(ML_ERROR, "no free slots available!\n"); | 262 | mlog(ML_ERROR, "no free slots available!\n"); |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e3437626d183..d9c5c9fcb30f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
| 98 | u16 chain); | 98 | u16 chain); |
| 99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
| 100 | u32 wanted); | 100 | u32 wanted); |
| 101 | static int ocfs2_free_suballoc_bits(handle_t *handle, | ||
| 102 | struct inode *alloc_inode, | ||
| 103 | struct buffer_head *alloc_bh, | ||
| 104 | unsigned int start_bit, | ||
| 105 | u64 bg_blkno, | ||
| 106 | unsigned int count); | ||
| 107 | static inline u64 ocfs2_which_suballoc_group(u64 block, | ||
| 108 | unsigned int bit); | ||
| 109 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
| 110 | u64 bg_blkno, | 102 | u64 bg_blkno, |
| 111 | u16 bg_bit_off); | 103 | u16 bg_bit_off); |
| @@ -496,13 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
| 496 | 488 | ||
| 497 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | 489 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); |
| 498 | (*ac)->ac_which = OCFS2_AC_USE_META; | 490 | (*ac)->ac_which = OCFS2_AC_USE_META; |
| 499 | |||
| 500 | #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS | ||
| 501 | slot = 0; | ||
| 502 | #else | ||
| 503 | slot = osb->slot_num; | 491 | slot = osb->slot_num; |
| 504 | #endif | ||
| 505 | |||
| 506 | (*ac)->ac_group_search = ocfs2_block_group_search; | 492 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 507 | 493 | ||
| 508 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 494 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
| @@ -1626,12 +1612,12 @@ bail: | |||
| 1626 | /* | 1612 | /* |
| 1627 | * expects the suballoc inode to already be locked. | 1613 | * expects the suballoc inode to already be locked. |
| 1628 | */ | 1614 | */ |
| 1629 | static int ocfs2_free_suballoc_bits(handle_t *handle, | 1615 | int ocfs2_free_suballoc_bits(handle_t *handle, |
| 1630 | struct inode *alloc_inode, | 1616 | struct inode *alloc_inode, |
| 1631 | struct buffer_head *alloc_bh, | 1617 | struct buffer_head *alloc_bh, |
| 1632 | unsigned int start_bit, | 1618 | unsigned int start_bit, |
| 1633 | u64 bg_blkno, | 1619 | u64 bg_blkno, |
| 1634 | unsigned int count) | 1620 | unsigned int count) |
| 1635 | { | 1621 | { |
| 1636 | int status = 0; | 1622 | int status = 0; |
| 1637 | u32 tmp_used; | 1623 | u32 tmp_used; |
| @@ -1703,13 +1689,6 @@ bail: | |||
| 1703 | return status; | 1689 | return status; |
| 1704 | } | 1690 | } |
| 1705 | 1691 | ||
| 1706 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
| 1707 | { | ||
| 1708 | u64 group = block - (u64) bit; | ||
| 1709 | |||
| 1710 | return group; | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | int ocfs2_free_dinode(handle_t *handle, | 1692 | int ocfs2_free_dinode(handle_t *handle, |
| 1714 | struct inode *inode_alloc_inode, | 1693 | struct inode *inode_alloc_inode, |
| 1715 | struct buffer_head *inode_alloc_bh, | 1694 | struct buffer_head *inode_alloc_bh, |
| @@ -1723,19 +1702,6 @@ int ocfs2_free_dinode(handle_t *handle, | |||
| 1723 | inode_alloc_bh, bit, bg_blkno, 1); | 1702 | inode_alloc_bh, bit, bg_blkno, 1); |
| 1724 | } | 1703 | } |
| 1725 | 1704 | ||
| 1726 | int ocfs2_free_extent_block(handle_t *handle, | ||
| 1727 | struct inode *eb_alloc_inode, | ||
| 1728 | struct buffer_head *eb_alloc_bh, | ||
| 1729 | struct ocfs2_extent_block *eb) | ||
| 1730 | { | ||
| 1731 | u64 blk = le64_to_cpu(eb->h_blkno); | ||
| 1732 | u16 bit = le16_to_cpu(eb->h_suballoc_bit); | ||
| 1733 | u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
| 1734 | |||
| 1735 | return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, | ||
| 1736 | bit, bg_blkno, 1); | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | int ocfs2_free_clusters(handle_t *handle, | 1705 | int ocfs2_free_clusters(handle_t *handle, |
| 1740 | struct inode *bitmap_inode, | 1706 | struct inode *bitmap_inode, |
| 1741 | struct buffer_head *bitmap_bh, | 1707 | struct buffer_head *bitmap_bh, |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 1a3c94cb9250..f212dc01a84b 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -86,20 +86,29 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
| 86 | u32 *cluster_start, | 86 | u32 *cluster_start, |
| 87 | u32 *num_clusters); | 87 | u32 *num_clusters); |
| 88 | 88 | ||
| 89 | int ocfs2_free_suballoc_bits(handle_t *handle, | ||
| 90 | struct inode *alloc_inode, | ||
| 91 | struct buffer_head *alloc_bh, | ||
| 92 | unsigned int start_bit, | ||
| 93 | u64 bg_blkno, | ||
| 94 | unsigned int count); | ||
| 89 | int ocfs2_free_dinode(handle_t *handle, | 95 | int ocfs2_free_dinode(handle_t *handle, |
| 90 | struct inode *inode_alloc_inode, | 96 | struct inode *inode_alloc_inode, |
| 91 | struct buffer_head *inode_alloc_bh, | 97 | struct buffer_head *inode_alloc_bh, |
| 92 | struct ocfs2_dinode *di); | 98 | struct ocfs2_dinode *di); |
| 93 | int ocfs2_free_extent_block(handle_t *handle, | ||
| 94 | struct inode *eb_alloc_inode, | ||
| 95 | struct buffer_head *eb_alloc_bh, | ||
| 96 | struct ocfs2_extent_block *eb); | ||
| 97 | int ocfs2_free_clusters(handle_t *handle, | 99 | int ocfs2_free_clusters(handle_t *handle, |
| 98 | struct inode *bitmap_inode, | 100 | struct inode *bitmap_inode, |
| 99 | struct buffer_head *bitmap_bh, | 101 | struct buffer_head *bitmap_bh, |
| 100 | u64 start_blk, | 102 | u64 start_blk, |
| 101 | unsigned int num_clusters); | 103 | unsigned int num_clusters); |
| 102 | 104 | ||
| 105 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
| 106 | { | ||
| 107 | u64 group = block - (u64) bit; | ||
| 108 | |||
| 109 | return group; | ||
| 110 | } | ||
| 111 | |||
| 103 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, | 112 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, |
| 104 | u64 bg_blkno) | 113 | u64 bg_blkno) |
| 105 | { | 114 | { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 86b559c7dce9..3a5a1ed09ac9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -82,7 +82,8 @@ MODULE_AUTHOR("Oracle"); | |||
| 82 | MODULE_LICENSE("GPL"); | 82 | MODULE_LICENSE("GPL"); |
| 83 | 83 | ||
| 84 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 84 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
| 85 | unsigned long *mount_opt, int is_remount); | 85 | unsigned long *mount_opt, s16 *slot, |
| 86 | int is_remount); | ||
| 86 | static void ocfs2_put_super(struct super_block *sb); | 87 | static void ocfs2_put_super(struct super_block *sb); |
| 87 | static int ocfs2_mount_volume(struct super_block *sb); | 88 | static int ocfs2_mount_volume(struct super_block *sb); |
| 88 | static int ocfs2_remount(struct super_block *sb, int *flags, char *data); | 89 | static int ocfs2_remount(struct super_block *sb, int *flags, char *data); |
| @@ -114,8 +115,6 @@ static void ocfs2_write_super(struct super_block *sb); | |||
| 114 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); | 115 | static struct inode *ocfs2_alloc_inode(struct super_block *sb); |
| 115 | static void ocfs2_destroy_inode(struct inode *inode); | 116 | static void ocfs2_destroy_inode(struct inode *inode); |
| 116 | 117 | ||
| 117 | static unsigned long long ocfs2_max_file_offset(unsigned int blockshift); | ||
| 118 | |||
| 119 | static const struct super_operations ocfs2_sops = { | 118 | static const struct super_operations ocfs2_sops = { |
| 120 | .statfs = ocfs2_statfs, | 119 | .statfs = ocfs2_statfs, |
| 121 | .alloc_inode = ocfs2_alloc_inode, | 120 | .alloc_inode = ocfs2_alloc_inode, |
| @@ -140,6 +139,7 @@ enum { | |||
| 140 | Opt_data_ordered, | 139 | Opt_data_ordered, |
| 141 | Opt_data_writeback, | 140 | Opt_data_writeback, |
| 142 | Opt_atime_quantum, | 141 | Opt_atime_quantum, |
| 142 | Opt_slot, | ||
| 143 | Opt_err, | 143 | Opt_err, |
| 144 | }; | 144 | }; |
| 145 | 145 | ||
| @@ -154,6 +154,7 @@ static match_table_t tokens = { | |||
| 154 | {Opt_data_ordered, "data=ordered"}, | 154 | {Opt_data_ordered, "data=ordered"}, |
| 155 | {Opt_data_writeback, "data=writeback"}, | 155 | {Opt_data_writeback, "data=writeback"}, |
| 156 | {Opt_atime_quantum, "atime_quantum=%u"}, | 156 | {Opt_atime_quantum, "atime_quantum=%u"}, |
| 157 | {Opt_slot, "preferred_slot=%u"}, | ||
| 157 | {Opt_err, NULL} | 158 | {Opt_err, NULL} |
| 158 | }; | 159 | }; |
| 159 | 160 | ||
| @@ -318,7 +319,7 @@ static void ocfs2_destroy_inode(struct inode *inode) | |||
| 318 | /* From xfs_super.c:xfs_max_file_offset | 319 | /* From xfs_super.c:xfs_max_file_offset |
| 319 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. | 320 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. |
| 320 | */ | 321 | */ |
| 321 | static unsigned long long ocfs2_max_file_offset(unsigned int blockshift) | 322 | unsigned long long ocfs2_max_file_offset(unsigned int blockshift) |
| 322 | { | 323 | { |
| 323 | unsigned int pagefactor = 1; | 324 | unsigned int pagefactor = 1; |
| 324 | unsigned int bitshift = BITS_PER_LONG - 1; | 325 | unsigned int bitshift = BITS_PER_LONG - 1; |
| @@ -355,9 +356,10 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
| 355 | int incompat_features; | 356 | int incompat_features; |
| 356 | int ret = 0; | 357 | int ret = 0; |
| 357 | unsigned long parsed_options; | 358 | unsigned long parsed_options; |
| 359 | s16 slot; | ||
| 358 | struct ocfs2_super *osb = OCFS2_SB(sb); | 360 | struct ocfs2_super *osb = OCFS2_SB(sb); |
| 359 | 361 | ||
| 360 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { | 362 | if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) { |
| 361 | ret = -EINVAL; | 363 | ret = -EINVAL; |
| 362 | goto out; | 364 | goto out; |
| 363 | } | 365 | } |
| @@ -534,6 +536,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 534 | struct dentry *root; | 536 | struct dentry *root; |
| 535 | int status, sector_size; | 537 | int status, sector_size; |
| 536 | unsigned long parsed_opt; | 538 | unsigned long parsed_opt; |
| 539 | s16 slot; | ||
| 537 | struct inode *inode = NULL; | 540 | struct inode *inode = NULL; |
| 538 | struct ocfs2_super *osb = NULL; | 541 | struct ocfs2_super *osb = NULL; |
| 539 | struct buffer_head *bh = NULL; | 542 | struct buffer_head *bh = NULL; |
| @@ -541,7 +544,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 541 | 544 | ||
| 542 | mlog_entry("%p, %p, %i", sb, data, silent); | 545 | mlog_entry("%p, %p, %i", sb, data, silent); |
| 543 | 546 | ||
| 544 | if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { | 547 | if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) { |
| 545 | status = -EINVAL; | 548 | status = -EINVAL; |
| 546 | goto read_super_error; | 549 | goto read_super_error; |
| 547 | } | 550 | } |
| @@ -571,6 +574,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 571 | brelse(bh); | 574 | brelse(bh); |
| 572 | bh = NULL; | 575 | bh = NULL; |
| 573 | osb->s_mount_opt = parsed_opt; | 576 | osb->s_mount_opt = parsed_opt; |
| 577 | osb->preferred_slot = slot; | ||
| 574 | 578 | ||
| 575 | sb->s_magic = OCFS2_SUPER_MAGIC; | 579 | sb->s_magic = OCFS2_SUPER_MAGIC; |
| 576 | 580 | ||
| @@ -713,6 +717,7 @@ static struct file_system_type ocfs2_fs_type = { | |||
| 713 | static int ocfs2_parse_options(struct super_block *sb, | 717 | static int ocfs2_parse_options(struct super_block *sb, |
| 714 | char *options, | 718 | char *options, |
| 715 | unsigned long *mount_opt, | 719 | unsigned long *mount_opt, |
| 720 | s16 *slot, | ||
| 716 | int is_remount) | 721 | int is_remount) |
| 717 | { | 722 | { |
| 718 | int status; | 723 | int status; |
| @@ -722,6 +727,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 722 | options ? options : "(none)"); | 727 | options ? options : "(none)"); |
| 723 | 728 | ||
| 724 | *mount_opt = 0; | 729 | *mount_opt = 0; |
| 730 | *slot = OCFS2_INVALID_SLOT; | ||
| 725 | 731 | ||
| 726 | if (!options) { | 732 | if (!options) { |
| 727 | status = 1; | 733 | status = 1; |
| @@ -782,6 +788,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 782 | else | 788 | else |
| 783 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 789 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 784 | break; | 790 | break; |
| 791 | case Opt_slot: | ||
| 792 | option = 0; | ||
| 793 | if (match_int(&args[0], &option)) { | ||
| 794 | status = 0; | ||
| 795 | goto bail; | ||
| 796 | } | ||
| 797 | if (option) | ||
| 798 | *slot = (s16)option; | ||
| 799 | break; | ||
| 785 | default: | 800 | default: |
| 786 | mlog(ML_ERROR, | 801 | mlog(ML_ERROR, |
| 787 | "Unrecognized mount option \"%s\" " | 802 | "Unrecognized mount option \"%s\" " |
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index 783f5270f2a1..3b9cb3d0b008 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h | |||
| @@ -45,4 +45,6 @@ void __ocfs2_abort(struct super_block *sb, | |||
| 45 | 45 | ||
| 46 | #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) | 46 | #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) |
| 47 | 47 | ||
| 48 | unsigned long long ocfs2_max_file_offset(unsigned int blockshift); | ||
| 49 | |||
| 48 | #endif /* OCFS2_SUPER_H */ | 50 | #endif /* OCFS2_SUPER_H */ |
| @@ -855,7 +855,7 @@ EXPORT_SYMBOL(dentry_open); | |||
| 855 | /* | 855 | /* |
| 856 | * Find an empty file descriptor entry, and mark it busy. | 856 | * Find an empty file descriptor entry, and mark it busy. |
| 857 | */ | 857 | */ |
| 858 | int get_unused_fd(void) | 858 | int get_unused_fd_flags(int flags) |
| 859 | { | 859 | { |
| 860 | struct files_struct * files = current->files; | 860 | struct files_struct * files = current->files; |
| 861 | int fd, error; | 861 | int fd, error; |
| @@ -891,7 +891,10 @@ repeat: | |||
| 891 | } | 891 | } |
| 892 | 892 | ||
| 893 | FD_SET(fd, fdt->open_fds); | 893 | FD_SET(fd, fdt->open_fds); |
| 894 | FD_CLR(fd, fdt->close_on_exec); | 894 | if (flags & O_CLOEXEC) |
| 895 | FD_SET(fd, fdt->close_on_exec); | ||
| 896 | else | ||
| 897 | FD_CLR(fd, fdt->close_on_exec); | ||
| 895 | files->next_fd = fd + 1; | 898 | files->next_fd = fd + 1; |
| 896 | #if 1 | 899 | #if 1 |
| 897 | /* Sanity check */ | 900 | /* Sanity check */ |
| @@ -907,6 +910,11 @@ out: | |||
| 907 | return error; | 910 | return error; |
| 908 | } | 911 | } |
| 909 | 912 | ||
| 913 | int get_unused_fd(void) | ||
| 914 | { | ||
| 915 | return get_unused_fd_flags(0); | ||
| 916 | } | ||
| 917 | |||
| 910 | EXPORT_SYMBOL(get_unused_fd); | 918 | EXPORT_SYMBOL(get_unused_fd); |
| 911 | 919 | ||
| 912 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) | 920 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) |
| @@ -959,7 +967,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) | |||
| 959 | int fd = PTR_ERR(tmp); | 967 | int fd = PTR_ERR(tmp); |
| 960 | 968 | ||
| 961 | if (!IS_ERR(tmp)) { | 969 | if (!IS_ERR(tmp)) { |
| 962 | fd = get_unused_fd(); | 970 | fd = get_unused_fd_flags(flags); |
| 963 | if (fd >= 0) { | 971 | if (fd >= 0) { |
| 964 | struct file *f = do_filp_open(dfd, tmp, flags, mode); | 972 | struct file *f = do_filp_open(dfd, tmp, flags, mode); |
| 965 | if (IS_ERR(f)) { | 973 | if (IS_ERR(f)) { |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index e3491328596b..3d3e16631472 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | #define PARTITION_RISCIX_SCSI 2 | 25 | #define PARTITION_RISCIX_SCSI 2 |
| 26 | #define PARTITION_LINUX 9 | 26 | #define PARTITION_LINUX 9 |
| 27 | 27 | ||
| 28 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
| 29 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
| 28 | static struct adfs_discrecord * | 30 | static struct adfs_discrecord * |
| 29 | adfs_partition(struct parsed_partitions *state, char *name, char *data, | 31 | adfs_partition(struct parsed_partitions *state, char *name, char *data, |
| 30 | unsigned long first_sector, int slot) | 32 | unsigned long first_sector, int slot) |
| @@ -48,6 +50,7 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data, | |||
| 48 | put_partition(state, slot, first_sector, nr_sects); | 50 | put_partition(state, slot, first_sector, nr_sects); |
| 49 | return dr; | 51 | return dr; |
| 50 | } | 52 | } |
| 53 | #endif | ||
| 51 | 54 | ||
| 52 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 55 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
| 53 | 56 | ||
| @@ -65,6 +68,8 @@ struct riscix_record { | |||
| 65 | struct riscix_part part[8]; | 68 | struct riscix_part part[8]; |
| 66 | }; | 69 | }; |
| 67 | 70 | ||
| 71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
| 72 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
| 68 | static int | 73 | static int |
| 69 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | 74 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, |
| 70 | unsigned long first_sect, int slot, unsigned long nr_sects) | 75 | unsigned long first_sect, int slot, unsigned long nr_sects) |
| @@ -105,6 +110,7 @@ riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
| 105 | return slot; | 110 | return slot; |
| 106 | } | 111 | } |
| 107 | #endif | 112 | #endif |
| 113 | #endif | ||
| 108 | 114 | ||
| 109 | #define LINUX_NATIVE_MAGIC 0xdeafa1de | 115 | #define LINUX_NATIVE_MAGIC 0xdeafa1de |
| 110 | #define LINUX_SWAP_MAGIC 0xdeafab1e | 116 | #define LINUX_SWAP_MAGIC 0xdeafab1e |
| @@ -115,6 +121,8 @@ struct linux_part { | |||
| 115 | __le32 nr_sects; | 121 | __le32 nr_sects; |
| 116 | }; | 122 | }; |
| 117 | 123 | ||
| 124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | ||
| 125 | defined(CONFIG_ACORN_PARTITION_ADFS) | ||
| 118 | static int | 126 | static int |
| 119 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, | 127 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, |
| 120 | unsigned long first_sect, int slot, unsigned long nr_sects) | 128 | unsigned long first_sect, int slot, unsigned long nr_sects) |
| @@ -146,6 +154,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
| 146 | put_dev_sector(sect); | 154 | put_dev_sector(sect); |
| 147 | return slot; | 155 | return slot; |
| 148 | } | 156 | } |
| 157 | #endif | ||
| 149 | 158 | ||
| 150 | #ifdef CONFIG_ACORN_PARTITION_CUMANA | 159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA |
| 151 | int | 160 | int |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 99873a2b4cbc..e7dd1d4e3473 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
| @@ -677,15 +677,24 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
| 677 | * Return: -1 Error, the calculated offset exceeded the size of the buffer | 677 | * Return: -1 Error, the calculated offset exceeded the size of the buffer |
| 678 | * n OK, a range-checked offset into buffer | 678 | * n OK, a range-checked offset into buffer |
| 679 | */ | 679 | */ |
| 680 | static int ldm_relative (const u8 *buffer, int buflen, int base, int offset) | 680 | static int ldm_relative(const u8 *buffer, int buflen, int base, int offset) |
| 681 | { | 681 | { |
| 682 | 682 | ||
| 683 | base += offset; | 683 | base += offset; |
| 684 | if ((!buffer) || (offset < 0) || (base > buflen)) | 684 | if (!buffer || offset < 0 || base > buflen) { |
| 685 | if (!buffer) | ||
| 686 | ldm_error("!buffer"); | ||
| 687 | if (offset < 0) | ||
| 688 | ldm_error("offset (%d) < 0", offset); | ||
| 689 | if (base > buflen) | ||
| 690 | ldm_error("base (%d) > buflen (%d)", base, buflen); | ||
| 685 | return -1; | 691 | return -1; |
| 686 | if ((base + buffer[base]) >= buflen) | 692 | } |
| 693 | if (base + buffer[base] >= buflen) { | ||
| 694 | ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base, | ||
| 695 | buffer[base], buflen); | ||
| 687 | return -1; | 696 | return -1; |
| 688 | 697 | } | |
| 689 | return buffer[base] + offset + 1; | 698 | return buffer[base] + offset + 1; |
| 690 | } | 699 | } |
| 691 | 700 | ||
| @@ -1054,60 +1063,98 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) | |||
| 1054 | * Return: 'true' @vb contains a Volume VBLK | 1063 | * Return: 'true' @vb contains a Volume VBLK |
| 1055 | * 'false' @vb contents are not defined | 1064 | * 'false' @vb contents are not defined |
| 1056 | */ | 1065 | */ |
| 1057 | static bool ldm_parse_vol5 (const u8 *buffer, int buflen, struct vblk *vb) | 1066 | static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) |
| 1058 | { | 1067 | { |
| 1059 | int r_objid, r_name, r_vtype, r_child, r_size, r_id1, r_id2, r_size2; | 1068 | int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size; |
| 1060 | int r_drive, len; | 1069 | int r_id1, r_id2, r_size2, r_drive, len; |
| 1061 | struct vblk_volu *volu; | 1070 | struct vblk_volu *volu; |
| 1062 | 1071 | ||
| 1063 | BUG_ON (!buffer || !vb); | 1072 | BUG_ON(!buffer || !vb); |
| 1064 | 1073 | r_objid = ldm_relative(buffer, buflen, 0x18, 0); | |
| 1065 | r_objid = ldm_relative (buffer, buflen, 0x18, 0); | 1074 | if (r_objid < 0) { |
| 1066 | r_name = ldm_relative (buffer, buflen, 0x18, r_objid); | 1075 | ldm_error("r_objid %d < 0", r_objid); |
| 1067 | r_vtype = ldm_relative (buffer, buflen, 0x18, r_name); | 1076 | return false; |
| 1068 | r_child = ldm_relative (buffer, buflen, 0x2E, r_vtype); | 1077 | } |
| 1069 | r_size = ldm_relative (buffer, buflen, 0x3E, r_child); | 1078 | r_name = ldm_relative(buffer, buflen, 0x18, r_objid); |
| 1070 | 1079 | if (r_name < 0) { | |
| 1071 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) | 1080 | ldm_error("r_name %d < 0", r_name); |
| 1072 | r_id1 = ldm_relative (buffer, buflen, 0x53, r_size); | 1081 | return false; |
| 1073 | else | 1082 | } |
| 1083 | r_vtype = ldm_relative(buffer, buflen, 0x18, r_name); | ||
| 1084 | if (r_vtype < 0) { | ||
| 1085 | ldm_error("r_vtype %d < 0", r_vtype); | ||
| 1086 | return false; | ||
| 1087 | } | ||
| 1088 | r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype); | ||
| 1089 | if (r_disable_drive_letter < 0) { | ||
| 1090 | ldm_error("r_disable_drive_letter %d < 0", | ||
| 1091 | r_disable_drive_letter); | ||
| 1092 | return false; | ||
| 1093 | } | ||
| 1094 | r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter); | ||
| 1095 | if (r_child < 0) { | ||
| 1096 | ldm_error("r_child %d < 0", r_child); | ||
| 1097 | return false; | ||
| 1098 | } | ||
| 1099 | r_size = ldm_relative(buffer, buflen, 0x3D, r_child); | ||
| 1100 | if (r_size < 0) { | ||
| 1101 | ldm_error("r_size %d < 0", r_size); | ||
| 1102 | return false; | ||
| 1103 | } | ||
| 1104 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) { | ||
| 1105 | r_id1 = ldm_relative(buffer, buflen, 0x52, r_size); | ||
| 1106 | if (r_id1 < 0) { | ||
| 1107 | ldm_error("r_id1 %d < 0", r_id1); | ||
| 1108 | return false; | ||
| 1109 | } | ||
| 1110 | } else | ||
| 1074 | r_id1 = r_size; | 1111 | r_id1 = r_size; |
| 1075 | 1112 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) { | |
| 1076 | if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) | 1113 | r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1); |
| 1077 | r_id2 = ldm_relative (buffer, buflen, 0x53, r_id1); | 1114 | if (r_id2 < 0) { |
| 1078 | else | 1115 | ldm_error("r_id2 %d < 0", r_id2); |
| 1116 | return false; | ||
| 1117 | } | ||
| 1118 | } else | ||
| 1079 | r_id2 = r_id1; | 1119 | r_id2 = r_id1; |
| 1080 | 1120 | if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) { | |
| 1081 | if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) | 1121 | r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2); |
| 1082 | r_size2 = ldm_relative (buffer, buflen, 0x53, r_id2); | 1122 | if (r_size2 < 0) { |
| 1083 | else | 1123 | ldm_error("r_size2 %d < 0", r_size2); |
| 1124 | return false; | ||
| 1125 | } | ||
| 1126 | } else | ||
| 1084 | r_size2 = r_id2; | 1127 | r_size2 = r_id2; |
| 1085 | 1128 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { | |
| 1086 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) | 1129 | r_drive = ldm_relative(buffer, buflen, 0x52, r_size2); |
| 1087 | r_drive = ldm_relative (buffer, buflen, 0x53, r_size2); | 1130 | if (r_drive < 0) { |
| 1088 | else | 1131 | ldm_error("r_drive %d < 0", r_drive); |
| 1132 | return false; | ||
| 1133 | } | ||
| 1134 | } else | ||
| 1089 | r_drive = r_size2; | 1135 | r_drive = r_size2; |
| 1090 | |||
| 1091 | len = r_drive; | 1136 | len = r_drive; |
| 1092 | if (len < 0) | 1137 | if (len < 0) { |
| 1138 | ldm_error("len %d < 0", len); | ||
| 1093 | return false; | 1139 | return false; |
| 1094 | 1140 | } | |
| 1095 | len += VBLK_SIZE_VOL5; | 1141 | len += VBLK_SIZE_VOL5; |
| 1096 | if (len != BE32 (buffer + 0x14)) | 1142 | if (len > BE32(buffer + 0x14)) { |
| 1143 | ldm_error("len %d > BE32(buffer + 0x14) %d", len, | ||
| 1144 | BE32(buffer + 0x14)); | ||
| 1097 | return false; | 1145 | return false; |
| 1098 | 1146 | } | |
| 1099 | volu = &vb->vblk.volu; | 1147 | volu = &vb->vblk.volu; |
| 1100 | 1148 | ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type, | |
| 1101 | ldm_get_vstr (buffer + 0x18 + r_name, volu->volume_type, | 1149 | sizeof(volu->volume_type)); |
| 1102 | sizeof (volu->volume_type)); | 1150 | memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter, |
| 1103 | memcpy (volu->volume_state, buffer + 0x19 + r_vtype, | 1151 | sizeof(volu->volume_state)); |
| 1104 | sizeof (volu->volume_state)); | 1152 | volu->size = ldm_get_vnum(buffer + 0x3D + r_child); |
| 1105 | volu->size = ldm_get_vnum (buffer + 0x3E + r_child); | 1153 | volu->partition_type = buffer[0x41 + r_size]; |
| 1106 | volu->partition_type = buffer[0x42 + r_size]; | 1154 | memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid)); |
| 1107 | memcpy (volu->guid, buffer + 0x43 + r_size, sizeof (volu->guid)); | ||
| 1108 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { | 1155 | if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { |
| 1109 | ldm_get_vstr (buffer + 0x53 + r_size, volu->drive_hint, | 1156 | ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint, |
| 1110 | sizeof (volu->drive_hint)); | 1157 | sizeof(volu->drive_hint)); |
| 1111 | } | 1158 | } |
| 1112 | return true; | 1159 | return true; |
| 1113 | } | 1160 | } |
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index d2e6a3046939..80f63b5fdd9f 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h | |||
| @@ -68,7 +68,7 @@ struct parsed_partitions; | |||
| 68 | #define VBLK_SIZE_DSK3 12 | 68 | #define VBLK_SIZE_DSK3 12 |
| 69 | #define VBLK_SIZE_DSK4 45 | 69 | #define VBLK_SIZE_DSK4 45 |
| 70 | #define VBLK_SIZE_PRT3 28 | 70 | #define VBLK_SIZE_PRT3 28 |
| 71 | #define VBLK_SIZE_VOL5 59 | 71 | #define VBLK_SIZE_VOL5 58 |
| 72 | 72 | ||
| 73 | /* component types */ | 73 | /* component types */ |
| 74 | #define COMP_STRIPE 0x01 /* Stripe-set */ | 74 | #define COMP_STRIPE 0x01 /* Stripe-set */ |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 98e78e2f18d6..965625a0977d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
| @@ -62,6 +62,8 @@ | |||
| 62 | #include <linux/mman.h> | 62 | #include <linux/mman.h> |
| 63 | #include <linux/proc_fs.h> | 63 | #include <linux/proc_fs.h> |
| 64 | #include <linux/ioport.h> | 64 | #include <linux/ioport.h> |
| 65 | #include <linux/uaccess.h> | ||
| 66 | #include <linux/io.h> | ||
| 65 | #include <linux/mm.h> | 67 | #include <linux/mm.h> |
| 66 | #include <linux/hugetlb.h> | 68 | #include <linux/hugetlb.h> |
| 67 | #include <linux/pagemap.h> | 69 | #include <linux/pagemap.h> |
| @@ -76,9 +78,7 @@ | |||
| 76 | #include <linux/rcupdate.h> | 78 | #include <linux/rcupdate.h> |
| 77 | #include <linux/delayacct.h> | 79 | #include <linux/delayacct.h> |
| 78 | 80 | ||
| 79 | #include <asm/uaccess.h> | ||
| 80 | #include <asm/pgtable.h> | 81 | #include <asm/pgtable.h> |
| 81 | #include <asm/io.h> | ||
| 82 | #include <asm/processor.h> | 82 | #include <asm/processor.h> |
| 83 | #include "internal.h" | 83 | #include "internal.h" |
| 84 | 84 | ||
| @@ -87,10 +87,10 @@ | |||
| 87 | do { memcpy(buffer, string, strlen(string)); \ | 87 | do { memcpy(buffer, string, strlen(string)); \ |
| 88 | buffer += strlen(string); } while (0) | 88 | buffer += strlen(string); } while (0) |
| 89 | 89 | ||
| 90 | static inline char * task_name(struct task_struct *p, char * buf) | 90 | static inline char *task_name(struct task_struct *p, char *buf) |
| 91 | { | 91 | { |
| 92 | int i; | 92 | int i; |
| 93 | char * name; | 93 | char *name; |
| 94 | char tcomm[sizeof(p->comm)]; | 94 | char tcomm[sizeof(p->comm)]; |
| 95 | 95 | ||
| 96 | get_task_comm(tcomm, p); | 96 | get_task_comm(tcomm, p); |
| @@ -138,7 +138,7 @@ static const char *task_state_array[] = { | |||
| 138 | "X (dead)" /* 32 */ | 138 | "X (dead)" /* 32 */ |
| 139 | }; | 139 | }; |
| 140 | 140 | ||
| 141 | static inline const char * get_task_state(struct task_struct *tsk) | 141 | static inline const char *get_task_state(struct task_struct *tsk) |
| 142 | { | 142 | { |
| 143 | unsigned int state = (tsk->state & (TASK_RUNNING | | 143 | unsigned int state = (tsk->state & (TASK_RUNNING | |
| 144 | TASK_INTERRUPTIBLE | | 144 | TASK_INTERRUPTIBLE | |
| @@ -156,7 +156,7 @@ static inline const char * get_task_state(struct task_struct *tsk) | |||
| 156 | return *p; | 156 | return *p; |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | static inline char * task_state(struct task_struct *p, char *buffer) | 159 | static inline char *task_state(struct task_struct *p, char *buffer) |
| 160 | { | 160 | { |
| 161 | struct group_info *group_info; | 161 | struct group_info *group_info; |
| 162 | int g; | 162 | int g; |
| @@ -172,8 +172,8 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
| 172 | "Uid:\t%d\t%d\t%d\t%d\n" | 172 | "Uid:\t%d\t%d\t%d\t%d\n" |
| 173 | "Gid:\t%d\t%d\t%d\t%d\n", | 173 | "Gid:\t%d\t%d\t%d\t%d\n", |
| 174 | get_task_state(p), | 174 | get_task_state(p), |
| 175 | p->tgid, p->pid, | 175 | p->tgid, p->pid, |
| 176 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, | 176 | pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, |
| 177 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, | 177 | pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, |
| 178 | p->uid, p->euid, p->suid, p->fsuid, | 178 | p->uid, p->euid, p->suid, p->fsuid, |
| 179 | p->gid, p->egid, p->sgid, p->fsgid); | 179 | p->gid, p->egid, p->sgid, p->fsgid); |
| @@ -191,15 +191,15 @@ static inline char * task_state(struct task_struct *p, char *buffer) | |||
| 191 | get_group_info(group_info); | 191 | get_group_info(group_info); |
| 192 | task_unlock(p); | 192 | task_unlock(p); |
| 193 | 193 | ||
| 194 | for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) | 194 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) |
| 195 | buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); | 195 | buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g)); |
| 196 | put_group_info(group_info); | 196 | put_group_info(group_info); |
| 197 | 197 | ||
| 198 | buffer += sprintf(buffer, "\n"); | 198 | buffer += sprintf(buffer, "\n"); |
| 199 | return buffer; | 199 | return buffer; |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) | 202 | static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) |
| 203 | { | 203 | { |
| 204 | int i, len; | 204 | int i, len; |
| 205 | 205 | ||
| @@ -239,7 +239,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, | |||
| 239 | } | 239 | } |
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | static inline char * task_sig(struct task_struct *p, char *buffer) | 242 | static inline char *task_sig(struct task_struct *p, char *buffer) |
| 243 | { | 243 | { |
| 244 | unsigned long flags; | 244 | unsigned long flags; |
| 245 | sigset_t pending, shpending, blocked, ignored, caught; | 245 | sigset_t pending, shpending, blocked, ignored, caught; |
| @@ -289,14 +289,23 @@ static inline char *task_cap(struct task_struct *p, char *buffer) | |||
| 289 | cap_t(p->cap_effective)); | 289 | cap_t(p->cap_effective)); |
| 290 | } | 290 | } |
| 291 | 291 | ||
| 292 | int proc_pid_status(struct task_struct *task, char * buffer) | 292 | static inline char *task_context_switch_counts(struct task_struct *p, |
| 293 | char *buffer) | ||
| 293 | { | 294 | { |
| 294 | char * orig = buffer; | 295 | return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n" |
| 296 | "nonvoluntary_ctxt_switches:\t%lu\n", | ||
| 297 | p->nvcsw, | ||
| 298 | p->nivcsw); | ||
| 299 | } | ||
| 300 | |||
| 301 | int proc_pid_status(struct task_struct *task, char *buffer) | ||
| 302 | { | ||
| 303 | char *orig = buffer; | ||
| 295 | struct mm_struct *mm = get_task_mm(task); | 304 | struct mm_struct *mm = get_task_mm(task); |
| 296 | 305 | ||
| 297 | buffer = task_name(task, buffer); | 306 | buffer = task_name(task, buffer); |
| 298 | buffer = task_state(task, buffer); | 307 | buffer = task_state(task, buffer); |
| 299 | 308 | ||
| 300 | if (mm) { | 309 | if (mm) { |
| 301 | buffer = task_mem(mm, buffer); | 310 | buffer = task_mem(mm, buffer); |
| 302 | mmput(mm); | 311 | mmput(mm); |
| @@ -307,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) | |||
| 307 | #if defined(CONFIG_S390) | 316 | #if defined(CONFIG_S390) |
| 308 | buffer = task_show_regs(task, buffer); | 317 | buffer = task_show_regs(task, buffer); |
| 309 | #endif | 318 | #endif |
| 319 | buffer = task_context_switch_counts(task, buffer); | ||
| 310 | return buffer - orig; | 320 | return buffer - orig; |
| 311 | } | 321 | } |
| 312 | 322 | ||
| @@ -332,7 +342,7 @@ static clock_t task_utime(struct task_struct *p) | |||
| 332 | 342 | ||
| 333 | static clock_t task_stime(struct task_struct *p) | 343 | static clock_t task_stime(struct task_struct *p) |
| 334 | { | 344 | { |
| 335 | clock_t stime = cputime_to_clock_t(p->stime); | 345 | clock_t stime; |
| 336 | 346 | ||
| 337 | /* | 347 | /* |
| 338 | * Use CFS's precise accounting. (we subtract utime from | 348 | * Use CFS's precise accounting. (we subtract utime from |
| @@ -344,8 +354,7 @@ static clock_t task_stime(struct task_struct *p) | |||
| 344 | return stime; | 354 | return stime; |
| 345 | } | 355 | } |
| 346 | 356 | ||
| 347 | 357 | static int do_task_stat(struct task_struct *task, char *buffer, int whole) | |
| 348 | static int do_task_stat(struct task_struct *task, char * buffer, int whole) | ||
| 349 | { | 358 | { |
| 350 | unsigned long vsize, eip, esp, wchan = ~0UL; | 359 | unsigned long vsize, eip, esp, wchan = ~0UL; |
| 351 | long priority, nice; | 360 | long priority, nice; |
| @@ -353,7 +362,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 353 | sigset_t sigign, sigcatch; | 362 | sigset_t sigign, sigcatch; |
| 354 | char state; | 363 | char state; |
| 355 | int res; | 364 | int res; |
| 356 | pid_t ppid = 0, pgid = -1, sid = -1; | 365 | pid_t ppid = 0, pgid = -1, sid = -1; |
| 357 | int num_threads = 0; | 366 | int num_threads = 0; |
| 358 | struct mm_struct *mm; | 367 | struct mm_struct *mm; |
| 359 | unsigned long long start_time; | 368 | unsigned long long start_time; |
| @@ -424,7 +433,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 424 | } | 433 | } |
| 425 | rcu_read_unlock(); | 434 | rcu_read_unlock(); |
| 426 | 435 | ||
| 427 | if (!whole || num_threads<2) | 436 | if (!whole || num_threads < 2) |
| 428 | wchan = get_wchan(task); | 437 | wchan = get_wchan(task); |
| 429 | if (!whole) { | 438 | if (!whole) { |
| 430 | min_flt = task->min_flt; | 439 | min_flt = task->min_flt; |
| @@ -440,12 +449,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 440 | 449 | ||
| 441 | /* Temporary variable needed for gcc-2.96 */ | 450 | /* Temporary variable needed for gcc-2.96 */ |
| 442 | /* convert timespec -> nsec*/ | 451 | /* convert timespec -> nsec*/ |
| 443 | start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC | 452 | start_time = |
| 444 | + task->start_time.tv_nsec; | 453 | (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC |
| 454 | + task->real_start_time.tv_nsec; | ||
| 445 | /* convert nsec -> ticks */ | 455 | /* convert nsec -> ticks */ |
| 446 | start_time = nsec_to_clock_t(start_time); | 456 | start_time = nsec_to_clock_t(start_time); |
| 447 | 457 | ||
| 448 | res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \ | 458 | res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ |
| 449 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 459 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ |
| 450 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", | 460 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", |
| 451 | task->pid, | 461 | task->pid, |
| @@ -471,7 +481,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 471 | start_time, | 481 | start_time, |
| 472 | vsize, | 482 | vsize, |
| 473 | mm ? get_mm_rss(mm) : 0, | 483 | mm ? get_mm_rss(mm) : 0, |
| 474 | rsslim, | 484 | rsslim, |
| 475 | mm ? mm->start_code : 0, | 485 | mm ? mm->start_code : 0, |
| 476 | mm ? mm->end_code : 0, | 486 | mm ? mm->end_code : 0, |
| 477 | mm ? mm->start_stack : 0, | 487 | mm ? mm->start_stack : 0, |
| @@ -493,17 +503,17 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
| 493 | task->rt_priority, | 503 | task->rt_priority, |
| 494 | task->policy, | 504 | task->policy, |
| 495 | (unsigned long long)delayacct_blkio_ticks(task)); | 505 | (unsigned long long)delayacct_blkio_ticks(task)); |
| 496 | if(mm) | 506 | if (mm) |
| 497 | mmput(mm); | 507 | mmput(mm); |
| 498 | return res; | 508 | return res; |
| 499 | } | 509 | } |
| 500 | 510 | ||
| 501 | int proc_tid_stat(struct task_struct *task, char * buffer) | 511 | int proc_tid_stat(struct task_struct *task, char *buffer) |
| 502 | { | 512 | { |
| 503 | return do_task_stat(task, buffer, 0); | 513 | return do_task_stat(task, buffer, 0); |
| 504 | } | 514 | } |
| 505 | 515 | ||
| 506 | int proc_tgid_stat(struct task_struct *task, char * buffer) | 516 | int proc_tgid_stat(struct task_struct *task, char *buffer) |
| 507 | { | 517 | { |
| 508 | return do_task_stat(task, buffer, 1); | 518 | return do_task_stat(task, buffer, 1); |
| 509 | } | 519 | } |
| @@ -512,12 +522,12 @@ int proc_pid_statm(struct task_struct *task, char *buffer) | |||
| 512 | { | 522 | { |
| 513 | int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; | 523 | int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; |
| 514 | struct mm_struct *mm = get_task_mm(task); | 524 | struct mm_struct *mm = get_task_mm(task); |
| 515 | 525 | ||
| 516 | if (mm) { | 526 | if (mm) { |
| 517 | size = task_statm(mm, &shared, &text, &data, &resident); | 527 | size = task_statm(mm, &shared, &text, &data, &resident); |
| 518 | mmput(mm); | 528 | mmput(mm); |
| 519 | } | 529 | } |
| 520 | 530 | ||
| 521 | return sprintf(buffer,"%d %d %d %d %d %d %d\n", | 531 | return sprintf(buffer, "%d %d %d %d %d %d %d\n", |
| 522 | size, resident, shared, text, lib, data, 0); | 532 | size, resident, shared, text, lib, data, 0); |
| 523 | } | 533 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 46ea5d56e1bb..42cb4f5613b6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -67,7 +67,6 @@ | |||
| 67 | #include <linux/mount.h> | 67 | #include <linux/mount.h> |
| 68 | #include <linux/security.h> | 68 | #include <linux/security.h> |
| 69 | #include <linux/ptrace.h> | 69 | #include <linux/ptrace.h> |
| 70 | #include <linux/seccomp.h> | ||
| 71 | #include <linux/cpuset.h> | 70 | #include <linux/cpuset.h> |
| 72 | #include <linux/audit.h> | 71 | #include <linux/audit.h> |
| 73 | #include <linux/poll.h> | 72 | #include <linux/poll.h> |
| @@ -204,12 +203,17 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) | |||
| 204 | int res = 0; | 203 | int res = 0; |
| 205 | struct mm_struct *mm = get_task_mm(task); | 204 | struct mm_struct *mm = get_task_mm(task); |
| 206 | if (mm) { | 205 | if (mm) { |
| 207 | unsigned int len = mm->env_end - mm->env_start; | 206 | unsigned int len; |
| 207 | |||
| 208 | res = -ESRCH; | ||
| 209 | if (!ptrace_may_attach(task)) | ||
| 210 | goto out; | ||
| 211 | |||
| 212 | len = mm->env_end - mm->env_start; | ||
| 208 | if (len > PAGE_SIZE) | 213 | if (len > PAGE_SIZE) |
| 209 | len = PAGE_SIZE; | 214 | len = PAGE_SIZE; |
| 210 | res = access_process_vm(task, mm->env_start, buffer, len, 0); | 215 | res = access_process_vm(task, mm->env_start, buffer, len, 0); |
| 211 | if (!ptrace_may_attach(task)) | 216 | out: |
| 212 | res = -ESRCH; | ||
| 213 | mmput(mm); | 217 | mmput(mm); |
| 214 | } | 218 | } |
| 215 | return res; | 219 | return res; |
| @@ -279,7 +283,7 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) | |||
| 279 | static int proc_pid_wchan(struct task_struct *task, char *buffer) | 283 | static int proc_pid_wchan(struct task_struct *task, char *buffer) |
| 280 | { | 284 | { |
| 281 | unsigned long wchan; | 285 | unsigned long wchan; |
| 282 | char symname[KSYM_NAME_LEN+1]; | 286 | char symname[KSYM_NAME_LEN]; |
| 283 | 287 | ||
| 284 | wchan = get_wchan(task); | 288 | wchan = get_wchan(task); |
| 285 | 289 | ||
| @@ -812,71 +816,6 @@ static const struct file_operations proc_loginuid_operations = { | |||
| 812 | }; | 816 | }; |
| 813 | #endif | 817 | #endif |
| 814 | 818 | ||
| 815 | #ifdef CONFIG_SECCOMP | ||
| 816 | static ssize_t seccomp_read(struct file *file, char __user *buf, | ||
| 817 | size_t count, loff_t *ppos) | ||
| 818 | { | ||
| 819 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); | ||
| 820 | char __buf[20]; | ||
| 821 | size_t len; | ||
| 822 | |||
| 823 | if (!tsk) | ||
| 824 | return -ESRCH; | ||
| 825 | /* no need to print the trailing zero, so use only len */ | ||
| 826 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | ||
| 827 | put_task_struct(tsk); | ||
| 828 | |||
| 829 | return simple_read_from_buffer(buf, count, ppos, __buf, len); | ||
| 830 | } | ||
| 831 | |||
| 832 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | ||
| 833 | size_t count, loff_t *ppos) | ||
| 834 | { | ||
| 835 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); | ||
| 836 | char __buf[20], *end; | ||
| 837 | unsigned int seccomp_mode; | ||
| 838 | ssize_t result; | ||
| 839 | |||
| 840 | result = -ESRCH; | ||
| 841 | if (!tsk) | ||
| 842 | goto out_no_task; | ||
| 843 | |||
| 844 | /* can set it only once to be even more secure */ | ||
| 845 | result = -EPERM; | ||
| 846 | if (unlikely(tsk->seccomp.mode)) | ||
| 847 | goto out; | ||
| 848 | |||
| 849 | result = -EFAULT; | ||
| 850 | memset(__buf, 0, sizeof(__buf)); | ||
| 851 | count = min(count, sizeof(__buf) - 1); | ||
| 852 | if (copy_from_user(__buf, buf, count)) | ||
| 853 | goto out; | ||
| 854 | |||
| 855 | seccomp_mode = simple_strtoul(__buf, &end, 0); | ||
| 856 | if (*end == '\n') | ||
| 857 | end++; | ||
| 858 | result = -EINVAL; | ||
| 859 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | ||
| 860 | tsk->seccomp.mode = seccomp_mode; | ||
| 861 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | ||
| 862 | } else | ||
| 863 | goto out; | ||
| 864 | result = -EIO; | ||
| 865 | if (unlikely(!(end - __buf))) | ||
| 866 | goto out; | ||
| 867 | result = end - __buf; | ||
| 868 | out: | ||
| 869 | put_task_struct(tsk); | ||
| 870 | out_no_task: | ||
| 871 | return result; | ||
| 872 | } | ||
| 873 | |||
| 874 | static const struct file_operations proc_seccomp_operations = { | ||
| 875 | .read = seccomp_read, | ||
| 876 | .write = seccomp_write, | ||
| 877 | }; | ||
| 878 | #endif /* CONFIG_SECCOMP */ | ||
| 879 | |||
| 880 | #ifdef CONFIG_FAULT_INJECTION | 819 | #ifdef CONFIG_FAULT_INJECTION |
| 881 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, | 820 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, |
| 882 | size_t count, loff_t *ppos) | 821 | size_t count, loff_t *ppos) |
| @@ -2037,9 +1976,6 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 2037 | REG("numa_maps", S_IRUGO, numa_maps), | 1976 | REG("numa_maps", S_IRUGO, numa_maps), |
| 2038 | #endif | 1977 | #endif |
| 2039 | REG("mem", S_IRUSR|S_IWUSR, mem), | 1978 | REG("mem", S_IRUSR|S_IWUSR, mem), |
| 2040 | #ifdef CONFIG_SECCOMP | ||
| 2041 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), | ||
| 2042 | #endif | ||
| 2043 | LNK("cwd", cwd), | 1979 | LNK("cwd", cwd), |
| 2044 | LNK("root", root), | 1980 | LNK("root", root), |
| 2045 | LNK("exe", exe), | 1981 | LNK("exe", exe), |
| @@ -2324,9 +2260,6 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 2324 | REG("numa_maps", S_IRUGO, numa_maps), | 2260 | REG("numa_maps", S_IRUGO, numa_maps), |
| 2325 | #endif | 2261 | #endif |
| 2326 | REG("mem", S_IRUSR|S_IWUSR, mem), | 2262 | REG("mem", S_IRUSR|S_IWUSR, mem), |
| 2327 | #ifdef CONFIG_SECCOMP | ||
| 2328 | REG("seccomp", S_IRUSR|S_IWUSR, seccomp), | ||
| 2329 | #endif | ||
| 2330 | LNK("cwd", cwd), | 2263 | LNK("cwd", cwd), |
| 2331 | LNK("root", root), | 2264 | LNK("root", root), |
| 2332 | LNK("exe", exe), | 2265 | LNK("exe", exe), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8a40e15f5ecb..b5e7155d30d8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/namei.h> | 20 | #include <linux/namei.h> |
| 21 | #include <linux/bitops.h> | 21 | #include <linux/bitops.h> |
| 22 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
| 23 | #include <linux/completion.h> | ||
| 23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
| 24 | 25 | ||
| 25 | #include "internal.h" | 26 | #include "internal.h" |
| @@ -529,12 +530,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
| 529 | return -EAGAIN; | 530 | return -EAGAIN; |
| 530 | dp->low_ino = i; | 531 | dp->low_ino = i; |
| 531 | 532 | ||
| 532 | spin_lock(&proc_subdir_lock); | ||
| 533 | dp->next = dir->subdir; | ||
| 534 | dp->parent = dir; | ||
| 535 | dir->subdir = dp; | ||
| 536 | spin_unlock(&proc_subdir_lock); | ||
| 537 | |||
| 538 | if (S_ISDIR(dp->mode)) { | 533 | if (S_ISDIR(dp->mode)) { |
| 539 | if (dp->proc_iops == NULL) { | 534 | if (dp->proc_iops == NULL) { |
| 540 | dp->proc_fops = &proc_dir_operations; | 535 | dp->proc_fops = &proc_dir_operations; |
| @@ -550,6 +545,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
| 550 | if (dp->proc_iops == NULL) | 545 | if (dp->proc_iops == NULL) |
| 551 | dp->proc_iops = &proc_file_inode_operations; | 546 | dp->proc_iops = &proc_file_inode_operations; |
| 552 | } | 547 | } |
| 548 | |||
| 549 | spin_lock(&proc_subdir_lock); | ||
| 550 | dp->next = dir->subdir; | ||
| 551 | dp->parent = dir; | ||
| 552 | dir->subdir = dp; | ||
| 553 | spin_unlock(&proc_subdir_lock); | ||
| 554 | |||
| 553 | return 0; | 555 | return 0; |
| 554 | } | 556 | } |
| 555 | 557 | ||
| @@ -613,6 +615,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, | |||
| 613 | ent->namelen = len; | 615 | ent->namelen = len; |
| 614 | ent->mode = mode; | 616 | ent->mode = mode; |
| 615 | ent->nlink = nlink; | 617 | ent->nlink = nlink; |
| 618 | ent->pde_users = 0; | ||
| 619 | spin_lock_init(&ent->pde_unload_lock); | ||
| 620 | ent->pde_unload_completion = NULL; | ||
| 616 | out: | 621 | out: |
| 617 | return ent; | 622 | return ent; |
| 618 | } | 623 | } |
| @@ -649,9 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, | |||
| 649 | 654 | ||
| 650 | ent = proc_create(&parent, name, S_IFDIR | mode, 2); | 655 | ent = proc_create(&parent, name, S_IFDIR | mode, 2); |
| 651 | if (ent) { | 656 | if (ent) { |
| 652 | ent->proc_fops = &proc_dir_operations; | ||
| 653 | ent->proc_iops = &proc_dir_inode_operations; | ||
| 654 | |||
| 655 | if (proc_register(parent, ent) < 0) { | 657 | if (proc_register(parent, ent) < 0) { |
| 656 | kfree(ent); | 658 | kfree(ent); |
| 657 | ent = NULL; | 659 | ent = NULL; |
| @@ -686,10 +688,6 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, | |||
| 686 | 688 | ||
| 687 | ent = proc_create(&parent,name,mode,nlink); | 689 | ent = proc_create(&parent,name,mode,nlink); |
| 688 | if (ent) { | 690 | if (ent) { |
| 689 | if (S_ISDIR(mode)) { | ||
| 690 | ent->proc_fops = &proc_dir_operations; | ||
| 691 | ent->proc_iops = &proc_dir_inode_operations; | ||
| 692 | } | ||
| 693 | if (proc_register(parent, ent) < 0) { | 691 | if (proc_register(parent, ent) < 0) { |
| 694 | kfree(ent); | 692 | kfree(ent); |
| 695 | ent = NULL; | 693 | ent = NULL; |
| @@ -734,9 +732,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
| 734 | de = *p; | 732 | de = *p; |
| 735 | *p = de->next; | 733 | *p = de->next; |
| 736 | de->next = NULL; | 734 | de->next = NULL; |
| 735 | |||
| 736 | spin_lock(&de->pde_unload_lock); | ||
| 737 | /* | ||
| 738 | * Stop accepting new callers into module. If you're | ||
| 739 | * dynamically allocating ->proc_fops, save a pointer somewhere. | ||
| 740 | */ | ||
| 741 | de->proc_fops = NULL; | ||
| 742 | /* Wait until all existing callers into module are done. */ | ||
| 743 | if (de->pde_users > 0) { | ||
| 744 | DECLARE_COMPLETION_ONSTACK(c); | ||
| 745 | |||
| 746 | if (!de->pde_unload_completion) | ||
| 747 | de->pde_unload_completion = &c; | ||
| 748 | |||
| 749 | spin_unlock(&de->pde_unload_lock); | ||
| 750 | spin_unlock(&proc_subdir_lock); | ||
| 751 | |||
| 752 | wait_for_completion(de->pde_unload_completion); | ||
| 753 | |||
| 754 | spin_lock(&proc_subdir_lock); | ||
| 755 | goto continue_removing; | ||
| 756 | } | ||
| 757 | spin_unlock(&de->pde_unload_lock); | ||
| 758 | |||
| 759 | continue_removing: | ||
| 737 | if (S_ISDIR(de->mode)) | 760 | if (S_ISDIR(de->mode)) |
| 738 | parent->nlink--; | 761 | parent->nlink--; |
| 739 | proc_kill_inodes(de); | 762 | if (!S_ISREG(de->mode)) |
| 763 | proc_kill_inodes(de); | ||
| 740 | de->nlink = 0; | 764 | de->nlink = 0; |
| 741 | WARN_ON(de->subdir); | 765 | WARN_ON(de->subdir); |
| 742 | if (!atomic_read(&de->count)) | 766 | if (!atomic_read(&de->count)) |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d5ce65c68d7b..dd28e86ab422 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
| 11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
| 12 | #include <linux/stat.h> | 12 | #include <linux/stat.h> |
| 13 | #include <linux/completion.h> | ||
| 13 | #include <linux/file.h> | 14 | #include <linux/file.h> |
| 14 | #include <linux/limits.h> | 15 | #include <linux/limits.h> |
| 15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
| @@ -140,6 +141,251 @@ static const struct super_operations proc_sops = { | |||
| 140 | .remount_fs = proc_remount, | 141 | .remount_fs = proc_remount, |
| 141 | }; | 142 | }; |
| 142 | 143 | ||
| 144 | static void pde_users_dec(struct proc_dir_entry *pde) | ||
| 145 | { | ||
| 146 | spin_lock(&pde->pde_unload_lock); | ||
| 147 | pde->pde_users--; | ||
| 148 | if (pde->pde_unload_completion && pde->pde_users == 0) | ||
| 149 | complete(pde->pde_unload_completion); | ||
| 150 | spin_unlock(&pde->pde_unload_lock); | ||
| 151 | } | ||
| 152 | |||
| 153 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) | ||
| 154 | { | ||
| 155 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 156 | loff_t rv = -EINVAL; | ||
| 157 | loff_t (*llseek)(struct file *, loff_t, int); | ||
| 158 | |||
| 159 | spin_lock(&pde->pde_unload_lock); | ||
| 160 | /* | ||
| 161 | * remove_proc_entry() is going to delete PDE (as part of module | ||
| 162 | * cleanup sequence). No new callers into module allowed. | ||
| 163 | */ | ||
| 164 | if (!pde->proc_fops) { | ||
| 165 | spin_unlock(&pde->pde_unload_lock); | ||
| 166 | return rv; | ||
| 167 | } | ||
| 168 | /* | ||
| 169 | * Bump refcount so that remove_proc_entry will wail for ->llseek to | ||
| 170 | * complete. | ||
| 171 | */ | ||
| 172 | pde->pde_users++; | ||
| 173 | /* | ||
| 174 | * Save function pointer under lock, to protect against ->proc_fops | ||
| 175 | * NULL'ifying right after ->pde_unload_lock is dropped. | ||
| 176 | */ | ||
| 177 | llseek = pde->proc_fops->llseek; | ||
| 178 | spin_unlock(&pde->pde_unload_lock); | ||
| 179 | |||
| 180 | if (!llseek) | ||
| 181 | llseek = default_llseek; | ||
| 182 | rv = llseek(file, offset, whence); | ||
| 183 | |||
| 184 | pde_users_dec(pde); | ||
| 185 | return rv; | ||
| 186 | } | ||
| 187 | |||
| 188 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
| 189 | { | ||
| 190 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 191 | ssize_t rv = -EIO; | ||
| 192 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | ||
| 193 | |||
| 194 | spin_lock(&pde->pde_unload_lock); | ||
| 195 | if (!pde->proc_fops) { | ||
| 196 | spin_unlock(&pde->pde_unload_lock); | ||
| 197 | return rv; | ||
| 198 | } | ||
| 199 | pde->pde_users++; | ||
| 200 | read = pde->proc_fops->read; | ||
| 201 | spin_unlock(&pde->pde_unload_lock); | ||
| 202 | |||
| 203 | if (read) | ||
| 204 | rv = read(file, buf, count, ppos); | ||
| 205 | |||
| 206 | pde_users_dec(pde); | ||
| 207 | return rv; | ||
| 208 | } | ||
| 209 | |||
| 210 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) | ||
| 211 | { | ||
| 212 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 213 | ssize_t rv = -EIO; | ||
| 214 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | ||
| 215 | |||
| 216 | spin_lock(&pde->pde_unload_lock); | ||
| 217 | if (!pde->proc_fops) { | ||
| 218 | spin_unlock(&pde->pde_unload_lock); | ||
| 219 | return rv; | ||
| 220 | } | ||
| 221 | pde->pde_users++; | ||
| 222 | write = pde->proc_fops->write; | ||
| 223 | spin_unlock(&pde->pde_unload_lock); | ||
| 224 | |||
| 225 | if (write) | ||
| 226 | rv = write(file, buf, count, ppos); | ||
| 227 | |||
| 228 | pde_users_dec(pde); | ||
| 229 | return rv; | ||
| 230 | } | ||
| 231 | |||
| 232 | static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) | ||
| 233 | { | ||
| 234 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 235 | unsigned int rv = 0; | ||
| 236 | unsigned int (*poll)(struct file *, struct poll_table_struct *); | ||
| 237 | |||
| 238 | spin_lock(&pde->pde_unload_lock); | ||
| 239 | if (!pde->proc_fops) { | ||
| 240 | spin_unlock(&pde->pde_unload_lock); | ||
| 241 | return rv; | ||
| 242 | } | ||
| 243 | pde->pde_users++; | ||
| 244 | poll = pde->proc_fops->poll; | ||
| 245 | spin_unlock(&pde->pde_unload_lock); | ||
| 246 | |||
| 247 | if (poll) | ||
| 248 | rv = poll(file, pts); | ||
| 249 | |||
| 250 | pde_users_dec(pde); | ||
| 251 | return rv; | ||
| 252 | } | ||
| 253 | |||
| 254 | static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 255 | { | ||
| 256 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 257 | long rv = -ENOTTY; | ||
| 258 | long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); | ||
| 259 | int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); | ||
| 260 | |||
| 261 | spin_lock(&pde->pde_unload_lock); | ||
| 262 | if (!pde->proc_fops) { | ||
| 263 | spin_unlock(&pde->pde_unload_lock); | ||
| 264 | return rv; | ||
| 265 | } | ||
| 266 | pde->pde_users++; | ||
| 267 | unlocked_ioctl = pde->proc_fops->unlocked_ioctl; | ||
| 268 | ioctl = pde->proc_fops->ioctl; | ||
| 269 | spin_unlock(&pde->pde_unload_lock); | ||
| 270 | |||
| 271 | if (unlocked_ioctl) { | ||
| 272 | rv = unlocked_ioctl(file, cmd, arg); | ||
| 273 | if (rv == -ENOIOCTLCMD) | ||
| 274 | rv = -EINVAL; | ||
| 275 | } else if (ioctl) { | ||
| 276 | lock_kernel(); | ||
| 277 | rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); | ||
| 278 | unlock_kernel(); | ||
| 279 | } | ||
| 280 | |||
| 281 | pde_users_dec(pde); | ||
| 282 | return rv; | ||
| 283 | } | ||
| 284 | |||
| 285 | #ifdef CONFIG_COMPAT | ||
| 286 | static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 287 | { | ||
| 288 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 289 | long rv = -ENOTTY; | ||
| 290 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); | ||
| 291 | |||
| 292 | spin_lock(&pde->pde_unload_lock); | ||
| 293 | if (!pde->proc_fops) { | ||
| 294 | spin_unlock(&pde->pde_unload_lock); | ||
| 295 | return rv; | ||
| 296 | } | ||
| 297 | pde->pde_users++; | ||
| 298 | compat_ioctl = pde->proc_fops->compat_ioctl; | ||
| 299 | spin_unlock(&pde->pde_unload_lock); | ||
| 300 | |||
| 301 | if (compat_ioctl) | ||
| 302 | rv = compat_ioctl(file, cmd, arg); | ||
| 303 | |||
| 304 | pde_users_dec(pde); | ||
| 305 | return rv; | ||
| 306 | } | ||
| 307 | #endif | ||
| 308 | |||
| 309 | static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 310 | { | ||
| 311 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
| 312 | int rv = -EIO; | ||
| 313 | int (*mmap)(struct file *, struct vm_area_struct *); | ||
| 314 | |||
| 315 | spin_lock(&pde->pde_unload_lock); | ||
| 316 | if (!pde->proc_fops) { | ||
| 317 | spin_unlock(&pde->pde_unload_lock); | ||
| 318 | return rv; | ||
| 319 | } | ||
| 320 | pde->pde_users++; | ||
| 321 | mmap = pde->proc_fops->mmap; | ||
| 322 | spin_unlock(&pde->pde_unload_lock); | ||
| 323 | |||
| 324 | if (mmap) | ||
| 325 | rv = mmap(file, vma); | ||
| 326 | |||
| 327 | pde_users_dec(pde); | ||
| 328 | return rv; | ||
| 329 | } | ||
| 330 | |||
| 331 | static int proc_reg_open(struct inode *inode, struct file *file) | ||
| 332 | { | ||
| 333 | struct proc_dir_entry *pde = PDE(inode); | ||
| 334 | int rv = 0; | ||
| 335 | int (*open)(struct inode *, struct file *); | ||
| 336 | |||
| 337 | spin_lock(&pde->pde_unload_lock); | ||
| 338 | if (!pde->proc_fops) { | ||
| 339 | spin_unlock(&pde->pde_unload_lock); | ||
| 340 | return rv; | ||
| 341 | } | ||
| 342 | pde->pde_users++; | ||
| 343 | open = pde->proc_fops->open; | ||
| 344 | spin_unlock(&pde->pde_unload_lock); | ||
| 345 | |||
| 346 | if (open) | ||
| 347 | rv = open(inode, file); | ||
| 348 | |||
| 349 | pde_users_dec(pde); | ||
| 350 | return rv; | ||
| 351 | } | ||
| 352 | |||
| 353 | static int proc_reg_release(struct inode *inode, struct file *file) | ||
| 354 | { | ||
| 355 | struct proc_dir_entry *pde = PDE(inode); | ||
| 356 | int rv = 0; | ||
| 357 | int (*release)(struct inode *, struct file *); | ||
| 358 | |||
| 359 | spin_lock(&pde->pde_unload_lock); | ||
| 360 | if (!pde->proc_fops) { | ||
| 361 | spin_unlock(&pde->pde_unload_lock); | ||
| 362 | return rv; | ||
| 363 | } | ||
| 364 | pde->pde_users++; | ||
| 365 | release = pde->proc_fops->release; | ||
| 366 | spin_unlock(&pde->pde_unload_lock); | ||
| 367 | |||
| 368 | if (release) | ||
| 369 | rv = release(inode, file); | ||
| 370 | |||
| 371 | pde_users_dec(pde); | ||
| 372 | return rv; | ||
| 373 | } | ||
| 374 | |||
| 375 | static const struct file_operations proc_reg_file_ops = { | ||
| 376 | .llseek = proc_reg_llseek, | ||
| 377 | .read = proc_reg_read, | ||
| 378 | .write = proc_reg_write, | ||
| 379 | .poll = proc_reg_poll, | ||
| 380 | .unlocked_ioctl = proc_reg_unlocked_ioctl, | ||
| 381 | #ifdef CONFIG_COMPAT | ||
| 382 | .compat_ioctl = proc_reg_compat_ioctl, | ||
| 383 | #endif | ||
| 384 | .mmap = proc_reg_mmap, | ||
| 385 | .open = proc_reg_open, | ||
| 386 | .release = proc_reg_release, | ||
| 387 | }; | ||
| 388 | |||
| 143 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | 389 | struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, |
| 144 | struct proc_dir_entry *de) | 390 | struct proc_dir_entry *de) |
| 145 | { | 391 | { |
| @@ -166,8 +412,12 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, | |||
| 166 | inode->i_nlink = de->nlink; | 412 | inode->i_nlink = de->nlink; |
| 167 | if (de->proc_iops) | 413 | if (de->proc_iops) |
| 168 | inode->i_op = de->proc_iops; | 414 | inode->i_op = de->proc_iops; |
| 169 | if (de->proc_fops) | 415 | if (de->proc_fops) { |
| 170 | inode->i_fop = de->proc_fops; | 416 | if (S_ISREG(inode->i_mode)) |
| 417 | inode->i_fop = &proc_reg_file_ops; | ||
| 418 | else | ||
| 419 | inode->i_fop = de->proc_fops; | ||
| 420 | } | ||
| 171 | } | 421 | } |
| 172 | 422 | ||
| 173 | return inode; | 423 | return inode; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5fd49e47f83a..d24b8d46059a 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -105,6 +105,7 @@ static int uptime_read_proc(char *page, char **start, off_t off, | |||
| 105 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); | 105 | cputime_t idletime = cputime_add(init_task.utime, init_task.stime); |
| 106 | 106 | ||
| 107 | do_posix_clock_monotonic_gettime(&uptime); | 107 | do_posix_clock_monotonic_gettime(&uptime); |
| 108 | monotonic_to_bootbased(&uptime); | ||
| 108 | cputime_to_timespec(idletime, &idle); | 109 | cputime_to_timespec(idletime, &idle); |
| 109 | len = sprintf(page,"%lu.%02lu %lu.%02lu\n", | 110 | len = sprintf(page,"%lu.%02lu %lu.%02lu\n", |
| 110 | (unsigned long) uptime.tv_sec, | 111 | (unsigned long) uptime.tv_sec, |
| @@ -443,12 +444,12 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 443 | unsigned long jif; | 444 | unsigned long jif; |
| 444 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | 445 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; |
| 445 | u64 sum = 0; | 446 | u64 sum = 0; |
| 447 | struct timespec boottime; | ||
| 446 | 448 | ||
| 447 | user = nice = system = idle = iowait = | 449 | user = nice = system = idle = iowait = |
| 448 | irq = softirq = steal = cputime64_zero; | 450 | irq = softirq = steal = cputime64_zero; |
| 449 | jif = - wall_to_monotonic.tv_sec; | 451 | getboottime(&boottime); |
| 450 | if (wall_to_monotonic.tv_nsec) | 452 | jif = boottime.tv_sec; |
| 451 | --jif; | ||
| 452 | 453 | ||
| 453 | for_each_possible_cpu(i) { | 454 | for_each_possible_cpu(i) { |
| 454 | int j; | 455 | int j; |
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index b3a473b0a191..22846225acfa 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c | |||
| @@ -69,7 +69,7 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p, | |||
| 69 | 69 | ||
| 70 | static int show_tty_driver(struct seq_file *m, void *v) | 70 | static int show_tty_driver(struct seq_file *m, void *v) |
| 71 | { | 71 | { |
| 72 | struct tty_driver *p = v; | 72 | struct tty_driver *p = list_entry(v, struct tty_driver, tty_drivers); |
| 73 | dev_t from = MKDEV(p->major, p->minor_start); | 73 | dev_t from = MKDEV(p->major, p->minor_start); |
| 74 | dev_t to = from + p->num; | 74 | dev_t to = from + p->num; |
| 75 | 75 | ||
| @@ -106,22 +106,13 @@ static int show_tty_driver(struct seq_file *m, void *v) | |||
| 106 | /* iterator */ | 106 | /* iterator */ |
| 107 | static void *t_start(struct seq_file *m, loff_t *pos) | 107 | static void *t_start(struct seq_file *m, loff_t *pos) |
| 108 | { | 108 | { |
| 109 | struct list_head *p; | ||
| 110 | loff_t l = *pos; | ||
| 111 | |||
| 112 | mutex_lock(&tty_mutex); | 109 | mutex_lock(&tty_mutex); |
| 113 | list_for_each(p, &tty_drivers) | 110 | return seq_list_start(&tty_drivers, *pos); |
| 114 | if (!l--) | ||
| 115 | return list_entry(p, struct tty_driver, tty_drivers); | ||
| 116 | return NULL; | ||
| 117 | } | 111 | } |
| 118 | 112 | ||
| 119 | static void *t_next(struct seq_file *m, void *v, loff_t *pos) | 113 | static void *t_next(struct seq_file *m, void *v, loff_t *pos) |
| 120 | { | 114 | { |
| 121 | struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; | 115 | return seq_list_next(v, &tty_drivers, pos); |
| 122 | (*pos)++; | ||
| 123 | return p==&tty_drivers ? NULL : | ||
| 124 | list_entry(p, struct tty_driver, tty_drivers); | ||
| 125 | } | 116 | } |
| 126 | 117 | ||
| 127 | static void t_stop(struct seq_file *m, void *v) | 118 | static void t_stop(struct seq_file *m, void *v) |
diff --git a/fs/quota.c b/fs/quota.c index 9f237d6182c9..e6577ac15a6c 100644 --- a/fs/quota.c +++ b/fs/quota.c | |||
| @@ -10,12 +10,14 @@ | |||
| 10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
| 11 | #include <asm/current.h> | 11 | #include <asm/current.h> |
| 12 | #include <asm/uaccess.h> | 12 | #include <asm/uaccess.h> |
| 13 | #include <linux/compat.h> | ||
| 13 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
| 14 | #include <linux/security.h> | 15 | #include <linux/security.h> |
| 15 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
| 16 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
| 17 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
| 18 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
| 20 | #include <linux/types.h> | ||
| 19 | 21 | ||
| 20 | /* Check validity of generic quotactl commands */ | 22 | /* Check validity of generic quotactl commands */ |
| 21 | static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) | 23 | static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id) |
| @@ -384,3 +386,119 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t | |||
| 384 | 386 | ||
| 385 | return ret; | 387 | return ret; |
| 386 | } | 388 | } |
| 389 | |||
| 390 | #if defined(CONFIG_X86_64) || defined(CONFIG_IA64) | ||
| 391 | /* | ||
| 392 | * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64) | ||
| 393 | * and is necessary due to alignment problems. | ||
| 394 | */ | ||
| 395 | struct compat_if_dqblk { | ||
| 396 | compat_u64 dqb_bhardlimit; | ||
| 397 | compat_u64 dqb_bsoftlimit; | ||
| 398 | compat_u64 dqb_curspace; | ||
| 399 | compat_u64 dqb_ihardlimit; | ||
| 400 | compat_u64 dqb_isoftlimit; | ||
| 401 | compat_u64 dqb_curinodes; | ||
| 402 | compat_u64 dqb_btime; | ||
| 403 | compat_u64 dqb_itime; | ||
| 404 | compat_uint_t dqb_valid; | ||
| 405 | }; | ||
| 406 | |||
| 407 | /* XFS structures */ | ||
| 408 | struct compat_fs_qfilestat { | ||
| 409 | compat_u64 dqb_bhardlimit; | ||
| 410 | compat_u64 qfs_nblks; | ||
| 411 | compat_uint_t qfs_nextents; | ||
| 412 | }; | ||
| 413 | |||
| 414 | struct compat_fs_quota_stat { | ||
| 415 | __s8 qs_version; | ||
| 416 | __u16 qs_flags; | ||
| 417 | __s8 qs_pad; | ||
| 418 | struct compat_fs_qfilestat qs_uquota; | ||
| 419 | struct compat_fs_qfilestat qs_gquota; | ||
| 420 | compat_uint_t qs_incoredqs; | ||
| 421 | compat_int_t qs_btimelimit; | ||
| 422 | compat_int_t qs_itimelimit; | ||
| 423 | compat_int_t qs_rtbtimelimit; | ||
| 424 | __u16 qs_bwarnlimit; | ||
| 425 | __u16 qs_iwarnlimit; | ||
| 426 | }; | ||
| 427 | |||
| 428 | asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, | ||
| 429 | qid_t id, void __user *addr) | ||
| 430 | { | ||
| 431 | unsigned int cmds; | ||
| 432 | struct if_dqblk __user *dqblk; | ||
| 433 | struct compat_if_dqblk __user *compat_dqblk; | ||
| 434 | struct fs_quota_stat __user *fsqstat; | ||
| 435 | struct compat_fs_quota_stat __user *compat_fsqstat; | ||
| 436 | compat_uint_t data; | ||
| 437 | u16 xdata; | ||
| 438 | long ret; | ||
| 439 | |||
| 440 | cmds = cmd >> SUBCMDSHIFT; | ||
| 441 | |||
| 442 | switch (cmds) { | ||
| 443 | case Q_GETQUOTA: | ||
| 444 | dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); | ||
| 445 | compat_dqblk = addr; | ||
| 446 | ret = sys_quotactl(cmd, special, id, dqblk); | ||
| 447 | if (ret) | ||
| 448 | break; | ||
| 449 | if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) || | ||
| 450 | get_user(data, &dqblk->dqb_valid) || | ||
| 451 | put_user(data, &compat_dqblk->dqb_valid)) | ||
| 452 | ret = -EFAULT; | ||
| 453 | break; | ||
| 454 | case Q_SETQUOTA: | ||
| 455 | dqblk = compat_alloc_user_space(sizeof(struct if_dqblk)); | ||
| 456 | compat_dqblk = addr; | ||
| 457 | ret = -EFAULT; | ||
| 458 | if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) || | ||
| 459 | get_user(data, &compat_dqblk->dqb_valid) || | ||
| 460 | put_user(data, &dqblk->dqb_valid)) | ||
| 461 | break; | ||
| 462 | ret = sys_quotactl(cmd, special, id, dqblk); | ||
| 463 | break; | ||
| 464 | case Q_XGETQSTAT: | ||
| 465 | fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat)); | ||
| 466 | compat_fsqstat = addr; | ||
| 467 | ret = sys_quotactl(cmd, special, id, fsqstat); | ||
| 468 | if (ret) | ||
| 469 | break; | ||
| 470 | ret = -EFAULT; | ||
| 471 | /* Copying qs_version, qs_flags, qs_pad */ | ||
| 472 | if (copy_in_user(compat_fsqstat, fsqstat, | ||
| 473 | offsetof(struct compat_fs_quota_stat, qs_uquota))) | ||
| 474 | break; | ||
| 475 | /* Copying qs_uquota */ | ||
| 476 | if (copy_in_user(&compat_fsqstat->qs_uquota, | ||
| 477 | &fsqstat->qs_uquota, | ||
| 478 | sizeof(compat_fsqstat->qs_uquota)) || | ||
| 479 | get_user(data, &fsqstat->qs_uquota.qfs_nextents) || | ||
| 480 | put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents)) | ||
| 481 | break; | ||
| 482 | /* Copying qs_gquota */ | ||
| 483 | if (copy_in_user(&compat_fsqstat->qs_gquota, | ||
| 484 | &fsqstat->qs_gquota, | ||
| 485 | sizeof(compat_fsqstat->qs_gquota)) || | ||
| 486 | get_user(data, &fsqstat->qs_gquota.qfs_nextents) || | ||
| 487 | put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents)) | ||
| 488 | break; | ||
| 489 | /* Copying the rest */ | ||
| 490 | if (copy_in_user(&compat_fsqstat->qs_incoredqs, | ||
| 491 | &fsqstat->qs_incoredqs, | ||
| 492 | sizeof(struct compat_fs_quota_stat) - | ||
| 493 | offsetof(struct compat_fs_quota_stat, qs_incoredqs)) || | ||
| 494 | get_user(xdata, &fsqstat->qs_iwarnlimit) || | ||
| 495 | put_user(xdata, &compat_fsqstat->qs_iwarnlimit)) | ||
| 496 | break; | ||
| 497 | ret = 0; | ||
| 498 | break; | ||
| 499 | default: | ||
| 500 | ret = sys_quotactl(cmd, special, id, addr); | ||
| 501 | } | ||
| 502 | return ret; | ||
| 503 | } | ||
| 504 | #endif | ||
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index d40d22b347b7..ef2b46d099ff 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
| @@ -60,6 +60,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
| 60 | inode->i_blocks = 0; | 60 | inode->i_blocks = 0; |
| 61 | inode->i_mapping->a_ops = &ramfs_aops; | 61 | inode->i_mapping->a_ops = &ramfs_aops; |
| 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
| 63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | ||
| 63 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 64 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 64 | switch (mode & S_IFMT) { | 65 | switch (mode & S_IFMT) { |
| 65 | default: | 66 | default: |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 30eebfb1b2d8..2070aeee2a52 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
| @@ -1305,7 +1305,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t | |||
| 1305 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | 1305 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && |
| 1306 | *ppos + count > MAX_NON_LFS) { | 1306 | *ppos + count > MAX_NON_LFS) { |
| 1307 | if (*ppos >= MAX_NON_LFS) { | 1307 | if (*ppos >= MAX_NON_LFS) { |
| 1308 | send_sig(SIGXFSZ, current, 0); | ||
| 1309 | return -EFBIG; | 1308 | return -EFBIG; |
| 1310 | } | 1309 | } |
| 1311 | if (count > MAX_NON_LFS - (unsigned long)*ppos) | 1310 | if (count > MAX_NON_LFS - (unsigned long)*ppos) |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 1272d11399fb..ddde489f1cb2 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/reiserfs_fs.h> | 7 | #include <linux/reiserfs_fs.h> |
| 8 | #include <linux/reiserfs_acl.h> | 8 | #include <linux/reiserfs_acl.h> |
| 9 | #include <linux/reiserfs_xattr.h> | 9 | #include <linux/reiserfs_xattr.h> |
| 10 | #include <linux/exportfs.h> | ||
| 10 | #include <linux/smp_lock.h> | 11 | #include <linux/smp_lock.h> |
| 11 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
| 12 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b4ac9119200e..5a93cfe1a032 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
| 22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
| 24 | #include <linux/exportfs.h> | ||
| 24 | #include <linux/vfs.h> | 25 | #include <linux/vfs.h> |
| 25 | #include <linux/mnt_namespace.h> | 26 | #include <linux/mnt_namespace.h> |
| 26 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 49194a4e6b91..bbb19be260ce 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -177,21 +177,23 @@ EXPORT_SYMBOL(seq_read); | |||
| 177 | 177 | ||
| 178 | static int traverse(struct seq_file *m, loff_t offset) | 178 | static int traverse(struct seq_file *m, loff_t offset) |
| 179 | { | 179 | { |
| 180 | loff_t pos = 0; | 180 | loff_t pos = 0, index; |
| 181 | int error = 0; | 181 | int error = 0; |
| 182 | void *p; | 182 | void *p; |
| 183 | 183 | ||
| 184 | m->version = 0; | 184 | m->version = 0; |
| 185 | m->index = 0; | 185 | index = 0; |
| 186 | m->count = m->from = 0; | 186 | m->count = m->from = 0; |
| 187 | if (!offset) | 187 | if (!offset) { |
| 188 | m->index = index; | ||
| 188 | return 0; | 189 | return 0; |
| 190 | } | ||
| 189 | if (!m->buf) { | 191 | if (!m->buf) { |
| 190 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); | 192 | m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); |
| 191 | if (!m->buf) | 193 | if (!m->buf) |
| 192 | return -ENOMEM; | 194 | return -ENOMEM; |
| 193 | } | 195 | } |
| 194 | p = m->op->start(m, &m->index); | 196 | p = m->op->start(m, &index); |
| 195 | while (p) { | 197 | while (p) { |
| 196 | error = PTR_ERR(p); | 198 | error = PTR_ERR(p); |
| 197 | if (IS_ERR(p)) | 199 | if (IS_ERR(p)) |
| @@ -204,15 +206,17 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
| 204 | if (pos + m->count > offset) { | 206 | if (pos + m->count > offset) { |
| 205 | m->from = offset - pos; | 207 | m->from = offset - pos; |
| 206 | m->count -= m->from; | 208 | m->count -= m->from; |
| 209 | m->index = index; | ||
| 207 | break; | 210 | break; |
| 208 | } | 211 | } |
| 209 | pos += m->count; | 212 | pos += m->count; |
| 210 | m->count = 0; | 213 | m->count = 0; |
| 211 | if (pos == offset) { | 214 | if (pos == offset) { |
| 212 | m->index++; | 215 | index++; |
| 216 | m->index = index; | ||
| 213 | break; | 217 | break; |
| 214 | } | 218 | } |
| 215 | p = m->op->next(m, p, &m->index); | 219 | p = m->op->next(m, p, &index); |
| 216 | } | 220 | } |
| 217 | m->op->stop(m, p); | 221 | m->op->stop(m, p); |
| 218 | return error; | 222 | return error; |
| @@ -260,8 +264,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) | |||
| 260 | } | 264 | } |
| 261 | } | 265 | } |
| 262 | } | 266 | } |
| 263 | mutex_unlock(&m->lock); | ||
| 264 | file->f_version = m->version; | 267 | file->f_version = m->version; |
| 268 | mutex_unlock(&m->lock); | ||
| 265 | return retval; | 269 | return retval; |
| 266 | } | 270 | } |
| 267 | EXPORT_SYMBOL(seq_lseek); | 271 | EXPORT_SYMBOL(seq_lseek); |
diff --git a/fs/splice.c b/fs/splice.c index 6c9828651e6f..53fc2082a468 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -1061,8 +1061,9 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
| 1061 | 1061 | ||
| 1062 | while (len) { | 1062 | while (len) { |
| 1063 | size_t read_len; | 1063 | size_t read_len; |
| 1064 | loff_t pos = sd->pos; | ||
| 1064 | 1065 | ||
| 1065 | ret = do_splice_to(in, &sd->pos, pipe, len, flags); | 1066 | ret = do_splice_to(in, &pos, pipe, len, flags); |
| 1066 | if (unlikely(ret <= 0)) | 1067 | if (unlikely(ret <= 0)) |
| 1067 | goto out_release; | 1068 | goto out_release; |
| 1068 | 1069 | ||
| @@ -1080,6 +1081,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
| 1080 | 1081 | ||
| 1081 | bytes += ret; | 1082 | bytes += ret; |
| 1082 | len -= ret; | 1083 | len -= ret; |
| 1084 | sd->pos = pos; | ||
| 1083 | 1085 | ||
| 1084 | if (ret < read_len) | 1086 | if (ret < read_len) |
| 1085 | goto out_release; | 1087 | goto out_release; |
diff --git a/fs/super.c b/fs/super.c index 5260d620c555..fc8ebedc6bed 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -884,6 +884,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
| 884 | error = type->get_sb(type, flags, name, data, mnt); | 884 | error = type->get_sb(type, flags, name, data, mnt); |
| 885 | if (error < 0) | 885 | if (error < 0) |
| 886 | goto out_free_secdata; | 886 | goto out_free_secdata; |
| 887 | BUG_ON(!mnt->mnt_sb); | ||
| 887 | 888 | ||
| 888 | error = security_sb_kern_mount(mnt->mnt_sb, secdata); | 889 | error = security_sb_kern_mount(mnt->mnt_sb, secdata); |
| 889 | if (error) | 890 | if (error) |
diff --git a/fs/udf/crc.c b/fs/udf/crc.c index 1b82a4adc2f7..ef2bfaa19d75 100644 --- a/fs/udf/crc.c +++ b/fs/udf/crc.c | |||
| @@ -106,8 +106,8 @@ int main(void) | |||
| 106 | { | 106 | { |
| 107 | unsigned short x; | 107 | unsigned short x; |
| 108 | 108 | ||
| 109 | x = udf_crc16(bytes, sizeof bytes); | 109 | x = udf_crc(bytes, sizeof bytes); |
| 110 | printf("udf_crc16: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); | 110 | printf("udf_crc: calculated = %4.4x, correct = %4.4x\n", x, 0x3299U); |
| 111 | 111 | ||
| 112 | return 0; | 112 | return 0; |
| 113 | } | 113 | } |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 8206983f2ebf..10f3188738af 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
| @@ -50,7 +50,7 @@ void udf_free_inode(struct inode * inode) | |||
| 50 | else | 50 | else |
| 51 | UDF_SB_LVIDIU(sb)->numFiles = | 51 | UDF_SB_LVIDIU(sb)->numFiles = |
| 52 | cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); | 52 | cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); |
| 53 | 53 | ||
| 54 | mark_buffer_dirty(sbi->s_lvidbh); | 54 | mark_buffer_dirty(sbi->s_lvidbh); |
| 55 | } | 55 | } |
| 56 | mutex_unlock(&sbi->s_alloc_mutex); | 56 | mutex_unlock(&sbi->s_alloc_mutex); |
| @@ -136,6 +136,13 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) | |||
| 136 | UDF_I_EFE(inode) = 0; | 136 | UDF_I_EFE(inode) = 0; |
| 137 | UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); | 137 | UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); |
| 138 | } | 138 | } |
| 139 | if (!UDF_I_DATA(inode)) | ||
| 140 | { | ||
| 141 | iput(inode); | ||
| 142 | *err = -ENOMEM; | ||
| 143 | mutex_unlock(&sbi->s_alloc_mutex); | ||
| 144 | return NULL; | ||
| 145 | } | ||
| 139 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
| 140 | UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; | 147 | UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; |
| 141 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index bf7de0bdbab3..5b82e489af78 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
| @@ -49,6 +49,7 @@ MODULE_LICENSE("GPL"); | |||
| 49 | static mode_t udf_convert_permissions(struct fileEntry *); | 49 | static mode_t udf_convert_permissions(struct fileEntry *); |
| 50 | static int udf_update_inode(struct inode *, int); | 50 | static int udf_update_inode(struct inode *, int); |
| 51 | static void udf_fill_inode(struct inode *, struct buffer_head *); | 51 | static void udf_fill_inode(struct inode *, struct buffer_head *); |
| 52 | static int udf_alloc_i_data(struct inode *inode, size_t size); | ||
| 52 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, | 53 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, |
| 53 | long *, int *); | 54 | long *, int *); |
| 54 | static int8_t udf_insert_aext(struct inode *, struct extent_position, | 55 | static int8_t udf_insert_aext(struct inode *, struct extent_position, |
| @@ -734,7 +735,7 @@ static void udf_split_extents(struct inode *inode, int *c, int offset, int newbl | |||
| 734 | (*c) ++; | 735 | (*c) ++; |
| 735 | (*endnum) ++; | 736 | (*endnum) ++; |
| 736 | } | 737 | } |
| 737 | 738 | ||
| 738 | laarr[curr].extLocation.logicalBlockNum = newblocknum; | 739 | laarr[curr].extLocation.logicalBlockNum = newblocknum; |
| 739 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) | 740 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) |
| 740 | laarr[curr].extLocation.partitionReferenceNum = | 741 | laarr[curr].extLocation.partitionReferenceNum = |
| @@ -836,7 +837,7 @@ static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, | |||
| 836 | { | 837 | { |
| 837 | numalloc -= elen; | 838 | numalloc -= elen; |
| 838 | if (*endnum > (i+1)) | 839 | if (*endnum > (i+1)) |
| 839 | memmove(&laarr[i], &laarr[i+1], | 840 | memmove(&laarr[i], &laarr[i+1], |
| 840 | sizeof(long_ad) * (*endnum - (i+1))); | 841 | sizeof(long_ad) * (*endnum - (i+1))); |
| 841 | i --; | 842 | i --; |
| 842 | (*endnum) --; | 843 | (*endnum) --; |
| @@ -1024,7 +1025,7 @@ void udf_truncate(struct inode * inode) | |||
| 1024 | { | 1025 | { |
| 1025 | block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); | 1026 | block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); |
| 1026 | udf_truncate_extents(inode); | 1027 | udf_truncate_extents(inode); |
| 1027 | } | 1028 | } |
| 1028 | 1029 | ||
| 1029 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); | 1030 | inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); |
| 1030 | if (IS_SYNC(inode)) | 1031 | if (IS_SYNC(inode)) |
| @@ -1087,10 +1088,10 @@ __udf_read_inode(struct inode *inode) | |||
| 1087 | { | 1088 | { |
| 1088 | kernel_lb_addr loc; | 1089 | kernel_lb_addr loc; |
| 1089 | ie = (struct indirectEntry *)ibh->b_data; | 1090 | ie = (struct indirectEntry *)ibh->b_data; |
| 1090 | 1091 | ||
| 1091 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | 1092 | loc = lelb_to_cpu(ie->indirectICB.extLocation); |
| 1092 | 1093 | ||
| 1093 | if (ie->indirectICB.extLength && | 1094 | if (ie->indirectICB.extLength && |
| 1094 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) | 1095 | (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) |
| 1095 | { | 1096 | { |
| 1096 | if (ident == TAG_IDENT_FE || | 1097 | if (ident == TAG_IDENT_FE || |
| @@ -1156,14 +1157,22 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
| 1156 | { | 1157 | { |
| 1157 | UDF_I_EFE(inode) = 1; | 1158 | UDF_I_EFE(inode) = 1; |
| 1158 | UDF_I_USE(inode) = 0; | 1159 | UDF_I_USE(inode) = 0; |
| 1159 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); | 1160 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry))) |
| 1161 | { | ||
| 1162 | make_bad_inode(inode); | ||
| 1163 | return; | ||
| 1164 | } | ||
| 1160 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); | 1165 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); |
| 1161 | } | 1166 | } |
| 1162 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) | 1167 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) |
| 1163 | { | 1168 | { |
| 1164 | UDF_I_EFE(inode) = 0; | 1169 | UDF_I_EFE(inode) = 0; |
| 1165 | UDF_I_USE(inode) = 0; | 1170 | UDF_I_USE(inode) = 0; |
| 1166 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); | 1171 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry))) |
| 1172 | { | ||
| 1173 | make_bad_inode(inode); | ||
| 1174 | return; | ||
| 1175 | } | ||
| 1167 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); | 1176 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); |
| 1168 | } | 1177 | } |
| 1169 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) | 1178 | else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) |
| @@ -1173,7 +1182,11 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
| 1173 | UDF_I_LENALLOC(inode) = | 1182 | UDF_I_LENALLOC(inode) = |
| 1174 | le32_to_cpu( | 1183 | le32_to_cpu( |
| 1175 | ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); | 1184 | ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); |
| 1176 | UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry), GFP_KERNEL); | 1185 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry))) |
| 1186 | { | ||
| 1187 | make_bad_inode(inode); | ||
| 1188 | return; | ||
| 1189 | } | ||
| 1177 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); | 1190 | memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); |
| 1178 | return; | 1191 | return; |
| 1179 | } | 1192 | } |
| @@ -1191,7 +1204,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
| 1191 | inode->i_nlink = le16_to_cpu(fe->fileLinkCount); | 1204 | inode->i_nlink = le16_to_cpu(fe->fileLinkCount); |
| 1192 | if (!inode->i_nlink) | 1205 | if (!inode->i_nlink) |
| 1193 | inode->i_nlink = 1; | 1206 | inode->i_nlink = 1; |
| 1194 | 1207 | ||
| 1195 | inode->i_size = le64_to_cpu(fe->informationLength); | 1208 | inode->i_size = le64_to_cpu(fe->informationLength); |
| 1196 | UDF_I_LENEXTENTS(inode) = inode->i_size; | 1209 | UDF_I_LENEXTENTS(inode) = inode->i_size; |
| 1197 | 1210 | ||
| @@ -1243,7 +1256,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
| 1243 | } | 1256 | } |
| 1244 | else | 1257 | else |
| 1245 | { | 1258 | { |
| 1246 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << | 1259 | inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << |
| 1247 | (inode->i_sb->s_blocksize_bits - 9); | 1260 | (inode->i_sb->s_blocksize_bits - 9); |
| 1248 | 1261 | ||
| 1249 | if ( udf_stamp_to_time(&convtime, &convtime_usec, | 1262 | if ( udf_stamp_to_time(&convtime, &convtime_usec, |
| @@ -1374,6 +1387,20 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
| 1374 | } | 1387 | } |
| 1375 | } | 1388 | } |
| 1376 | 1389 | ||
| 1390 | static int udf_alloc_i_data(struct inode *inode, size_t size) | ||
| 1391 | { | ||
| 1392 | UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL); | ||
| 1393 | |||
| 1394 | if (!UDF_I_DATA(inode)) | ||
| 1395 | { | ||
| 1396 | printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n", | ||
| 1397 | inode->i_ino); | ||
| 1398 | return -ENOMEM; | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | return 0; | ||
| 1402 | } | ||
| 1403 | |||
| 1377 | static mode_t | 1404 | static mode_t |
| 1378 | udf_convert_permissions(struct fileEntry *fe) | 1405 | udf_convert_permissions(struct fileEntry *fe) |
| 1379 | { | 1406 | { |
| @@ -2072,7 +2099,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, | |||
| 2072 | mark_buffer_dirty_inode(oepos.bh, inode); | 2099 | mark_buffer_dirty_inode(oepos.bh, inode); |
| 2073 | } | 2100 | } |
| 2074 | } | 2101 | } |
| 2075 | 2102 | ||
| 2076 | brelse(epos.bh); | 2103 | brelse(epos.bh); |
| 2077 | brelse(oepos.bh); | 2104 | brelse(oepos.bh); |
| 2078 | return (elen >> 30); | 2105 | return (elen >> 30); |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 22ff6ed55ce9..2b3011689e89 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
| @@ -87,6 +87,7 @@ | |||
| 87 | #include <linux/smp_lock.h> | 87 | #include <linux/smp_lock.h> |
| 88 | #include <linux/buffer_head.h> | 88 | #include <linux/buffer_head.h> |
| 89 | #include <linux/vfs.h> | 89 | #include <linux/vfs.h> |
| 90 | #include <linux/log2.h> | ||
| 90 | 91 | ||
| 91 | #include "swab.h" | 92 | #include "swab.h" |
| 92 | #include "util.h" | 93 | #include "util.h" |
| @@ -854,7 +855,7 @@ magic_found: | |||
| 854 | uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); | 855 | uspi->s_fmask = fs32_to_cpu(sb, usb1->fs_fmask); |
| 855 | uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); | 856 | uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); |
| 856 | 857 | ||
| 857 | if (uspi->s_fsize & (uspi->s_fsize - 1)) { | 858 | if (!is_power_of_2(uspi->s_fsize)) { |
| 858 | printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", | 859 | printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", |
| 859 | uspi->s_fsize); | 860 | uspi->s_fsize); |
| 860 | goto failed; | 861 | goto failed; |
| @@ -869,7 +870,7 @@ magic_found: | |||
| 869 | uspi->s_fsize); | 870 | uspi->s_fsize); |
| 870 | goto failed; | 871 | goto failed; |
| 871 | } | 872 | } |
| 872 | if (uspi->s_bsize & (uspi->s_bsize - 1)) { | 873 | if (!is_power_of_2(uspi->s_bsize)) { |
| 873 | printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", | 874 | printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", |
| 874 | uspi->s_bsize); | 875 | uspi->s_bsize); |
| 875 | goto failed; | 876 | goto failed; |
diff --git a/fs/utimes.c b/fs/utimes.c index b3c88952465f..83a7e69e706c 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
| @@ -106,7 +106,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
| 106 | if (IS_IMMUTABLE(inode)) | 106 | if (IS_IMMUTABLE(inode)) |
| 107 | goto dput_and_out; | 107 | goto dput_and_out; |
| 108 | 108 | ||
| 109 | if (current->fsuid != inode->i_uid) { | 109 | if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) { |
| 110 | if (f) { | 110 | if (f) { |
| 111 | if (!(f->f_mode & FMODE_WRITE)) | 111 | if (!(f->f_mode & FMODE_WRITE)) |
| 112 | goto dput_and_out; | 112 | goto dput_and_out; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2df63622354e..b0f0e58866de 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -35,10 +35,13 @@ | |||
| 35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
| 36 | 36 | ||
| 37 | static kmem_zone_t *xfs_buf_zone; | 37 | static kmem_zone_t *xfs_buf_zone; |
| 38 | static struct shrinker *xfs_buf_shake; | ||
| 39 | STATIC int xfsbufd(void *); | 38 | STATIC int xfsbufd(void *); |
| 40 | STATIC int xfsbufd_wakeup(int, gfp_t); | 39 | STATIC int xfsbufd_wakeup(int, gfp_t); |
| 41 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 40 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
| 41 | static struct shrinker xfs_buf_shake = { | ||
| 42 | .shrink = xfsbufd_wakeup, | ||
| 43 | .seeks = DEFAULT_SEEKS, | ||
| 44 | }; | ||
| 42 | 45 | ||
| 43 | static struct workqueue_struct *xfslogd_workqueue; | 46 | static struct workqueue_struct *xfslogd_workqueue; |
| 44 | struct workqueue_struct *xfsdatad_workqueue; | 47 | struct workqueue_struct *xfsdatad_workqueue; |
| @@ -1832,14 +1835,9 @@ xfs_buf_init(void) | |||
| 1832 | if (!xfsdatad_workqueue) | 1835 | if (!xfsdatad_workqueue) |
| 1833 | goto out_destroy_xfslogd_workqueue; | 1836 | goto out_destroy_xfslogd_workqueue; |
| 1834 | 1837 | ||
| 1835 | xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup); | 1838 | register_shrinker(&xfs_buf_shake); |
| 1836 | if (!xfs_buf_shake) | ||
| 1837 | goto out_destroy_xfsdatad_workqueue; | ||
| 1838 | |||
| 1839 | return 0; | 1839 | return 0; |
| 1840 | 1840 | ||
| 1841 | out_destroy_xfsdatad_workqueue: | ||
| 1842 | destroy_workqueue(xfsdatad_workqueue); | ||
| 1843 | out_destroy_xfslogd_workqueue: | 1841 | out_destroy_xfslogd_workqueue: |
| 1844 | destroy_workqueue(xfslogd_workqueue); | 1842 | destroy_workqueue(xfslogd_workqueue); |
| 1845 | out_free_buf_zone: | 1843 | out_free_buf_zone: |
| @@ -1854,7 +1852,7 @@ xfs_buf_init(void) | |||
| 1854 | void | 1852 | void |
| 1855 | xfs_buf_terminate(void) | 1853 | xfs_buf_terminate(void) |
| 1856 | { | 1854 | { |
| 1857 | remove_shrinker(xfs_buf_shake); | 1855 | unregister_shrinker(&xfs_buf_shake); |
| 1858 | destroy_workqueue(xfsdatad_workqueue); | 1856 | destroy_workqueue(xfsdatad_workqueue); |
| 1859 | destroy_workqueue(xfslogd_workqueue); | 1857 | destroy_workqueue(xfslogd_workqueue); |
| 1860 | kmem_zone_destroy(xfs_buf_zone); | 1858 | kmem_zone_destroy(xfs_buf_zone); |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 06894cf00b12..4528f9a3f304 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -562,6 +562,7 @@ xfssyncd( | |||
| 562 | bhv_vfs_sync_work_t *work, *n; | 562 | bhv_vfs_sync_work_t *work, *n; |
| 563 | LIST_HEAD (tmp); | 563 | LIST_HEAD (tmp); |
| 564 | 564 | ||
| 565 | set_freezable(); | ||
| 565 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | 566 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); |
| 566 | for (;;) { | 567 | for (;;) { |
| 567 | timeleft = schedule_timeout_interruptible(timeleft); | 568 | timeleft = schedule_timeout_interruptible(timeleft); |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 33dd1ca13245..201cc3273c84 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #ifndef __XFS_SUPER_H__ | 18 | #ifndef __XFS_SUPER_H__ |
| 19 | #define __XFS_SUPER_H__ | 19 | #define __XFS_SUPER_H__ |
| 20 | 20 | ||
| 21 | #include <linux/exportfs.h> | ||
| 22 | |||
| 21 | #ifdef CONFIG_XFS_DMAPI | 23 | #ifdef CONFIG_XFS_DMAPI |
| 22 | # define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) | 24 | # define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) |
| 23 | # define vfs_initdmapi() dmapi_init() | 25 | # define vfs_initdmapi() dmapi_init() |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 7def4c699343..2d274b23ade5 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
| @@ -62,7 +62,6 @@ uint ndquot; | |||
| 62 | 62 | ||
| 63 | kmem_zone_t *qm_dqzone; | 63 | kmem_zone_t *qm_dqzone; |
| 64 | kmem_zone_t *qm_dqtrxzone; | 64 | kmem_zone_t *qm_dqtrxzone; |
| 65 | static struct shrinker *xfs_qm_shaker; | ||
| 66 | 65 | ||
| 67 | static cred_t xfs_zerocr; | 66 | static cred_t xfs_zerocr; |
| 68 | 67 | ||
| @@ -78,6 +77,11 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | |||
| 78 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 77 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
| 79 | STATIC int xfs_qm_shake(int, gfp_t); | 78 | STATIC int xfs_qm_shake(int, gfp_t); |
| 80 | 79 | ||
| 80 | static struct shrinker xfs_qm_shaker = { | ||
| 81 | .shrink = xfs_qm_shake, | ||
| 82 | .seeks = DEFAULT_SEEKS, | ||
| 83 | }; | ||
| 84 | |||
| 81 | #ifdef DEBUG | 85 | #ifdef DEBUG |
| 82 | extern mutex_t qcheck_lock; | 86 | extern mutex_t qcheck_lock; |
| 83 | #endif | 87 | #endif |
| @@ -149,7 +153,7 @@ xfs_Gqm_init(void) | |||
| 149 | } else | 153 | } else |
| 150 | xqm->qm_dqzone = qm_dqzone; | 154 | xqm->qm_dqzone = qm_dqzone; |
| 151 | 155 | ||
| 152 | xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake); | 156 | register_shrinker(&xfs_qm_shaker); |
| 153 | 157 | ||
| 154 | /* | 158 | /* |
| 155 | * The t_dqinfo portion of transactions. | 159 | * The t_dqinfo portion of transactions. |
| @@ -181,7 +185,7 @@ xfs_qm_destroy( | |||
| 181 | 185 | ||
| 182 | ASSERT(xqm != NULL); | 186 | ASSERT(xqm != NULL); |
| 183 | ASSERT(xqm->qm_nrefs == 0); | 187 | ASSERT(xqm->qm_nrefs == 0); |
| 184 | remove_shrinker(xfs_qm_shaker); | 188 | unregister_shrinker(&xfs_qm_shaker); |
| 185 | hsize = xqm->qm_dqhashmask + 1; | 189 | hsize = xqm->qm_dqhashmask + 1; |
| 186 | for (i = 0; i < hsize; i++) { | 190 | for (i = 0; i < hsize; i++) { |
| 187 | xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); | 191 | xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); |
