about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 18:34:42 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 18:34:42 -0500
commit4b4f8580a4b77126733db8072862793d4deae66a (patch)
tree0d6ab49f4fe61ca96fd513b6dfae8be541796320 /fs
parent872912352c5be930e9568e5f3b6d73107d9f278d (diff)
parent8116bf4cb62d337c953cfa5369ef4cf83e73140c (diff)
Merge tag 'locks-v3.20-1' of git://git.samba.org/jlayton/linux
Pull file locking related changes #1 from Jeff Layton: "This patchset contains a fairly major overhaul of how file locks are tracked within the inode. Rather than a single list, we now create a per-inode "lock context" that contains individual lists for the file locks, and a new dedicated spinlock for them. There are changes in other trees that are based on top of this set so it may be easiest to pull this in early" * tag 'locks-v3.20-1' of git://git.samba.org/jlayton/linux: locks: update comments that refer to inode->i_flock locks: consolidate NULL i_flctx checks in locks_remove_file locks: keep a count of locks on the flctx lists locks: clean up the lm_change prototype locks: add a dedicated spinlock to protect i_flctx lists locks: remove i_flock field from struct inode locks: convert lease handling to file_lock_context locks: convert posix locks to file_lock_context locks: move flock locks to file_lock_context ceph: move spinlocking into ceph_encode_locks_to_buffer and ceph_count_locks locks: add a new struct file_locking_context pointer to struct inode locks: have locks_release_file use flock_lock_file to release generic flock locks locks: add new struct list_head to struct file_lock
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/locks.c64
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/cifs/file.c34
-rw-r--r--fs/inode.c3
-rw-r--r--fs/lockd/svcsubs.c26
-rw-r--r--fs/locks.c569
-rw-r--r--fs/nfs/delegation.c23
-rw-r--r--fs/nfs/nfs4state.c70
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/write.c41
-rw-r--r--fs/nfsd/nfs4state.c21
-rw-r--r--fs/read_write.c2
12 files changed, 474 insertions, 389 deletions
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index c35c5c614e38..06ea5cd05cd9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
239 return err; 239 return err;
240} 240}
241 241
242/** 242/*
243 * Must be called with lock_flocks() already held. Fills in the passed 243 * Fills in the passed counter variables, so you can prepare pagelist metadata
244 * counter variables, so you can prepare pagelist metadata before calling 244 * before calling ceph_encode_locks.
245 * ceph_encode_locks.
246 */ 245 */
247void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 246void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
248{ 247{
249 struct file_lock *lock; 248 struct file_lock_context *ctx;
250 249
251 *fcntl_count = 0; 250 *fcntl_count = 0;
252 *flock_count = 0; 251 *flock_count = 0;
253 252
254 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 253 ctx = inode->i_flctx;
255 if (lock->fl_flags & FL_POSIX) 254 if (ctx) {
256 ++(*fcntl_count); 255 *fcntl_count = ctx->flc_posix_cnt;
257 else if (lock->fl_flags & FL_FLOCK) 256 *flock_count = ctx->flc_flock_cnt;
258 ++(*flock_count);
259 } 257 }
260 dout("counted %d flock locks and %d fcntl locks", 258 dout("counted %d flock locks and %d fcntl locks",
261 *flock_count, *fcntl_count); 259 *flock_count, *fcntl_count);
@@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
271 int num_fcntl_locks, int num_flock_locks) 269 int num_fcntl_locks, int num_flock_locks)
272{ 270{
273 struct file_lock *lock; 271 struct file_lock *lock;
272 struct file_lock_context *ctx = inode->i_flctx;
274 int err = 0; 273 int err = 0;
275 int seen_fcntl = 0; 274 int seen_fcntl = 0;
276 int seen_flock = 0; 275 int seen_flock = 0;
@@ -279,33 +278,34 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
279 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 278 dout("encoding %d flock and %d fcntl locks", num_flock_locks,
280 num_fcntl_locks); 279 num_fcntl_locks);
281 280
282 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 281 if (!ctx)
283 if (lock->fl_flags & FL_POSIX) { 282 return 0;
284 ++seen_fcntl; 283
285 if (seen_fcntl > num_fcntl_locks) { 284 spin_lock(&ctx->flc_lock);
286 err = -ENOSPC; 285 list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
287 goto fail; 286 ++seen_fcntl;
288 } 287 if (seen_fcntl > num_fcntl_locks) {
289 err = lock_to_ceph_filelock(lock, &flocks[l]); 288 err = -ENOSPC;
290 if (err) 289 goto fail;
291 goto fail;
292 ++l;
293 } 290 }
291 err = lock_to_ceph_filelock(lock, &flocks[l]);
292 if (err)
293 goto fail;
294 ++l;
294 } 295 }
295 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 296 list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
296 if (lock->fl_flags & FL_FLOCK) { 297 ++seen_flock;
297 ++seen_flock; 298 if (seen_flock > num_flock_locks) {
298 if (seen_flock > num_flock_locks) { 299 err = -ENOSPC;
299 err = -ENOSPC; 300 goto fail;
300 goto fail;
301 }
302 err = lock_to_ceph_filelock(lock, &flocks[l]);
303 if (err)
304 goto fail;
305 ++l;
306 } 301 }
302 err = lock_to_ceph_filelock(lock, &flocks[l]);
303 if (err)
304 goto fail;
305 ++l;
307 } 306 }
308fail: 307fail:
308 spin_unlock(&ctx->flc_lock);
309 return err; 309 return err;
310} 310}
311 311
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d2171f4a6980..5f62fb7a5d0a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2700 struct ceph_filelock *flocks; 2700 struct ceph_filelock *flocks;
2701 2701
2702encode_again: 2702encode_again:
2703 spin_lock(&inode->i_lock);
2704 ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); 2703 ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
2705 spin_unlock(&inode->i_lock);
2706 flocks = kmalloc((num_fcntl_locks+num_flock_locks) * 2704 flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
2707 sizeof(struct ceph_filelock), GFP_NOFS); 2705 sizeof(struct ceph_filelock), GFP_NOFS);
2708 if (!flocks) { 2706 if (!flocks) {
2709 err = -ENOMEM; 2707 err = -ENOMEM;
2710 goto out_free; 2708 goto out_free;
2711 } 2709 }
2712 spin_lock(&inode->i_lock);
2713 err = ceph_encode_locks_to_buffer(inode, flocks, 2710 err = ceph_encode_locks_to_buffer(inode, flocks,
2714 num_fcntl_locks, 2711 num_fcntl_locks,
2715 num_flock_locks); 2712 num_flock_locks);
2716 spin_unlock(&inode->i_lock);
2717 if (err) { 2713 if (err) {
2718 kfree(flocks); 2714 kfree(flocks);
2719 if (err == -ENOSPC) 2715 if (err == -ENOSPC)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 74f12877493a..c1a86764bbf7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1113,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1113 return rc; 1113 return rc;
1114} 1114}
1115 1115
1116/* copied from fs/locks.c with a name change */
1117#define cifs_for_each_lock(inode, lockp) \
1118 for (lockp = &inode->i_flock; *lockp != NULL; \
1119 lockp = &(*lockp)->fl_next)
1120
1121struct lock_to_push { 1116struct lock_to_push {
1122 struct list_head llist; 1117 struct list_head llist;
1123 __u64 offset; 1118 __u64 offset;
@@ -1132,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1132{ 1127{
1133 struct inode *inode = cfile->dentry->d_inode; 1128 struct inode *inode = cfile->dentry->d_inode;
1134 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1129 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1135 struct file_lock *flock, **before; 1130 struct file_lock *flock;
1136 unsigned int count = 0, i = 0; 1131 struct file_lock_context *flctx = inode->i_flctx;
1132 unsigned int i;
1137 int rc = 0, xid, type; 1133 int rc = 0, xid, type;
1138 struct list_head locks_to_send, *el; 1134 struct list_head locks_to_send, *el;
1139 struct lock_to_push *lck, *tmp; 1135 struct lock_to_push *lck, *tmp;
@@ -1141,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1141 1137
1142 xid = get_xid(); 1138 xid = get_xid();
1143 1139
1144 spin_lock(&inode->i_lock); 1140 if (!flctx)
1145 cifs_for_each_lock(inode, before) { 1141 goto out;
1146 if ((*before)->fl_flags & FL_POSIX)
1147 count++;
1148 }
1149 spin_unlock(&inode->i_lock);
1150 1142
1151 INIT_LIST_HEAD(&locks_to_send); 1143 INIT_LIST_HEAD(&locks_to_send);
1152 1144
1153 /* 1145 /*
1154 * Allocating count locks is enough because no FL_POSIX locks can be 1146 * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks
1155 * added to the list while we are holding cinode->lock_sem that 1147 * can be added to the list while we are holding cinode->lock_sem that
1156 * protects locking operations of this inode. 1148 * protects locking operations of this inode.
1157 */ 1149 */
1158 for (; i < count; i++) { 1150 for (i = 0; i < flctx->flc_posix_cnt; i++) {
1159 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1151 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1160 if (!lck) { 1152 if (!lck) {
1161 rc = -ENOMEM; 1153 rc = -ENOMEM;
@@ -1165,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1165 } 1157 }
1166 1158
1167 el = locks_to_send.next; 1159 el = locks_to_send.next;
1168 spin_lock(&inode->i_lock); 1160 spin_lock(&flctx->flc_lock);
1169 cifs_for_each_lock(inode, before) { 1161 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1170 flock = *before;
1171 if ((flock->fl_flags & FL_POSIX) == 0)
1172 continue;
1173 if (el == &locks_to_send) { 1162 if (el == &locks_to_send) {
1174 /* 1163 /*
1175 * The list ended. We don't have enough allocated 1164 * The list ended. We don't have enough allocated
@@ -1189,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1189 lck->length = length; 1178 lck->length = length;
1190 lck->type = type; 1179 lck->type = type;
1191 lck->offset = flock->fl_start; 1180 lck->offset = flock->fl_start;
1192 el = el->next;
1193 } 1181 }
1194 spin_unlock(&inode->i_lock); 1182 spin_unlock(&flctx->flc_lock);
1195 1183
1196 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1184 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1197 int stored_rc; 1185 int stored_rc;
diff --git a/fs/inode.c b/fs/inode.c
index aa149e7262ac..f30872ade6d7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -194,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
194#ifdef CONFIG_FSNOTIFY 194#ifdef CONFIG_FSNOTIFY
195 inode->i_fsnotify_mask = 0; 195 inode->i_fsnotify_mask = 0;
196#endif 196#endif
197 197 inode->i_flctx = NULL;
198 this_cpu_inc(nr_inodes); 198 this_cpu_inc(nr_inodes);
199 199
200 return 0; 200 return 0;
@@ -237,6 +237,7 @@ void __destroy_inode(struct inode *inode)
237 BUG_ON(inode_has_buffers(inode)); 237 BUG_ON(inode_has_buffers(inode));
238 security_inode_free(inode); 238 security_inode_free(inode);
239 fsnotify_inode_delete(inode); 239 fsnotify_inode_delete(inode);
240 locks_free_lock_context(inode->i_flctx);
240 if (!inode->i_nlink) { 241 if (!inode->i_nlink) {
241 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); 242 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
242 atomic_long_dec(&inode->i_sb->s_remove_count); 243 atomic_long_dec(&inode->i_sb->s_remove_count);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d12ff4e2dbe7..665ef5a05183 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
164{ 164{
165 struct inode *inode = nlmsvc_file_inode(file); 165 struct inode *inode = nlmsvc_file_inode(file);
166 struct file_lock *fl; 166 struct file_lock *fl;
167 struct file_lock_context *flctx = inode->i_flctx;
167 struct nlm_host *lockhost; 168 struct nlm_host *lockhost;
168 169
170 if (!flctx || list_empty_careful(&flctx->flc_posix))
171 return 0;
169again: 172again:
170 file->f_locks = 0; 173 file->f_locks = 0;
171 spin_lock(&inode->i_lock); 174 spin_lock(&flctx->flc_lock);
172 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 175 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
173 if (fl->fl_lmops != &nlmsvc_lock_operations) 176 if (fl->fl_lmops != &nlmsvc_lock_operations)
174 continue; 177 continue;
175 178
@@ -180,7 +183,7 @@ again:
180 if (match(lockhost, host)) { 183 if (match(lockhost, host)) {
181 struct file_lock lock = *fl; 184 struct file_lock lock = *fl;
182 185
183 spin_unlock(&inode->i_lock); 186 spin_unlock(&flctx->flc_lock);
184 lock.fl_type = F_UNLCK; 187 lock.fl_type = F_UNLCK;
185 lock.fl_start = 0; 188 lock.fl_start = 0;
186 lock.fl_end = OFFSET_MAX; 189 lock.fl_end = OFFSET_MAX;
@@ -192,7 +195,7 @@ again:
192 goto again; 195 goto again;
193 } 196 }
194 } 197 }
195 spin_unlock(&inode->i_lock); 198 spin_unlock(&flctx->flc_lock);
196 199
197 return 0; 200 return 0;
198} 201}
@@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file)
223{ 226{
224 struct inode *inode = nlmsvc_file_inode(file); 227 struct inode *inode = nlmsvc_file_inode(file);
225 struct file_lock *fl; 228 struct file_lock *fl;
229 struct file_lock_context *flctx = inode->i_flctx;
226 230
227 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) 231 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
228 return 1; 232 return 1;
229 233
230 spin_lock(&inode->i_lock); 234 if (flctx && !list_empty_careful(&flctx->flc_posix)) {
231 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 235 spin_lock(&flctx->flc_lock);
232 if (fl->fl_lmops == &nlmsvc_lock_operations) { 236 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
233 spin_unlock(&inode->i_lock); 237 if (fl->fl_lmops == &nlmsvc_lock_operations) {
234 return 1; 238 spin_unlock(&flctx->flc_lock);
239 return 1;
240 }
235 } 241 }
242 spin_unlock(&flctx->flc_lock);
236 } 243 }
237 spin_unlock(&inode->i_lock);
238 file->f_locks = 0; 244 file->f_locks = 0;
239 return 0; 245 return 0;
240} 246}
diff --git a/fs/locks.c b/fs/locks.c
index 59e2f905e4ff..4d0d41163a50 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl)
157int leases_enable = 1; 157int leases_enable = 1;
158int lease_break_time = 45; 158int lease_break_time = 45;
159 159
160#define for_each_lock(inode, lockp) \
161 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
162
163/* 160/*
164 * The global file_lock_list is only used for displaying /proc/locks, so we 161 * The global file_lock_list is only used for displaying /proc/locks, so we
165 * keep a list on each CPU, with each list protected by its own spinlock via 162 * keep a list on each CPU, with each list protected by its own spinlock via
166 * the file_lock_lglock. Note that alterations to the list also require that 163 * the file_lock_lglock. Note that alterations to the list also require that
167 * the relevant i_lock is held. 164 * the relevant flc_lock is held.
168 */ 165 */
169DEFINE_STATIC_LGLOCK(file_lock_lglock); 166DEFINE_STATIC_LGLOCK(file_lock_lglock);
170static DEFINE_PER_CPU(struct hlist_head, file_lock_list); 167static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
@@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
192 * contrast to those that are acting as records of acquired locks). 189 * contrast to those that are acting as records of acquired locks).
193 * 190 *
194 * Note that when we acquire this lock in order to change the above fields, 191 * Note that when we acquire this lock in order to change the above fields,
195 * we often hold the i_lock as well. In certain cases, when reading the fields 192 * we often hold the flc_lock as well. In certain cases, when reading the fields
196 * protected by this lock, we can skip acquiring it iff we already hold the 193 * protected by this lock, we can skip acquiring it iff we already hold the
197 * i_lock. 194 * flc_lock.
198 * 195 *
199 * In particular, adding an entry to the fl_block list requires that you hold 196 * In particular, adding an entry to the fl_block list requires that you hold
200 * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting 197 * both the flc_lock and the blocked_lock_lock (acquired in that order).
201 * an entry from the list however only requires the file_lock_lock. 198 * Deleting an entry from the list however only requires the file_lock_lock.
202 */ 199 */
203static DEFINE_SPINLOCK(blocked_lock_lock); 200static DEFINE_SPINLOCK(blocked_lock_lock);
204 201
202static struct kmem_cache *flctx_cache __read_mostly;
205static struct kmem_cache *filelock_cache __read_mostly; 203static struct kmem_cache *filelock_cache __read_mostly;
206 204
205static struct file_lock_context *
206locks_get_lock_context(struct inode *inode)
207{
208 struct file_lock_context *new;
209
210 if (likely(inode->i_flctx))
211 goto out;
212
213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
214 if (!new)
215 goto out;
216
217 spin_lock_init(&new->flc_lock);
218 INIT_LIST_HEAD(&new->flc_flock);
219 INIT_LIST_HEAD(&new->flc_posix);
220 INIT_LIST_HEAD(&new->flc_lease);
221
222 /*
223 * Assign the pointer if it's not already assigned. If it is, then
224 * free the context we just allocated.
225 */
226 spin_lock(&inode->i_lock);
227 if (likely(!inode->i_flctx)) {
228 inode->i_flctx = new;
229 new = NULL;
230 }
231 spin_unlock(&inode->i_lock);
232
233 if (new)
234 kmem_cache_free(flctx_cache, new);
235out:
236 return inode->i_flctx;
237}
238
239void
240locks_free_lock_context(struct file_lock_context *ctx)
241{
242 if (ctx) {
243 WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
244 WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
245 WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
246 kmem_cache_free(flctx_cache, ctx);
247 }
248}
249
207static void locks_init_lock_heads(struct file_lock *fl) 250static void locks_init_lock_heads(struct file_lock *fl)
208{ 251{
209 INIT_HLIST_NODE(&fl->fl_link); 252 INIT_HLIST_NODE(&fl->fl_link);
253 INIT_LIST_HEAD(&fl->fl_list);
210 INIT_LIST_HEAD(&fl->fl_block); 254 INIT_LIST_HEAD(&fl->fl_block);
211 init_waitqueue_head(&fl->fl_wait); 255 init_waitqueue_head(&fl->fl_wait);
212} 256}
@@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private);
243void locks_free_lock(struct file_lock *fl) 287void locks_free_lock(struct file_lock *fl)
244{ 288{
245 BUG_ON(waitqueue_active(&fl->fl_wait)); 289 BUG_ON(waitqueue_active(&fl->fl_wait));
290 BUG_ON(!list_empty(&fl->fl_list));
246 BUG_ON(!list_empty(&fl->fl_block)); 291 BUG_ON(!list_empty(&fl->fl_block));
247 BUG_ON(!hlist_unhashed(&fl->fl_link)); 292 BUG_ON(!hlist_unhashed(&fl->fl_link));
248 293
@@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose)
257 struct file_lock *fl; 302 struct file_lock *fl;
258 303
259 while (!list_empty(dispose)) { 304 while (!list_empty(dispose)) {
260 fl = list_first_entry(dispose, struct file_lock, fl_block); 305 fl = list_first_entry(dispose, struct file_lock, fl_list);
261 list_del_init(&fl->fl_block); 306 list_del_init(&fl->fl_list);
262 locks_free_lock(fl); 307 locks_free_lock(fl);
263 } 308 }
264} 309}
@@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
513 return fl1->fl_owner == fl2->fl_owner; 558 return fl1->fl_owner == fl2->fl_owner;
514} 559}
515 560
516/* Must be called with the i_lock held! */ 561/* Must be called with the flc_lock held! */
517static void locks_insert_global_locks(struct file_lock *fl) 562static void locks_insert_global_locks(struct file_lock *fl)
518{ 563{
519 lg_local_lock(&file_lock_lglock); 564 lg_local_lock(&file_lock_lglock);
@@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl)
522 lg_local_unlock(&file_lock_lglock); 567 lg_local_unlock(&file_lock_lglock);
523} 568}
524 569
525/* Must be called with the i_lock held! */ 570/* Must be called with the flc_lock held! */
526static void locks_delete_global_locks(struct file_lock *fl) 571static void locks_delete_global_locks(struct file_lock *fl)
527{ 572{
528 /* 573 /*
529 * Avoid taking lock if already unhashed. This is safe since this check 574 * Avoid taking lock if already unhashed. This is safe since this check
530 * is done while holding the i_lock, and new insertions into the list 575 * is done while holding the flc_lock, and new insertions into the list
531 * also require that it be held. 576 * also require that it be held.
532 */ 577 */
533 if (hlist_unhashed(&fl->fl_link)) 578 if (hlist_unhashed(&fl->fl_link))
@@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter)
579 * the order they blocked. The documentation doesn't require this but 624 * the order they blocked. The documentation doesn't require this but
580 * it seems like the reasonable thing to do. 625 * it seems like the reasonable thing to do.
581 * 626 *
582 * Must be called with both the i_lock and blocked_lock_lock held. The fl_block 627 * Must be called with both the flc_lock and blocked_lock_lock held. The
583 * list itself is protected by the blocked_lock_lock, but by ensuring that the 628 * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
584 * i_lock is also held on insertions we can avoid taking the blocked_lock_lock 629 * that the flc_lock is also held on insertions we can avoid taking the
585 * in some cases when we see that the fl_block list is empty. 630 * blocked_lock_lock in some cases when we see that the fl_block list is empty.
586 */ 631 */
587static void __locks_insert_block(struct file_lock *blocker, 632static void __locks_insert_block(struct file_lock *blocker,
588 struct file_lock *waiter) 633 struct file_lock *waiter)
@@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker,
594 locks_insert_global_blocked(waiter); 639 locks_insert_global_blocked(waiter);
595} 640}
596 641
597/* Must be called with i_lock held. */ 642/* Must be called with flc_lock held. */
598static void locks_insert_block(struct file_lock *blocker, 643static void locks_insert_block(struct file_lock *blocker,
599 struct file_lock *waiter) 644 struct file_lock *waiter)
600{ 645{
@@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker,
606/* 651/*
607 * Wake up processes blocked waiting for blocker. 652 * Wake up processes blocked waiting for blocker.
608 * 653 *
609 * Must be called with the inode->i_lock held! 654 * Must be called with the inode->flc_lock held!
610 */ 655 */
611static void locks_wake_up_blocks(struct file_lock *blocker) 656static void locks_wake_up_blocks(struct file_lock *blocker)
612{ 657{
613 /* 658 /*
614 * Avoid taking global lock if list is empty. This is safe since new 659 * Avoid taking global lock if list is empty. This is safe since new
615 * blocked requests are only added to the list under the i_lock, and 660 * blocked requests are only added to the list under the flc_lock, and
616 * the i_lock is always held here. Note that removal from the fl_block 661 * the flc_lock is always held here. Note that removal from the fl_block
617 * list does not require the i_lock, so we must recheck list_empty() 662 * list does not require the flc_lock, so we must recheck list_empty()
618 * after acquiring the blocked_lock_lock. 663 * after acquiring the blocked_lock_lock.
619 */ 664 */
620 if (list_empty(&blocker->fl_block)) 665 if (list_empty(&blocker->fl_block))
@@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
635 spin_unlock(&blocked_lock_lock); 680 spin_unlock(&blocked_lock_lock);
636} 681}
637 682
638/* Insert file lock fl into an inode's lock list at the position indicated 683static void
639 * by pos. At the same time add the lock to the global file lock list. 684locks_insert_lock_ctx(struct file_lock *fl, int *counter,
640 * 685 struct list_head *before)
641 * Must be called with the i_lock held!
642 */
643static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
644{ 686{
645 fl->fl_nspid = get_pid(task_tgid(current)); 687 fl->fl_nspid = get_pid(task_tgid(current));
646 688 list_add_tail(&fl->fl_list, before);
647 /* insert into file's list */ 689 ++*counter;
648 fl->fl_next = *pos;
649 *pos = fl;
650
651 locks_insert_global_locks(fl); 690 locks_insert_global_locks(fl);
652} 691}
653 692
654/** 693static void
655 * locks_delete_lock - Delete a lock and then free it. 694locks_unlink_lock_ctx(struct file_lock *fl, int *counter)
656 * @thisfl_p: pointer that points to the fl_next field of the previous
657 * inode->i_flock list entry
658 *
659 * Unlink a lock from all lists and free the namespace reference, but don't
660 * free it yet. Wake up processes that are blocked waiting for this lock and
661 * notify the FS that the lock has been cleared.
662 *
663 * Must be called with the i_lock held!
664 */
665static void locks_unlink_lock(struct file_lock **thisfl_p)
666{ 695{
667 struct file_lock *fl = *thisfl_p;
668
669 locks_delete_global_locks(fl); 696 locks_delete_global_locks(fl);
670 697 list_del_init(&fl->fl_list);
671 *thisfl_p = fl->fl_next; 698 --*counter;
672 fl->fl_next = NULL;
673
674 if (fl->fl_nspid) { 699 if (fl->fl_nspid) {
675 put_pid(fl->fl_nspid); 700 put_pid(fl->fl_nspid);
676 fl->fl_nspid = NULL; 701 fl->fl_nspid = NULL;
677 } 702 }
678
679 locks_wake_up_blocks(fl); 703 locks_wake_up_blocks(fl);
680} 704}
681 705
682/* 706static void
683 * Unlink a lock from all lists and free it. 707locks_delete_lock_ctx(struct file_lock *fl, int *counter,
684 * 708 struct list_head *dispose)
685 * Must be called with i_lock held!
686 */
687static void locks_delete_lock(struct file_lock **thisfl_p,
688 struct list_head *dispose)
689{ 709{
690 struct file_lock *fl = *thisfl_p; 710 locks_unlink_lock_ctx(fl, counter);
691
692 locks_unlink_lock(thisfl_p);
693 if (dispose) 711 if (dispose)
694 list_add(&fl->fl_block, dispose); 712 list_add(&fl->fl_list, dispose);
695 else 713 else
696 locks_free_lock(fl); 714 locks_free_lock(fl);
697} 715}
@@ -746,22 +764,27 @@ void
746posix_test_lock(struct file *filp, struct file_lock *fl) 764posix_test_lock(struct file *filp, struct file_lock *fl)
747{ 765{
748 struct file_lock *cfl; 766 struct file_lock *cfl;
767 struct file_lock_context *ctx;
749 struct inode *inode = file_inode(filp); 768 struct inode *inode = file_inode(filp);
750 769
751 spin_lock(&inode->i_lock); 770 ctx = inode->i_flctx;
752 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { 771 if (!ctx || list_empty_careful(&ctx->flc_posix)) {
753 if (!IS_POSIX(cfl))
754 continue;
755 if (posix_locks_conflict(fl, cfl))
756 break;
757 }
758 if (cfl) {
759 locks_copy_conflock(fl, cfl);
760 if (cfl->fl_nspid)
761 fl->fl_pid = pid_vnr(cfl->fl_nspid);
762 } else
763 fl->fl_type = F_UNLCK; 772 fl->fl_type = F_UNLCK;
764 spin_unlock(&inode->i_lock); 773 return;
774 }
775
776 spin_lock(&ctx->flc_lock);
777 list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
778 if (posix_locks_conflict(fl, cfl)) {
779 locks_copy_conflock(fl, cfl);
780 if (cfl->fl_nspid)
781 fl->fl_pid = pid_vnr(cfl->fl_nspid);
782 goto out;
783 }
784 }
785 fl->fl_type = F_UNLCK;
786out:
787 spin_unlock(&ctx->flc_lock);
765 return; 788 return;
766} 789}
767EXPORT_SYMBOL(posix_test_lock); 790EXPORT_SYMBOL(posix_test_lock);
@@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
845static int flock_lock_file(struct file *filp, struct file_lock *request) 868static int flock_lock_file(struct file *filp, struct file_lock *request)
846{ 869{
847 struct file_lock *new_fl = NULL; 870 struct file_lock *new_fl = NULL;
848 struct file_lock **before; 871 struct file_lock *fl;
849 struct inode * inode = file_inode(filp); 872 struct file_lock_context *ctx;
873 struct inode *inode = file_inode(filp);
850 int error = 0; 874 int error = 0;
851 int found = 0; 875 bool found = false;
852 LIST_HEAD(dispose); 876 LIST_HEAD(dispose);
853 877
878 ctx = locks_get_lock_context(inode);
879 if (!ctx)
880 return -ENOMEM;
881
854 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 882 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
855 new_fl = locks_alloc_lock(); 883 new_fl = locks_alloc_lock();
856 if (!new_fl) 884 if (!new_fl)
857 return -ENOMEM; 885 return -ENOMEM;
858 } 886 }
859 887
860 spin_lock(&inode->i_lock); 888 spin_lock(&ctx->flc_lock);
861 if (request->fl_flags & FL_ACCESS) 889 if (request->fl_flags & FL_ACCESS)
862 goto find_conflict; 890 goto find_conflict;
863 891
864 for_each_lock(inode, before) { 892 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
865 struct file_lock *fl = *before;
866 if (IS_POSIX(fl))
867 break;
868 if (IS_LEASE(fl))
869 continue;
870 if (filp != fl->fl_file) 893 if (filp != fl->fl_file)
871 continue; 894 continue;
872 if (request->fl_type == fl->fl_type) 895 if (request->fl_type == fl->fl_type)
873 goto out; 896 goto out;
874 found = 1; 897 found = true;
875 locks_delete_lock(before, &dispose); 898 locks_delete_lock_ctx(fl, &ctx->flc_flock_cnt, &dispose);
876 break; 899 break;
877 } 900 }
878 901
@@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
887 * give it the opportunity to lock the file. 910 * give it the opportunity to lock the file.
888 */ 911 */
889 if (found) { 912 if (found) {
890 spin_unlock(&inode->i_lock); 913 spin_unlock(&ctx->flc_lock);
891 cond_resched(); 914 cond_resched();
892 spin_lock(&inode->i_lock); 915 spin_lock(&ctx->flc_lock);
893 } 916 }
894 917
895find_conflict: 918find_conflict:
896 for_each_lock(inode, before) { 919 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
897 struct file_lock *fl = *before;
898 if (IS_POSIX(fl))
899 break;
900 if (IS_LEASE(fl))
901 continue;
902 if (!flock_locks_conflict(request, fl)) 920 if (!flock_locks_conflict(request, fl))
903 continue; 921 continue;
904 error = -EAGAIN; 922 error = -EAGAIN;
@@ -911,12 +929,12 @@ find_conflict:
911 if (request->fl_flags & FL_ACCESS) 929 if (request->fl_flags & FL_ACCESS)
912 goto out; 930 goto out;
913 locks_copy_lock(new_fl, request); 931 locks_copy_lock(new_fl, request);
914 locks_insert_lock(before, new_fl); 932 locks_insert_lock_ctx(new_fl, &ctx->flc_flock_cnt, &ctx->flc_flock);
915 new_fl = NULL; 933 new_fl = NULL;
916 error = 0; 934 error = 0;
917 935
918out: 936out:
919 spin_unlock(&inode->i_lock); 937 spin_unlock(&ctx->flc_lock);
920 if (new_fl) 938 if (new_fl)
921 locks_free_lock(new_fl); 939 locks_free_lock(new_fl);
922 locks_dispose_list(&dispose); 940 locks_dispose_list(&dispose);
@@ -925,16 +943,20 @@ out:
925 943
926static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 944static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
927{ 945{
928 struct file_lock *fl; 946 struct file_lock *fl, *tmp;
929 struct file_lock *new_fl = NULL; 947 struct file_lock *new_fl = NULL;
930 struct file_lock *new_fl2 = NULL; 948 struct file_lock *new_fl2 = NULL;
931 struct file_lock *left = NULL; 949 struct file_lock *left = NULL;
932 struct file_lock *right = NULL; 950 struct file_lock *right = NULL;
933 struct file_lock **before; 951 struct file_lock_context *ctx;
934 int error; 952 int error;
935 bool added = false; 953 bool added = false;
936 LIST_HEAD(dispose); 954 LIST_HEAD(dispose);
937 955
956 ctx = locks_get_lock_context(inode);
957 if (!ctx)
958 return -ENOMEM;
959
938 /* 960 /*
939 * We may need two file_lock structures for this operation, 961 * We may need two file_lock structures for this operation,
940 * so we get them in advance to avoid races. 962 * so we get them in advance to avoid races.
@@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
948 new_fl2 = locks_alloc_lock(); 970 new_fl2 = locks_alloc_lock();
949 } 971 }
950 972
951 spin_lock(&inode->i_lock); 973 spin_lock(&ctx->flc_lock);
952 /* 974 /*
953 * New lock request. Walk all POSIX locks and look for conflicts. If 975 * New lock request. Walk all POSIX locks and look for conflicts. If
954 * there are any, either return error or put the request on the 976 * there are any, either return error or put the request on the
955 * blocker's list of waiters and the global blocked_hash. 977 * blocker's list of waiters and the global blocked_hash.
956 */ 978 */
957 if (request->fl_type != F_UNLCK) { 979 if (request->fl_type != F_UNLCK) {
958 for_each_lock(inode, before) { 980 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
959 fl = *before;
960 if (!IS_POSIX(fl)) 981 if (!IS_POSIX(fl))
961 continue; 982 continue;
962 if (!posix_locks_conflict(request, fl)) 983 if (!posix_locks_conflict(request, fl))
@@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
986 if (request->fl_flags & FL_ACCESS) 1007 if (request->fl_flags & FL_ACCESS)
987 goto out; 1008 goto out;
988 1009
989 /* 1010 /* Find the first old lock with the same owner as the new lock */
990 * Find the first old lock with the same owner as the new lock. 1011 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
991 */ 1012 if (posix_same_owner(request, fl))
992 1013 break;
993 before = &inode->i_flock;
994
995 /* First skip locks owned by other processes. */
996 while ((fl = *before) && (!IS_POSIX(fl) ||
997 !posix_same_owner(request, fl))) {
998 before = &fl->fl_next;
999 } 1014 }
1000 1015
1001 /* Process locks with this owner. */ 1016 /* Process locks with this owner. */
1002 while ((fl = *before) && posix_same_owner(request, fl)) { 1017 list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1003 /* Detect adjacent or overlapping regions (if same lock type) 1018 if (!posix_same_owner(request, fl))
1004 */ 1019 break;
1020
1021 /* Detect adjacent or overlapping regions (if same lock type) */
1005 if (request->fl_type == fl->fl_type) { 1022 if (request->fl_type == fl->fl_type) {
1006 /* In all comparisons of start vs end, use 1023 /* In all comparisons of start vs end, use
1007 * "start - 1" rather than "end + 1". If end 1024 * "start - 1" rather than "end + 1". If end
1008 * is OFFSET_MAX, end + 1 will become negative. 1025 * is OFFSET_MAX, end + 1 will become negative.
1009 */ 1026 */
1010 if (fl->fl_end < request->fl_start - 1) 1027 if (fl->fl_end < request->fl_start - 1)
1011 goto next_lock; 1028 continue;
1012 /* If the next lock in the list has entirely bigger 1029 /* If the next lock in the list has entirely bigger
1013 * addresses than the new one, insert the lock here. 1030 * addresses than the new one, insert the lock here.
1014 */ 1031 */
@@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1029 else 1046 else
1030 request->fl_end = fl->fl_end; 1047 request->fl_end = fl->fl_end;
1031 if (added) { 1048 if (added) {
1032 locks_delete_lock(before, &dispose); 1049 locks_delete_lock_ctx(fl, &ctx->flc_posix_cnt,
1050 &dispose);
1033 continue; 1051 continue;
1034 } 1052 }
1035 request = fl; 1053 request = fl;
1036 added = true; 1054 added = true;
1037 } 1055 } else {
1038 else {
1039 /* Processing for different lock types is a bit 1056 /* Processing for different lock types is a bit
1040 * more complex. 1057 * more complex.
1041 */ 1058 */
1042 if (fl->fl_end < request->fl_start) 1059 if (fl->fl_end < request->fl_start)
1043 goto next_lock; 1060 continue;
1044 if (fl->fl_start > request->fl_end) 1061 if (fl->fl_start > request->fl_end)
1045 break; 1062 break;
1046 if (request->fl_type == F_UNLCK) 1063 if (request->fl_type == F_UNLCK)
@@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1059 * one (This may happen several times). 1076 * one (This may happen several times).
1060 */ 1077 */
1061 if (added) { 1078 if (added) {
1062 locks_delete_lock(before, &dispose); 1079 locks_delete_lock_ctx(fl,
1080 &ctx->flc_posix_cnt, &dispose);
1063 continue; 1081 continue;
1064 } 1082 }
1065 /* 1083 /*
@@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1075 locks_copy_lock(new_fl, request); 1093 locks_copy_lock(new_fl, request);
1076 request = new_fl; 1094 request = new_fl;
1077 new_fl = NULL; 1095 new_fl = NULL;
1078 locks_delete_lock(before, &dispose); 1096 locks_insert_lock_ctx(request,
1079 locks_insert_lock(before, request); 1097 &ctx->flc_posix_cnt, &fl->fl_list);
1098 locks_delete_lock_ctx(fl,
1099 &ctx->flc_posix_cnt, &dispose);
1080 added = true; 1100 added = true;
1081 } 1101 }
1082 } 1102 }
1083 /* Go on to next lock.
1084 */
1085 next_lock:
1086 before = &fl->fl_next;
1087 } 1103 }
1088 1104
1089 /* 1105 /*
@@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1108 goto out; 1124 goto out;
1109 } 1125 }
1110 locks_copy_lock(new_fl, request); 1126 locks_copy_lock(new_fl, request);
1111 locks_insert_lock(before, new_fl); 1127 locks_insert_lock_ctx(new_fl, &ctx->flc_posix_cnt,
1128 &fl->fl_list);
1112 new_fl = NULL; 1129 new_fl = NULL;
1113 } 1130 }
1114 if (right) { 1131 if (right) {
@@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1119 left = new_fl2; 1136 left = new_fl2;
1120 new_fl2 = NULL; 1137 new_fl2 = NULL;
1121 locks_copy_lock(left, right); 1138 locks_copy_lock(left, right);
1122 locks_insert_lock(before, left); 1139 locks_insert_lock_ctx(left, &ctx->flc_posix_cnt,
1140 &fl->fl_list);
1123 } 1141 }
1124 right->fl_start = request->fl_end + 1; 1142 right->fl_start = request->fl_end + 1;
1125 locks_wake_up_blocks(right); 1143 locks_wake_up_blocks(right);
@@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1129 locks_wake_up_blocks(left); 1147 locks_wake_up_blocks(left);
1130 } 1148 }
1131 out: 1149 out:
1132 spin_unlock(&inode->i_lock); 1150 spin_unlock(&ctx->flc_lock);
1133 /* 1151 /*
1134 * Free any unused locks. 1152 * Free any unused locks.
1135 */ 1153 */
@@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait);
1199 */ 1217 */
1200int locks_mandatory_locked(struct file *file) 1218int locks_mandatory_locked(struct file *file)
1201{ 1219{
1220 int ret;
1202 struct inode *inode = file_inode(file); 1221 struct inode *inode = file_inode(file);
1222 struct file_lock_context *ctx;
1203 struct file_lock *fl; 1223 struct file_lock *fl;
1204 1224
1225 ctx = inode->i_flctx;
1226 if (!ctx || list_empty_careful(&ctx->flc_posix))
1227 return 0;
1228
1205 /* 1229 /*
1206 * Search the lock list for this inode for any POSIX locks. 1230 * Search the lock list for this inode for any POSIX locks.
1207 */ 1231 */
1208 spin_lock(&inode->i_lock); 1232 spin_lock(&ctx->flc_lock);
1209 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1233 ret = 0;
1210 if (!IS_POSIX(fl)) 1234 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1211 continue;
1212 if (fl->fl_owner != current->files && 1235 if (fl->fl_owner != current->files &&
1213 fl->fl_owner != file) 1236 fl->fl_owner != file) {
1237 ret = -EAGAIN;
1214 break; 1238 break;
1239 }
1215 } 1240 }
1216 spin_unlock(&inode->i_lock); 1241 spin_unlock(&ctx->flc_lock);
1217 return fl ? -EAGAIN : 0; 1242 return ret;
1218} 1243}
1219 1244
1220/** 1245/**
@@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg)
1294} 1319}
1295 1320
1296/* We already had a lease on this file; just change its type */ 1321/* We already had a lease on this file; just change its type */
1297int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) 1322int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1298{ 1323{
1299 struct file_lock *fl = *before; 1324 struct file_lock_context *flctx;
1300 int error = assign_type(fl, arg); 1325 int error = assign_type(fl, arg);
1301 1326
1302 if (error) 1327 if (error)
@@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
1306 if (arg == F_UNLCK) { 1331 if (arg == F_UNLCK) {
1307 struct file *filp = fl->fl_file; 1332 struct file *filp = fl->fl_file;
1308 1333
1334 flctx = file_inode(filp)->i_flctx;
1309 f_delown(filp); 1335 f_delown(filp);
1310 filp->f_owner.signum = 0; 1336 filp->f_owner.signum = 0;
1311 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); 1337 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
@@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
1313 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 1339 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1314 fl->fl_fasync = NULL; 1340 fl->fl_fasync = NULL;
1315 } 1341 }
1316 locks_delete_lock(before, dispose); 1342 locks_delete_lock_ctx(fl, &flctx->flc_lease_cnt, dispose);
1317 } 1343 }
1318 return 0; 1344 return 0;
1319} 1345}
@@ -1329,20 +1355,17 @@ static bool past_time(unsigned long then)
1329 1355
1330static void time_out_leases(struct inode *inode, struct list_head *dispose) 1356static void time_out_leases(struct inode *inode, struct list_head *dispose)
1331{ 1357{
1332 struct file_lock **before; 1358 struct file_lock_context *ctx = inode->i_flctx;
1333 struct file_lock *fl; 1359 struct file_lock *fl, *tmp;
1334 1360
1335 lockdep_assert_held(&inode->i_lock); 1361 lockdep_assert_held(&ctx->flc_lock);
1336 1362
1337 before = &inode->i_flock; 1363 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1338 while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
1339 trace_time_out_leases(inode, fl); 1364 trace_time_out_leases(inode, fl);
1340 if (past_time(fl->fl_downgrade_time)) 1365 if (past_time(fl->fl_downgrade_time))
1341 lease_modify(before, F_RDLCK, dispose); 1366 lease_modify(fl, F_RDLCK, dispose);
1342 if (past_time(fl->fl_break_time)) 1367 if (past_time(fl->fl_break_time))
1343 lease_modify(before, F_UNLCK, dispose); 1368 lease_modify(fl, F_UNLCK, dispose);
1344 if (fl == *before) /* lease_modify may have freed fl */
1345 before = &fl->fl_next;
1346 } 1369 }
1347} 1370}
1348 1371
@@ -1356,11 +1379,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1356static bool 1379static bool
1357any_leases_conflict(struct inode *inode, struct file_lock *breaker) 1380any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1358{ 1381{
1382 struct file_lock_context *ctx = inode->i_flctx;
1359 struct file_lock *fl; 1383 struct file_lock *fl;
1360 1384
1361 lockdep_assert_held(&inode->i_lock); 1385 lockdep_assert_held(&ctx->flc_lock);
1362 1386
1363 for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { 1387 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1364 if (leases_conflict(fl, breaker)) 1388 if (leases_conflict(fl, breaker))
1365 return true; 1389 return true;
1366 } 1390 }
@@ -1384,7 +1408,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1384{ 1408{
1385 int error = 0; 1409 int error = 0;
1386 struct file_lock *new_fl; 1410 struct file_lock *new_fl;
1387 struct file_lock *fl, **before; 1411 struct file_lock_context *ctx = inode->i_flctx;
1412 struct file_lock *fl;
1388 unsigned long break_time; 1413 unsigned long break_time;
1389 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1414 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1390 LIST_HEAD(dispose); 1415 LIST_HEAD(dispose);
@@ -1394,7 +1419,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1394 return PTR_ERR(new_fl); 1419 return PTR_ERR(new_fl);
1395 new_fl->fl_flags = type; 1420 new_fl->fl_flags = type;
1396 1421
1397 spin_lock(&inode->i_lock); 1422 /* typically we will check that ctx is non-NULL before calling */
1423 if (!ctx) {
1424 WARN_ON_ONCE(1);
1425 return error;
1426 }
1427
1428 spin_lock(&ctx->flc_lock);
1398 1429
1399 time_out_leases(inode, &dispose); 1430 time_out_leases(inode, &dispose);
1400 1431
@@ -1408,9 +1439,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1408 break_time++; /* so that 0 means no break time */ 1439 break_time++; /* so that 0 means no break time */
1409 } 1440 }
1410 1441
1411 for (before = &inode->i_flock; 1442 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1412 ((fl = *before) != NULL) && IS_LEASE(fl);
1413 before = &fl->fl_next) {
1414 if (!leases_conflict(fl, new_fl)) 1443 if (!leases_conflict(fl, new_fl))
1415 continue; 1444 continue;
1416 if (want_write) { 1445 if (want_write) {
@@ -1419,17 +1448,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1419 fl->fl_flags |= FL_UNLOCK_PENDING; 1448 fl->fl_flags |= FL_UNLOCK_PENDING;
1420 fl->fl_break_time = break_time; 1449 fl->fl_break_time = break_time;
1421 } else { 1450 } else {
1422 if (lease_breaking(inode->i_flock)) 1451 if (lease_breaking(fl))
1423 continue; 1452 continue;
1424 fl->fl_flags |= FL_DOWNGRADE_PENDING; 1453 fl->fl_flags |= FL_DOWNGRADE_PENDING;
1425 fl->fl_downgrade_time = break_time; 1454 fl->fl_downgrade_time = break_time;
1426 } 1455 }
1427 if (fl->fl_lmops->lm_break(fl)) 1456 if (fl->fl_lmops->lm_break(fl))
1428 locks_delete_lock(before, &dispose); 1457 locks_delete_lock_ctx(fl, &ctx->flc_lease_cnt,
1458 &dispose);
1429 } 1459 }
1430 1460
1431 fl = inode->i_flock; 1461 if (list_empty(&ctx->flc_lease))
1432 if (!fl || !IS_LEASE(fl))
1433 goto out; 1462 goto out;
1434 1463
1435 if (mode & O_NONBLOCK) { 1464 if (mode & O_NONBLOCK) {
@@ -1439,18 +1468,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1439 } 1468 }
1440 1469
1441restart: 1470restart:
1442 break_time = inode->i_flock->fl_break_time; 1471 fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1472 break_time = fl->fl_break_time;
1443 if (break_time != 0) 1473 if (break_time != 0)
1444 break_time -= jiffies; 1474 break_time -= jiffies;
1445 if (break_time == 0) 1475 if (break_time == 0)
1446 break_time++; 1476 break_time++;
1447 locks_insert_block(inode->i_flock, new_fl); 1477 locks_insert_block(fl, new_fl);
1448 trace_break_lease_block(inode, new_fl); 1478 trace_break_lease_block(inode, new_fl);
1449 spin_unlock(&inode->i_lock); 1479 spin_unlock(&ctx->flc_lock);
1450 locks_dispose_list(&dispose); 1480 locks_dispose_list(&dispose);
1451 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1481 error = wait_event_interruptible_timeout(new_fl->fl_wait,
1452 !new_fl->fl_next, break_time); 1482 !new_fl->fl_next, break_time);
1453 spin_lock(&inode->i_lock); 1483 spin_lock(&ctx->flc_lock);
1454 trace_break_lease_unblock(inode, new_fl); 1484 trace_break_lease_unblock(inode, new_fl);
1455 locks_delete_block(new_fl); 1485 locks_delete_block(new_fl);
1456 if (error >= 0) { 1486 if (error >= 0) {
@@ -1462,12 +1492,10 @@ restart:
1462 time_out_leases(inode, &dispose); 1492 time_out_leases(inode, &dispose);
1463 if (any_leases_conflict(inode, new_fl)) 1493 if (any_leases_conflict(inode, new_fl))
1464 goto restart; 1494 goto restart;
1465
1466 error = 0; 1495 error = 0;
1467 } 1496 }
1468
1469out: 1497out:
1470 spin_unlock(&inode->i_lock); 1498 spin_unlock(&ctx->flc_lock);
1471 locks_dispose_list(&dispose); 1499 locks_dispose_list(&dispose);
1472 locks_free_lock(new_fl); 1500 locks_free_lock(new_fl);
1473 return error; 1501 return error;
@@ -1487,14 +1515,18 @@ EXPORT_SYMBOL(__break_lease);
1487void lease_get_mtime(struct inode *inode, struct timespec *time) 1515void lease_get_mtime(struct inode *inode, struct timespec *time)
1488{ 1516{
1489 bool has_lease = false; 1517 bool has_lease = false;
1490 struct file_lock *flock; 1518 struct file_lock_context *ctx = inode->i_flctx;
1519 struct file_lock *fl;
1491 1520
1492 if (inode->i_flock) { 1521 if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1493 spin_lock(&inode->i_lock); 1522 spin_lock(&ctx->flc_lock);
1494 flock = inode->i_flock; 1523 if (!list_empty(&ctx->flc_lease)) {
1495 if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) 1524 fl = list_first_entry(&ctx->flc_lease,
1496 has_lease = true; 1525 struct file_lock, fl_list);
1497 spin_unlock(&inode->i_lock); 1526 if (fl->fl_type == F_WRLCK)
1527 has_lease = true;
1528 }
1529 spin_unlock(&ctx->flc_lock);
1498 } 1530 }
1499 1531
1500 if (has_lease) 1532 if (has_lease)
@@ -1532,20 +1564,22 @@ int fcntl_getlease(struct file *filp)
1532{ 1564{
1533 struct file_lock *fl; 1565 struct file_lock *fl;
1534 struct inode *inode = file_inode(filp); 1566 struct inode *inode = file_inode(filp);
1567 struct file_lock_context *ctx = inode->i_flctx;
1535 int type = F_UNLCK; 1568 int type = F_UNLCK;
1536 LIST_HEAD(dispose); 1569 LIST_HEAD(dispose);
1537 1570
1538 spin_lock(&inode->i_lock); 1571 if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1539 time_out_leases(file_inode(filp), &dispose); 1572 spin_lock(&ctx->flc_lock);
1540 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); 1573 time_out_leases(file_inode(filp), &dispose);
1541 fl = fl->fl_next) { 1574 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1542 if (fl->fl_file == filp) { 1575 if (fl->fl_file != filp)
1576 continue;
1543 type = target_leasetype(fl); 1577 type = target_leasetype(fl);
1544 break; 1578 break;
1545 } 1579 }
1580 spin_unlock(&ctx->flc_lock);
1581 locks_dispose_list(&dispose);
1546 } 1582 }
1547 spin_unlock(&inode->i_lock);
1548 locks_dispose_list(&dispose);
1549 return type; 1583 return type;
1550} 1584}
1551 1585
@@ -1578,9 +1612,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg)
1578static int 1612static int
1579generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) 1613generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
1580{ 1614{
1581 struct file_lock *fl, **before, **my_before = NULL, *lease; 1615 struct file_lock *fl, *my_fl = NULL, *lease;
1582 struct dentry *dentry = filp->f_path.dentry; 1616 struct dentry *dentry = filp->f_path.dentry;
1583 struct inode *inode = dentry->d_inode; 1617 struct inode *inode = dentry->d_inode;
1618 struct file_lock_context *ctx;
1584 bool is_deleg = (*flp)->fl_flags & FL_DELEG; 1619 bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1585 int error; 1620 int error;
1586 LIST_HEAD(dispose); 1621 LIST_HEAD(dispose);
@@ -1588,6 +1623,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1588 lease = *flp; 1623 lease = *flp;
1589 trace_generic_add_lease(inode, lease); 1624 trace_generic_add_lease(inode, lease);
1590 1625
1626 ctx = locks_get_lock_context(inode);
1627 if (!ctx)
1628 return -ENOMEM;
1629
1591 /* 1630 /*
1592 * In the delegation case we need mutual exclusion with 1631 * In the delegation case we need mutual exclusion with
1593 * a number of operations that take the i_mutex. We trylock 1632 * a number of operations that take the i_mutex. We trylock
@@ -1606,7 +1645,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1606 return -EINVAL; 1645 return -EINVAL;
1607 } 1646 }
1608 1647
1609 spin_lock(&inode->i_lock); 1648 spin_lock(&ctx->flc_lock);
1610 time_out_leases(inode, &dispose); 1649 time_out_leases(inode, &dispose);
1611 error = check_conflicting_open(dentry, arg); 1650 error = check_conflicting_open(dentry, arg);
1612 if (error) 1651 if (error)
@@ -1621,13 +1660,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1621 * except for this filp. 1660 * except for this filp.
1622 */ 1661 */
1623 error = -EAGAIN; 1662 error = -EAGAIN;
1624 for (before = &inode->i_flock; 1663 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1625 ((fl = *before) != NULL) && IS_LEASE(fl);
1626 before = &fl->fl_next) {
1627 if (fl->fl_file == filp) { 1664 if (fl->fl_file == filp) {
1628 my_before = before; 1665 my_fl = fl;
1629 continue; 1666 continue;
1630 } 1667 }
1668
1631 /* 1669 /*
1632 * No exclusive leases if someone else has a lease on 1670 * No exclusive leases if someone else has a lease on
1633 * this file: 1671 * this file:
@@ -1642,9 +1680,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1642 goto out; 1680 goto out;
1643 } 1681 }
1644 1682
1645 if (my_before != NULL) { 1683 if (my_fl != NULL) {
1646 lease = *my_before; 1684 error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
1647 error = lease->fl_lmops->lm_change(my_before, arg, &dispose);
1648 if (error) 1685 if (error)
1649 goto out; 1686 goto out;
1650 goto out_setup; 1687 goto out_setup;
@@ -1654,7 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1654 if (!leases_enable) 1691 if (!leases_enable)
1655 goto out; 1692 goto out;
1656 1693
1657 locks_insert_lock(before, lease); 1694 locks_insert_lock_ctx(lease, &ctx->flc_lease_cnt, &ctx->flc_lease);
1658 /* 1695 /*
1659 * The check in break_lease() is lockless. It's possible for another 1696 * The check in break_lease() is lockless. It's possible for another
1660 * open to race in after we did the earlier check for a conflicting 1697 * open to race in after we did the earlier check for a conflicting
@@ -1666,45 +1703,49 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1666 */ 1703 */
1667 smp_mb(); 1704 smp_mb();
1668 error = check_conflicting_open(dentry, arg); 1705 error = check_conflicting_open(dentry, arg);
1669 if (error) 1706 if (error) {
1670 goto out_unlink; 1707 locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt);
1708 goto out;
1709 }
1671 1710
1672out_setup: 1711out_setup:
1673 if (lease->fl_lmops->lm_setup) 1712 if (lease->fl_lmops->lm_setup)
1674 lease->fl_lmops->lm_setup(lease, priv); 1713 lease->fl_lmops->lm_setup(lease, priv);
1675out: 1714out:
1676 spin_unlock(&inode->i_lock); 1715 spin_unlock(&ctx->flc_lock);
1677 locks_dispose_list(&dispose); 1716 locks_dispose_list(&dispose);
1678 if (is_deleg) 1717 if (is_deleg)
1679 mutex_unlock(&inode->i_mutex); 1718 mutex_unlock(&inode->i_mutex);
1680 if (!error && !my_before) 1719 if (!error && !my_fl)
1681 *flp = NULL; 1720 *flp = NULL;
1682 return error; 1721 return error;
1683out_unlink:
1684 locks_unlink_lock(before);
1685 goto out;
1686} 1722}
1687 1723
1688static int generic_delete_lease(struct file *filp) 1724static int generic_delete_lease(struct file *filp)
1689{ 1725{
1690 int error = -EAGAIN; 1726 int error = -EAGAIN;
1691 struct file_lock *fl, **before; 1727 struct file_lock *fl, *victim = NULL;
1692 struct dentry *dentry = filp->f_path.dentry; 1728 struct dentry *dentry = filp->f_path.dentry;
1693 struct inode *inode = dentry->d_inode; 1729 struct inode *inode = dentry->d_inode;
1730 struct file_lock_context *ctx = inode->i_flctx;
1694 LIST_HEAD(dispose); 1731 LIST_HEAD(dispose);
1695 1732
1696 spin_lock(&inode->i_lock); 1733 if (!ctx) {
1697 time_out_leases(inode, &dispose); 1734 trace_generic_delete_lease(inode, NULL);
1698 for (before = &inode->i_flock; 1735 return error;
1699 ((fl = *before) != NULL) && IS_LEASE(fl); 1736 }
1700 before = &fl->fl_next) { 1737
1701 if (fl->fl_file == filp) 1738 spin_lock(&ctx->flc_lock);
1739 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1740 if (fl->fl_file == filp) {
1741 victim = fl;
1702 break; 1742 break;
1743 }
1703 } 1744 }
1704 trace_generic_delete_lease(inode, fl); 1745 trace_generic_delete_lease(inode, fl);
1705 if (fl && IS_LEASE(fl)) 1746 if (victim)
1706 error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); 1747 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1707 spin_unlock(&inode->i_lock); 1748 spin_unlock(&ctx->flc_lock);
1708 locks_dispose_list(&dispose); 1749 locks_dispose_list(&dispose);
1709 return error; 1750 return error;
1710} 1751}
@@ -2171,7 +2212,7 @@ again:
2171 */ 2212 */
2172 /* 2213 /*
2173 * we need that spin_lock here - it prevents reordering between 2214 * we need that spin_lock here - it prevents reordering between
2174 * update of inode->i_flock and check for it done in close(). 2215 * update of i_flctx->flc_posix and check for it done in close().
2175 * rcu_read_lock() wouldn't do. 2216 * rcu_read_lock() wouldn't do.
2176 */ 2217 */
2177 spin_lock(&current->files->file_lock); 2218 spin_lock(&current->files->file_lock);
@@ -2331,13 +2372,14 @@ out:
2331void locks_remove_posix(struct file *filp, fl_owner_t owner) 2372void locks_remove_posix(struct file *filp, fl_owner_t owner)
2332{ 2373{
2333 struct file_lock lock; 2374 struct file_lock lock;
2375 struct file_lock_context *ctx = file_inode(filp)->i_flctx;
2334 2376
2335 /* 2377 /*
2336 * If there are no locks held on this file, we don't need to call 2378 * If there are no locks held on this file, we don't need to call
2337 * posix_lock_file(). Another process could be setting a lock on this 2379 * posix_lock_file(). Another process could be setting a lock on this
2338 * file at the same time, but we wouldn't remove that lock anyway. 2380 * file at the same time, but we wouldn't remove that lock anyway.
2339 */ 2381 */
2340 if (!file_inode(filp)->i_flock) 2382 if (!ctx || list_empty(&ctx->flc_posix))
2341 return; 2383 return;
2342 2384
2343 lock.fl_type = F_UNLCK; 2385 lock.fl_type = F_UNLCK;
@@ -2358,67 +2400,67 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
2358 2400
2359EXPORT_SYMBOL(locks_remove_posix); 2401EXPORT_SYMBOL(locks_remove_posix);
2360 2402
2403/* The i_flctx must be valid when calling into here */
2404static void
2405locks_remove_flock(struct file *filp)
2406{
2407 struct file_lock fl = {
2408 .fl_owner = filp,
2409 .fl_pid = current->tgid,
2410 .fl_file = filp,
2411 .fl_flags = FL_FLOCK,
2412 .fl_type = F_UNLCK,
2413 .fl_end = OFFSET_MAX,
2414 };
2415 struct file_lock_context *flctx = file_inode(filp)->i_flctx;
2416
2417 if (list_empty(&flctx->flc_flock))
2418 return;
2419
2420 if (filp->f_op->flock)
2421 filp->f_op->flock(filp, F_SETLKW, &fl);
2422 else
2423 flock_lock_file(filp, &fl);
2424
2425 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2426 fl.fl_ops->fl_release_private(&fl);
2427}
2428
2429/* The i_flctx must be valid when calling into here */
2430static void
2431locks_remove_lease(struct file *filp)
2432{
2433 struct inode *inode = file_inode(filp);
2434 struct file_lock_context *ctx = inode->i_flctx;
2435 struct file_lock *fl, *tmp;
2436 LIST_HEAD(dispose);
2437
2438 if (list_empty(&ctx->flc_lease))
2439 return;
2440
2441 spin_lock(&ctx->flc_lock);
2442 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2443 lease_modify(fl, F_UNLCK, &dispose);
2444 spin_unlock(&ctx->flc_lock);
2445 locks_dispose_list(&dispose);
2446}
2447
2361/* 2448/*
2362 * This function is called on the last close of an open file. 2449 * This function is called on the last close of an open file.
2363 */ 2450 */
2364void locks_remove_file(struct file *filp) 2451void locks_remove_file(struct file *filp)
2365{ 2452{
2366 struct inode * inode = file_inode(filp); 2453 if (!file_inode(filp)->i_flctx)
2367 struct file_lock *fl;
2368 struct file_lock **before;
2369 LIST_HEAD(dispose);
2370
2371 if (!inode->i_flock)
2372 return; 2454 return;
2373 2455
2456 /* remove any OFD locks */
2374 locks_remove_posix(filp, filp); 2457 locks_remove_posix(filp, filp);
2375 2458
2376 if (filp->f_op->flock) { 2459 /* remove flock locks */
2377 struct file_lock fl = { 2460 locks_remove_flock(filp);
2378 .fl_owner = filp,
2379 .fl_pid = current->tgid,
2380 .fl_file = filp,
2381 .fl_flags = FL_FLOCK,
2382 .fl_type = F_UNLCK,
2383 .fl_end = OFFSET_MAX,
2384 };
2385 filp->f_op->flock(filp, F_SETLKW, &fl);
2386 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2387 fl.fl_ops->fl_release_private(&fl);
2388 }
2389
2390 spin_lock(&inode->i_lock);
2391 before = &inode->i_flock;
2392
2393 while ((fl = *before) != NULL) {
2394 if (fl->fl_file == filp) {
2395 if (IS_LEASE(fl)) {
2396 lease_modify(before, F_UNLCK, &dispose);
2397 continue;
2398 }
2399
2400 /*
2401 * There's a leftover lock on the list of a type that
2402 * we didn't expect to see. Most likely a classic
2403 * POSIX lock that ended up not getting released
2404 * properly, or that raced onto the list somehow. Log
2405 * some info about it and then just remove it from
2406 * the list.
2407 */
2408 WARN(!IS_FLOCK(fl),
2409 "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n",
2410 MAJOR(inode->i_sb->s_dev),
2411 MINOR(inode->i_sb->s_dev), inode->i_ino,
2412 fl->fl_type, fl->fl_flags,
2413 fl->fl_start, fl->fl_end);
2414 2461
2415 locks_delete_lock(before, &dispose); 2462 /* remove any leases */
2416 continue; 2463 locks_remove_lease(filp);
2417 }
2418 before = &fl->fl_next;
2419 }
2420 spin_unlock(&inode->i_lock);
2421 locks_dispose_list(&dispose);
2422} 2464}
2423 2465
2424/** 2466/**
@@ -2621,6 +2663,9 @@ static int __init filelock_init(void)
2621{ 2663{
2622 int i; 2664 int i;
2623 2665
2666 flctx_cache = kmem_cache_create("file_lock_ctx",
2667 sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2668
2624 filelock_cache = kmem_cache_create("file_lock_cache", 2669 filelock_cache = kmem_cache_create("file_lock_cache",
2625 sizeof(struct file_lock), 0, SLAB_PANIC, NULL); 2670 sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2626 2671
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f3f60641344..8cdb2b28a104 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
85{ 85{
86 struct inode *inode = state->inode; 86 struct inode *inode = state->inode;
87 struct file_lock *fl; 87 struct file_lock *fl;
88 struct file_lock_context *flctx = inode->i_flctx;
89 struct list_head *list;
88 int status = 0; 90 int status = 0;
89 91
90 if (inode->i_flock == NULL) 92 if (flctx == NULL)
91 goto out; 93 goto out;
92 94
93 /* Protect inode->i_flock using the i_lock */ 95 list = &flctx->flc_posix;
94 spin_lock(&inode->i_lock); 96 spin_lock(&flctx->flc_lock);
95 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 97restart:
96 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 98 list_for_each_entry(fl, list, fl_list) {
97 continue;
98 if (nfs_file_open_context(fl->fl_file) != ctx) 99 if (nfs_file_open_context(fl->fl_file) != ctx)
99 continue; 100 continue;
100 spin_unlock(&inode->i_lock); 101 spin_unlock(&flctx->flc_lock);
101 status = nfs4_lock_delegation_recall(fl, state, stateid); 102 status = nfs4_lock_delegation_recall(fl, state, stateid);
102 if (status < 0) 103 if (status < 0)
103 goto out; 104 goto out;
104 spin_lock(&inode->i_lock); 105 spin_lock(&flctx->flc_lock);
105 } 106 }
106 spin_unlock(&inode->i_lock); 107 if (list == &flctx->flc_posix) {
108 list = &flctx->flc_flock;
109 goto restart;
110 }
111 spin_unlock(&flctx->flc_lock);
107out: 112out:
108 return status; 113 return status;
109} 114}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5194933ed419..a3bb22ab68c5 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1366,49 +1366,55 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
1366 struct nfs_inode *nfsi = NFS_I(inode); 1366 struct nfs_inode *nfsi = NFS_I(inode);
1367 struct file_lock *fl; 1367 struct file_lock *fl;
1368 int status = 0; 1368 int status = 0;
1369 struct file_lock_context *flctx = inode->i_flctx;
1370 struct list_head *list;
1369 1371
1370 if (inode->i_flock == NULL) 1372 if (flctx == NULL)
1371 return 0; 1373 return 0;
1372 1374
1375 list = &flctx->flc_posix;
1376
1373 /* Guard against delegation returns and new lock/unlock calls */ 1377 /* Guard against delegation returns and new lock/unlock calls */
1374 down_write(&nfsi->rwsem); 1378 down_write(&nfsi->rwsem);
1375 /* Protect inode->i_flock using the BKL */ 1379 spin_lock(&flctx->flc_lock);
1376 spin_lock(&inode->i_lock); 1380restart:
1377 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1381 list_for_each_entry(fl, list, fl_list) {
1378 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
1379 continue;
1380 if (nfs_file_open_context(fl->fl_file)->state != state) 1382 if (nfs_file_open_context(fl->fl_file)->state != state)
1381 continue; 1383 continue;
1382 spin_unlock(&inode->i_lock); 1384 spin_unlock(&flctx->flc_lock);
1383 status = ops->recover_lock(state, fl); 1385 status = ops->recover_lock(state, fl);
1384 switch (status) { 1386 switch (status) {
1385 case 0: 1387 case 0:
1386 break; 1388 break;
1387 case -ESTALE: 1389 case -ESTALE:
1388 case -NFS4ERR_ADMIN_REVOKED: 1390 case -NFS4ERR_ADMIN_REVOKED:
1389 case -NFS4ERR_STALE_STATEID: 1391 case -NFS4ERR_STALE_STATEID:
1390 case -NFS4ERR_BAD_STATEID: 1392 case -NFS4ERR_BAD_STATEID:
1391 case -NFS4ERR_EXPIRED: 1393 case -NFS4ERR_EXPIRED:
1392 case -NFS4ERR_NO_GRACE: 1394 case -NFS4ERR_NO_GRACE:
1393 case -NFS4ERR_STALE_CLIENTID: 1395 case -NFS4ERR_STALE_CLIENTID:
1394 case -NFS4ERR_BADSESSION: 1396 case -NFS4ERR_BADSESSION:
1395 case -NFS4ERR_BADSLOT: 1397 case -NFS4ERR_BADSLOT:
1396 case -NFS4ERR_BAD_HIGH_SLOT: 1398 case -NFS4ERR_BAD_HIGH_SLOT:
1397 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1399 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1398 goto out; 1400 goto out;
1399 default: 1401 default:
1400 printk(KERN_ERR "NFS: %s: unhandled error %d\n", 1402 pr_err("NFS: %s: unhandled error %d\n",
1401 __func__, status); 1403 __func__, status);
1402 case -ENOMEM: 1404 case -ENOMEM:
1403 case -NFS4ERR_DENIED: 1405 case -NFS4ERR_DENIED:
1404 case -NFS4ERR_RECLAIM_BAD: 1406 case -NFS4ERR_RECLAIM_BAD:
1405 case -NFS4ERR_RECLAIM_CONFLICT: 1407 case -NFS4ERR_RECLAIM_CONFLICT:
1406 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 1408 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1407 status = 0; 1409 status = 0;
1408 } 1410 }
1409 spin_lock(&inode->i_lock); 1411 spin_lock(&flctx->flc_lock);
1410 } 1412 }
1411 spin_unlock(&inode->i_lock); 1413 if (list == &flctx->flc_posix) {
1414 list = &flctx->flc_flock;
1415 goto restart;
1416 }
1417 spin_unlock(&flctx->flc_lock);
1412out: 1418out:
1413 up_write(&nfsi->rwsem); 1419 up_write(&nfsi->rwsem);
1414 return status; 1420 return status;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2b5e769beb16..29c7f33c9cf1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -826,11 +826,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
826 struct nfs_pageio_descriptor *pgio) 826 struct nfs_pageio_descriptor *pgio)
827{ 827{
828 size_t size; 828 size_t size;
829 struct file_lock_context *flctx;
829 830
830 if (prev) { 831 if (prev) {
831 if (!nfs_match_open_context(req->wb_context, prev->wb_context)) 832 if (!nfs_match_open_context(req->wb_context, prev->wb_context))
832 return false; 833 return false;
833 if (req->wb_context->dentry->d_inode->i_flock != NULL && 834 flctx = req->wb_context->dentry->d_inode->i_flctx;
835 if (flctx != NULL &&
836 !(list_empty_careful(&flctx->flc_posix) &&
837 list_empty_careful(&flctx->flc_flock)) &&
834 !nfs_match_lock_context(req->wb_lock_context, 838 !nfs_match_lock_context(req->wb_lock_context,
835 prev->wb_lock_context)) 839 prev->wb_lock_context))
836 return false; 840 return false;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af3af685a9e3..4ae66f416eb9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1091{ 1091{
1092 struct nfs_open_context *ctx = nfs_file_open_context(file); 1092 struct nfs_open_context *ctx = nfs_file_open_context(file);
1093 struct nfs_lock_context *l_ctx; 1093 struct nfs_lock_context *l_ctx;
1094 struct file_lock_context *flctx = file_inode(file)->i_flctx;
1094 struct nfs_page *req; 1095 struct nfs_page *req;
1095 int do_flush, status; 1096 int do_flush, status;
1096 /* 1097 /*
@@ -1109,7 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1109 do_flush = req->wb_page != page || req->wb_context != ctx; 1110 do_flush = req->wb_page != page || req->wb_context != ctx;
1110 /* for now, flush if more than 1 request in page_group */ 1111 /* for now, flush if more than 1 request in page_group */
1111 do_flush |= req->wb_this_page != req; 1112 do_flush |= req->wb_this_page != req;
1112 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { 1113 if (l_ctx && flctx &&
1114 !(list_empty_careful(&flctx->flc_posix) &&
1115 list_empty_careful(&flctx->flc_flock))) {
1113 do_flush |= l_ctx->lockowner.l_owner != current->files 1116 do_flush |= l_ctx->lockowner.l_owner != current->files
1114 || l_ctx->lockowner.l_pid != current->tgid; 1117 || l_ctx->lockowner.l_pid != current->tgid;
1115 } 1118 }
@@ -1170,6 +1173,13 @@ out:
1170 return PageUptodate(page) != 0; 1173 return PageUptodate(page) != 0;
1171} 1174}
1172 1175
1176static bool
1177is_whole_file_wrlock(struct file_lock *fl)
1178{
1179 return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
1180 fl->fl_type == F_WRLCK;
1181}
1182
1173/* If we know the page is up to date, and we're not using byte range locks (or 1183/* If we know the page is up to date, and we're not using byte range locks (or
1174 * if we have the whole file locked for writing), it may be more efficient to 1184 * if we have the whole file locked for writing), it may be more efficient to
1175 * extend the write to cover the entire page in order to avoid fragmentation 1185 * extend the write to cover the entire page in order to avoid fragmentation
@@ -1180,17 +1190,36 @@ out:
1180 */ 1190 */
1181static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) 1191static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
1182{ 1192{
1193 int ret;
1194 struct file_lock_context *flctx = inode->i_flctx;
1195 struct file_lock *fl;
1196
1183 if (file->f_flags & O_DSYNC) 1197 if (file->f_flags & O_DSYNC)
1184 return 0; 1198 return 0;
1185 if (!nfs_write_pageuptodate(page, inode)) 1199 if (!nfs_write_pageuptodate(page, inode))
1186 return 0; 1200 return 0;
1187 if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) 1201 if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
1188 return 1; 1202 return 1;
1189 if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && 1203 if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
1190 inode->i_flock->fl_end == OFFSET_MAX && 1204 list_empty_careful(&flctx->flc_posix)))
1191 inode->i_flock->fl_type != F_RDLCK)) 1205 return 0;
1192 return 1; 1206
1193 return 0; 1207 /* Check to see if there are whole file write locks */
1208 ret = 0;
1209 spin_lock(&flctx->flc_lock);
1210 if (!list_empty(&flctx->flc_posix)) {
1211 fl = list_first_entry(&flctx->flc_posix, struct file_lock,
1212 fl_list);
1213 if (is_whole_file_wrlock(fl))
1214 ret = 1;
1215 } else if (!list_empty(&flctx->flc_flock)) {
1216 fl = list_first_entry(&flctx->flc_flock, struct file_lock,
1217 fl_list);
1218 if (fl->fl_type == F_WRLCK)
1219 ret = 1;
1220 }
1221 spin_unlock(&flctx->flc_lock);
1222 return ret;
1194} 1223}
1195 1224
1196/* 1225/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c06a1ba80d73..532a60cca2fb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3477,7 +3477,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
3477} 3477}
3478 3478
3479static int 3479static int
3480nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) 3480nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
3481 struct list_head *dispose)
3481{ 3482{
3482 if (arg & F_UNLCK) 3483 if (arg & F_UNLCK)
3483 return lease_modify(onlist, arg, dispose); 3484 return lease_modify(onlist, arg, dispose);
@@ -5556,10 +5557,11 @@ out_nfserr:
5556static bool 5557static bool
5557check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) 5558check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
5558{ 5559{
5559 struct file_lock **flpp; 5560 struct file_lock *fl;
5560 int status = false; 5561 int status = false;
5561 struct file *filp = find_any_file(fp); 5562 struct file *filp = find_any_file(fp);
5562 struct inode *inode; 5563 struct inode *inode;
5564 struct file_lock_context *flctx;
5563 5565
5564 if (!filp) { 5566 if (!filp) {
5565 /* Any valid lock stateid should have some sort of access */ 5567 /* Any valid lock stateid should have some sort of access */
@@ -5568,15 +5570,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
5568 } 5570 }
5569 5571
5570 inode = file_inode(filp); 5572 inode = file_inode(filp);
5573 flctx = inode->i_flctx;
5571 5574
5572 spin_lock(&inode->i_lock); 5575 if (flctx && !list_empty_careful(&flctx->flc_posix)) {
5573 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { 5576 spin_lock(&flctx->flc_lock);
5574 if ((*flpp)->fl_owner == (fl_owner_t)lowner) { 5577 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
5575 status = true; 5578 if (fl->fl_owner == (fl_owner_t)lowner) {
5576 break; 5579 status = true;
5580 break;
5581 }
5577 } 5582 }
5583 spin_unlock(&flctx->flc_lock);
5578 } 5584 }
5579 spin_unlock(&inode->i_lock);
5580 fput(filp); 5585 fput(filp);
5581 return status; 5586 return status;
5582} 5587}
diff --git a/fs/read_write.c b/fs/read_write.c
index c0805c93b6fa..4060691e78f7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
358 return retval; 358 return retval;
359 } 359 }
360 360
361 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 361 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
362 retval = locks_mandatory_area( 362 retval = locks_mandatory_area(
363 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 363 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
364 inode, file, pos, count); 364 inode, file, pos, count);