aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Coddington <bcodding@redhat.com>2017-07-16 10:28:22 -0400
committerJeff Layton <jlayton@redhat.com>2017-07-16 10:28:22 -0400
commit9d5b86ac13c573795525ecac6ed2db39ab23e2a8 (patch)
tree927163715d1ec120350ba403a348994f3fe2e204
parent52306e882f77d3fd73f91435c41373d634acc5d2 (diff)
fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks
Since commit c69899a17ca4 "NFSv4: Update of VFS byte range lock must be atomic with the stateid update", NFSv4 has been inserting locks in rpciod worker context. The result is that the file_lock's fl_nspid is the kworker's pid instead of the original userspace pid. The fl_nspid is only used to represent the namespaced virtual pid number when displaying locks or returning from F_GETLK. There's no reason to set it for every inserted lock, since we can usually just look it up from fl_pid. So, instead of looking up and holding struct pid for every lock, let's just look up the virtual pid number from fl_pid when it is needed. That means we can remove fl_nspid entirely. The translation and presentation of fl_pid should handle the following four cases: 1 - F_GETLK on a remote file with a remote lock: In this case, the filesystem should determine the l_pid to return here. Filesystems should indicate that the fl_pid represents a non-local pid value that should not be translated by returning an fl_pid <= 0. 2 - F_GETLK on a local file with a remote lock: This should be the l_pid of the lock manager process, and translated. 3 - F_GETLK on a remote file with a local lock, and 4 - F_GETLK on a local file with a local lock: These should be the translated l_pid of the local locking process. Fuse was already doing the correct thing by translating the pid into the caller's namespace. With this change we must update fuse to translate to init's pid namespace, so that the locks API can then translate from init's pid namespace into the pid namespace of the caller. With this change, the locks API will expect that if a filesystem returns a remote pid as opposed to a local pid for F_GETLK, that remote pid will be <= 0. This signifies that the pid is remote, and the locks API will forego translating that pid into the pid namespace of the local calling process. Finally, we convert remote filesystems to present remote pids using negative numbers. 
Have lustre, 9p, ceph, cifs, and dlm negate the remote pid returned for F_GETLK lock requests. Since local pids will never be larger than PID_MAX_LIMIT (which is currently defined as <= 4 million), but pid_t is an unsigned int, we should have plenty of room to represent remote pids with negative numbers if we assume that remote pid numbers are similarly limited. If this is not the case, then we run the risk of having a remote pid returned for which there is also a corresponding local pid. This is a problem we have now, but this patch should reduce the chances of that occurring, while also returning those remote pid numbers, for whatever that may be worth. Signed-off-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Jeff Layton <jlayton@redhat.com>
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_flock.c2
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/ceph/locks.c2
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/locks.c62
-rw-r--r--include/linux/fs.h1
8 files changed, 45 insertions, 34 deletions
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index b7f28b39c7b3..abcbf075acc0 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -596,7 +596,7 @@ granted:
596 default: 596 default:
597 getlk->fl_type = F_UNLCK; 597 getlk->fl_type = F_UNLCK;
598 } 598 }
599 getlk->fl_pid = (pid_t)lock->l_policy_data.l_flock.pid; 599 getlk->fl_pid = -(pid_t)lock->l_policy_data.l_flock.pid;
600 getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start; 600 getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start;
601 getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end; 601 getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end;
602 } else { 602 } else {
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3de3b4a89d89..43c242e17132 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -288,7 +288,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
288 fl->fl_end = OFFSET_MAX; 288 fl->fl_end = OFFSET_MAX;
289 else 289 else
290 fl->fl_end = glock.start + glock.length - 1; 290 fl->fl_end = glock.start + glock.length - 1;
291 fl->fl_pid = glock.proc_id; 291 fl->fl_pid = -glock.proc_id;
292 } 292 }
293 kfree(glock.client_id); 293 kfree(glock.client_id);
294 return res; 294 return res;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 64ae74472046..8cd63e8123d8 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -79,7 +79,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
79 err = ceph_mdsc_do_request(mdsc, inode, req); 79 err = ceph_mdsc_do_request(mdsc, inode, req);
80 80
81 if (operation == CEPH_MDS_OP_GETFILELOCK) { 81 if (operation == CEPH_MDS_OP_GETFILELOCK) {
82 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 82 fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
83 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 83 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
84 fl->fl_type = F_RDLCK; 84 fl->fl_type = F_RDLCK;
85 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) 85 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 72a53bd19865..118a63e7e221 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2522,7 +2522,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
2522 pLockData->fl_start = le64_to_cpu(parm_data->start); 2522 pLockData->fl_start = le64_to_cpu(parm_data->start);
2523 pLockData->fl_end = pLockData->fl_start + 2523 pLockData->fl_end = pLockData->fl_start +
2524 le64_to_cpu(parm_data->length) - 1; 2524 le64_to_cpu(parm_data->length) - 1;
2525 pLockData->fl_pid = le32_to_cpu(parm_data->pid); 2525 pLockData->fl_pid = -le32_to_cpu(parm_data->pid);
2526 } 2526 }
2527 } 2527 }
2528 2528
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index d401425f602a..e631b1689228 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -367,7 +367,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
367 locks_init_lock(fl); 367 locks_init_lock(fl);
368 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 368 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
369 fl->fl_flags = FL_POSIX; 369 fl->fl_flags = FL_POSIX;
370 fl->fl_pid = op->info.pid; 370 fl->fl_pid = -op->info.pid;
371 fl->fl_start = op->info.start; 371 fl->fl_start = op->info.start;
372 fl->fl_end = op->info.end; 372 fl->fl_end = op->info.end;
373 rv = 0; 373 rv = 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3ee4fdc3da9e..7cd692f51d1d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2101,11 +2101,11 @@ static int convert_fuse_file_lock(struct fuse_conn *fc,
2101 fl->fl_end = ffl->end; 2101 fl->fl_end = ffl->end;
2102 2102
2103 /* 2103 /*
2104 * Convert pid into the caller's pid namespace. If the pid 2104 * Convert pid into init's pid namespace. The locks API will
2105 * does not map into the namespace fl_pid will get set to 0. 2105 * translate it into the caller's pid namespace.
2106 */ 2106 */
2107 rcu_read_lock(); 2107 rcu_read_lock();
2108 fl->fl_pid = pid_vnr(find_pid_ns(ffl->pid, fc->pid_ns)); 2108 fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
2109 rcu_read_unlock(); 2109 rcu_read_unlock();
2110 break; 2110 break;
2111 2111
diff --git a/fs/locks.c b/fs/locks.c
index d7daa6c8932f..6d0949880ebd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -137,6 +137,7 @@
137#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 137#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
138#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) 138#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
139#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) 139#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
140#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
140 141
141static inline bool is_remote_lock(struct file *filp) 142static inline bool is_remote_lock(struct file *filp)
142{ 143{
@@ -733,7 +734,6 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
733static void 734static void
734locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before) 735locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
735{ 736{
736 fl->fl_nspid = get_pid(task_tgid(current));
737 list_add_tail(&fl->fl_list, before); 737 list_add_tail(&fl->fl_list, before);
738 locks_insert_global_locks(fl); 738 locks_insert_global_locks(fl);
739} 739}
@@ -743,10 +743,6 @@ locks_unlink_lock_ctx(struct file_lock *fl)
743{ 743{
744 locks_delete_global_locks(fl); 744 locks_delete_global_locks(fl);
745 list_del_init(&fl->fl_list); 745 list_del_init(&fl->fl_list);
746 if (fl->fl_nspid) {
747 put_pid(fl->fl_nspid);
748 fl->fl_nspid = NULL;
749 }
750 locks_wake_up_blocks(fl); 746 locks_wake_up_blocks(fl);
751} 747}
752 748
@@ -823,8 +819,6 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
823 list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { 819 list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
824 if (posix_locks_conflict(fl, cfl)) { 820 if (posix_locks_conflict(fl, cfl)) {
825 locks_copy_conflock(fl, cfl); 821 locks_copy_conflock(fl, cfl);
826 if (cfl->fl_nspid)
827 fl->fl_pid = pid_vnr(cfl->fl_nspid);
828 goto out; 822 goto out;
829 } 823 }
830 } 824 }
@@ -2048,9 +2042,33 @@ int vfs_test_lock(struct file *filp, struct file_lock *fl)
2048} 2042}
2049EXPORT_SYMBOL_GPL(vfs_test_lock); 2043EXPORT_SYMBOL_GPL(vfs_test_lock);
2050 2044
2045/**
2046 * locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2047 * @fl: The file_lock whose fl_pid should be translated
2048 * @ns: The namespace into which the pid should be translated
2049 *
2050 * Used to translate a fl_pid into a namespace virtual pid number
2051 */
2052static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
2053{
2054 pid_t vnr;
2055 struct pid *pid;
2056
2057 if (IS_OFDLCK(fl))
2058 return -1;
2059 if (IS_REMOTELCK(fl))
2060 return fl->fl_pid;
2061
2062 rcu_read_lock();
2063 pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
2064 vnr = pid_nr_ns(pid, ns);
2065 rcu_read_unlock();
2066 return vnr;
2067}
2068
2051static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) 2069static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2052{ 2070{
2053 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; 2071 flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2054#if BITS_PER_LONG == 32 2072#if BITS_PER_LONG == 32
2055 /* 2073 /*
2056 * Make sure we can represent the posix lock via 2074 * Make sure we can represent the posix lock via
@@ -2072,7 +2090,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2072#if BITS_PER_LONG == 32 2090#if BITS_PER_LONG == 32
2073static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) 2091static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
2074{ 2092{
2075 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; 2093 flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2076 flock->l_start = fl->fl_start; 2094 flock->l_start = fl->fl_start;
2077 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 2095 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2078 fl->fl_end - fl->fl_start + 1; 2096 fl->fl_end - fl->fl_start + 1;
@@ -2584,22 +2602,16 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2584{ 2602{
2585 struct inode *inode = NULL; 2603 struct inode *inode = NULL;
2586 unsigned int fl_pid; 2604 unsigned int fl_pid;
2605 struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
2587 2606
2588 if (fl->fl_nspid) { 2607 fl_pid = locks_translate_pid(fl, proc_pidns);
2589 struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info; 2608 /*
2590 2609 * If there isn't a fl_pid don't display who is waiting on
2591 /* Don't let fl_pid change based on who is reading the file */ 2610 * the lock if we are called from locks_show, or if we are
2592 fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns); 2611 * called from __show_fd_info - skip lock entirely
2593 2612 */
2594 /* 2613 if (fl_pid == 0)
2595 * If there isn't a fl_pid don't display who is waiting on 2614 return;
2596 * the lock if we are called from locks_show, or if we are
2597 * called from __show_fd_info - skip lock entirely
2598 */
2599 if (fl_pid == 0)
2600 return;
2601 } else
2602 fl_pid = fl->fl_pid;
2603 2615
2604 if (fl->fl_file != NULL) 2616 if (fl->fl_file != NULL)
2605 inode = locks_inode(fl->fl_file); 2617 inode = locks_inode(fl->fl_file);
@@ -2674,7 +2686,7 @@ static int locks_show(struct seq_file *f, void *v)
2674 2686
2675 fl = hlist_entry(v, struct file_lock, fl_link); 2687 fl = hlist_entry(v, struct file_lock, fl_link);
2676 2688
2677 if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns)) 2689 if (locks_translate_pid(fl, proc_pidns) == 0)
2678 return 0; 2690 return 0;
2679 2691
2680 lock_get_status(f, fl, iter->li_pos, ""); 2692 lock_get_status(f, fl, iter->li_pos, "");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7b5d6816542b..f0b108af9b02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -999,7 +999,6 @@ struct file_lock {
999 unsigned char fl_type; 999 unsigned char fl_type;
1000 unsigned int fl_pid; 1000 unsigned int fl_pid;
1001 int fl_link_cpu; /* what cpu's list is this on? */ 1001 int fl_link_cpu; /* what cpu's list is this on? */
1002 struct pid *fl_nspid;
1003 wait_queue_head_t fl_wait; 1002 wait_queue_head_t fl_wait;
1004 struct file *fl_file; 1003 struct file *fl_file;
1005 loff_t fl_start; 1004 loff_t fl_start;