aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig12
-rw-r--r--fs/afs/cell.c3
-rw-r--r--fs/afs/kafsasyncd.c9
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/afs/vlocation.c6
-rw-r--r--fs/afs/vnode.c3
-rw-r--r--fs/autofs4/expire.c3
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c16
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/configfs/dir.c6
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/dquot.c4
-rw-r--r--fs/exec.c147
-rw-r--r--fs/ext3/super.c6
-rw-r--r--fs/jffs2/erase.c15
-rw-r--r--fs/jffs2/nodemgmt.c3
-rw-r--r--fs/jffs2/wbuf.c3
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfscache.c3
-rw-r--r--fs/ocfs2/dlm/dlmast.c3
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c9
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c9
-rw-r--r--fs/ocfs2/dlm/dlmthread.c6
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c3
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/openpromfs/inode.c1158
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/base.c1086
-rw-r--r--fs/proc/inode.c11
-rw-r--r--fs/proc/internal.h22
-rw-r--r--fs/proc/task_mmu.c110
-rw-r--r--fs/proc/task_nommu.c21
-rw-r--r--fs/reiserfs/file.c8
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/smbfs/request.c6
-rw-r--r--fs/smbfs/smbiod.c3
-rw-r--r--fs/sysfs/dir.c10
42 files changed, 1122 insertions, 1638 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 1cdc043922d5..6c5051802bd2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1490,7 +1490,12 @@ config NFSD
1490 select LOCKD 1490 select LOCKD
1491 select SUNRPC 1491 select SUNRPC
1492 select EXPORTFS 1492 select EXPORTFS
1493 select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL 1493 select NFSD_V2_ACL if NFSD_V3_ACL
1494 select NFS_ACL_SUPPORT if NFSD_V2_ACL
1495 select NFSD_TCP if NFSD_V4
1496 select CRYPTO_MD5 if NFSD_V4
1497 select CRYPTO if NFSD_V4
1498 select FS_POSIX_ACL if NFSD_V4
1494 help 1499 help
1495 If you want your Linux box to act as an NFS *server*, so that other 1500 If you want your Linux box to act as an NFS *server*, so that other
1496 computers on your local network which support NFS can access certain 1501 computers on your local network which support NFS can access certain
@@ -1528,7 +1533,6 @@ config NFSD_V3
1528config NFSD_V3_ACL 1533config NFSD_V3_ACL
1529 bool "Provide server support for the NFSv3 ACL protocol extension" 1534 bool "Provide server support for the NFSv3 ACL protocol extension"
1530 depends on NFSD_V3 1535 depends on NFSD_V3
1531 select NFSD_V2_ACL
1532 help 1536 help
1533 Implement the NFSv3 ACL protocol extension for manipulating POSIX 1537 Implement the NFSv3 ACL protocol extension for manipulating POSIX
1534 Access Control Lists on exported file systems. NFS clients should 1538 Access Control Lists on exported file systems. NFS clients should
@@ -1538,10 +1542,6 @@ config NFSD_V3_ACL
1538config NFSD_V4 1542config NFSD_V4
1539 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1543 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1540 depends on NFSD_V3 && EXPERIMENTAL 1544 depends on NFSD_V3 && EXPERIMENTAL
1541 select NFSD_TCP
1542 select CRYPTO_MD5
1543 select CRYPTO
1544 select FS_POSIX_ACL
1545 help 1545 help
1546 If you would like to include the NFSv4 server as well as the NFSv2 1546 If you would like to include the NFSv4 server as well as the NFSv2
1547 and NFSv3 servers, say Y here. This feature is experimental, and 1547 and NFSv3 servers, say Y here. This feature is experimental, and
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 009a9ae88d61..bfc1fd22d5b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -413,8 +413,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
413 413
414 /* we found it in the graveyard - resurrect it */ 414 /* we found it in the graveyard - resurrect it */
415 found_dead_server: 415 found_dead_server:
416 list_del(&server->link); 416 list_move_tail(&server->link, &cell->sv_list);
417 list_add_tail(&server->link, &cell->sv_list);
418 afs_get_server(server); 417 afs_get_server(server);
419 afs_kafstimod_del_timer(&server->timeout); 418 afs_kafstimod_del_timer(&server->timeout);
420 spin_unlock(&cell->sv_gylock); 419 spin_unlock(&cell->sv_gylock);
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 7ac07d0d47b9..f09a794f248e 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -136,8 +136,7 @@ static int kafsasyncd(void *arg)
136 if (!list_empty(&kafsasyncd_async_attnq)) { 136 if (!list_empty(&kafsasyncd_async_attnq)) {
137 op = list_entry(kafsasyncd_async_attnq.next, 137 op = list_entry(kafsasyncd_async_attnq.next,
138 struct afs_async_op, link); 138 struct afs_async_op, link);
139 list_del(&op->link); 139 list_move_tail(&op->link,
140 list_add_tail(&op->link,
141 &kafsasyncd_async_busyq); 140 &kafsasyncd_async_busyq);
142 } 141 }
143 142
@@ -204,8 +203,7 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op)
204 init_waitqueue_entry(&op->waiter, kafsasyncd_task); 203 init_waitqueue_entry(&op->waiter, kafsasyncd_task);
205 add_wait_queue(&op->call->waitq, &op->waiter); 204 add_wait_queue(&op->call->waitq, &op->waiter);
206 205
207 list_del(&op->link); 206 list_move_tail(&op->link, &kafsasyncd_async_busyq);
208 list_add_tail(&op->link, &kafsasyncd_async_busyq);
209 207
210 spin_unlock(&kafsasyncd_async_lock); 208 spin_unlock(&kafsasyncd_async_lock);
211 209
@@ -223,8 +221,7 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op)
223 221
224 spin_lock(&kafsasyncd_async_lock); 222 spin_lock(&kafsasyncd_async_lock);
225 223
226 list_del(&op->link); 224 list_move_tail(&op->link, &kafsasyncd_async_attnq);
227 list_add_tail(&op->link, &kafsasyncd_async_attnq);
228 225
229 spin_unlock(&kafsasyncd_async_lock); 226 spin_unlock(&kafsasyncd_async_lock);
230 227
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 62b093aa41c6..22afaae1a4ce 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -123,8 +123,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
123 resurrect_server: 123 resurrect_server:
124 _debug("resurrecting server"); 124 _debug("resurrecting server");
125 125
126 list_del(&zombie->link); 126 list_move_tail(&zombie->link, &cell->sv_list);
127 list_add_tail(&zombie->link, &cell->sv_list);
128 afs_get_server(zombie); 127 afs_get_server(zombie);
129 afs_kafstimod_del_timer(&zombie->timeout); 128 afs_kafstimod_del_timer(&zombie->timeout);
130 spin_unlock(&cell->sv_gylock); 129 spin_unlock(&cell->sv_gylock);
@@ -168,8 +167,7 @@ void afs_put_server(struct afs_server *server)
168 } 167 }
169 168
170 spin_lock(&cell->sv_gylock); 169 spin_lock(&cell->sv_gylock);
171 list_del(&server->link); 170 list_move_tail(&server->link, &cell->sv_graveyard);
172 list_add_tail(&server->link, &cell->sv_graveyard);
173 171
174 /* time out in 10 secs */ 172 /* time out in 10 secs */
175 afs_kafstimod_add_timer(&server->timeout, 10 * HZ); 173 afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index eced20618ecc..331f730a1fb3 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -326,8 +326,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
326 /* found in the graveyard - resurrect */ 326 /* found in the graveyard - resurrect */
327 _debug("found in graveyard"); 327 _debug("found in graveyard");
328 atomic_inc(&vlocation->usage); 328 atomic_inc(&vlocation->usage);
329 list_del(&vlocation->link); 329 list_move_tail(&vlocation->link, &cell->vl_list);
330 list_add_tail(&vlocation->link, &cell->vl_list);
331 spin_unlock(&cell->vl_gylock); 330 spin_unlock(&cell->vl_gylock);
332 331
333 afs_kafstimod_del_timer(&vlocation->timeout); 332 afs_kafstimod_del_timer(&vlocation->timeout);
@@ -478,8 +477,7 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation)
478 } 477 }
479 478
480 /* move to graveyard queue */ 479 /* move to graveyard queue */
481 list_del(&vlocation->link); 480 list_move_tail(&vlocation->link,&cell->vl_graveyard);
482 list_add_tail(&vlocation->link,&cell->vl_graveyard);
483 481
484 /* remove from pending timeout queue (refcounted if actually being 482 /* remove from pending timeout queue (refcounted if actually being
485 * updated) */ 483 * updated) */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 9867fef3261d..cf62da5d7825 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -104,8 +104,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
104 vnode->cb_expiry * HZ); 104 vnode->cb_expiry * HZ);
105 105
106 spin_lock(&afs_cb_hash_lock); 106 spin_lock(&afs_cb_hash_lock);
107 list_del(&vnode->cb_hash_link); 107 list_move_tail(&vnode->cb_hash_link,
108 list_add_tail(&vnode->cb_hash_link,
109 &afs_cb_hash(server, &vnode->fid)); 108 &afs_cb_hash(server, &vnode->fid));
110 spin_unlock(&afs_cb_hash_lock); 109 spin_unlock(&afs_cb_hash_lock);
111 110
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4456d1daa40f..8dbd44f10e9d 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -376,8 +376,7 @@ next:
376 DPRINTK("returning %p %.*s", 376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name); 377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock); 378 spin_lock(&dcache_lock);
379 list_del(&expired->d_parent->d_subdirs); 379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
381 spin_unlock(&dcache_lock); 380 spin_unlock(&dcache_lock);
382 return expired; 381 return expired;
383 } 382 }
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 6c6771db36da..7caee8d8ea3b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -259,7 +259,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
259 /* If request was not a signal, enqueue and don't free */ 259 /* If request was not a signal, enqueue and don't free */
260 if (!(req->uc_flags & REQ_ASYNC)) { 260 if (!(req->uc_flags & REQ_ASYNC)) {
261 req->uc_flags |= REQ_READ; 261 req->uc_flags |= REQ_READ;
262 list_add(&(req->uc_chain), vcp->vc_processing.prev); 262 list_add_tail(&(req->uc_chain), &vcp->vc_processing);
263 goto out; 263 goto out;
264 } 264 }
265 265
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index b040eba13a7d..a5b5e631ba61 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -725,7 +725,7 @@ static int coda_upcall(struct coda_sb_info *sbi,
725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique; 725 ((union inputArgs *)buffer)->ih.unique = req->uc_unique;
726 726
727 /* Append msg to pending queue and poke Venus. */ 727 /* Append msg to pending queue and poke Venus. */
728 list_add(&(req->uc_chain), vcommp->vc_pending.prev); 728 list_add_tail(&(req->uc_chain), &vcommp->vc_pending);
729 729
730 wake_up_interruptible(&vcommp->vc_waitq); 730 wake_up_interruptible(&vcommp->vc_waitq);
731 /* We can be interrupted while we wait for Venus to process 731 /* We can be interrupted while we wait for Venus to process
diff --git a/fs/compat.c b/fs/compat.c
index 7e7e5bc4f3cf..e31e9cf96647 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -55,6 +55,20 @@
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); 56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57 57
58int compat_log = 1;
59
60int compat_printk(const char *fmt, ...)
61{
62 va_list ap;
63 int ret;
64 if (!compat_log)
65 return 0;
66 va_start(ap, fmt);
67 ret = vprintk(fmt, ap);
68 va_end(ap);
69 return ret;
70}
71
58/* 72/*
59 * Not all architectures have sys_utime, so implement this in terms 73 * Not all architectures have sys_utime, so implement this in terms
60 * of sys_utimes. 74 * of sys_utimes.
@@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
359 sprintf(buf,"'%c'", (cmd>>24) & 0x3f); 373 sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
360 if (!isprint(buf[1])) 374 if (!isprint(buf[1]))
361 sprintf(buf, "%02x", buf[1]); 375 sprintf(buf, "%02x", buf[1]);
362 printk("ioctl32(%s:%d): Unknown cmd fd(%d) " 376 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
363 "cmd(%08x){%s} arg(%08x) on %s\n", 377 "cmd(%08x){%s} arg(%08x) on %s\n",
364 current->comm, current->pid, 378 current->comm, current->pid,
365 (int)fd, (unsigned int)cmd, buf, 379 (int)fd, (unsigned int)cmd, buf,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9eb9824dd332..d8ecfedef189 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -80,6 +80,7 @@
80#include <net/bluetooth/rfcomm.h> 80#include <net/bluetooth/rfcomm.h>
81 81
82#include <linux/capi.h> 82#include <linux/capi.h>
83#include <linux/gigaset_dev.h>
83 84
84#include <scsi/scsi.h> 85#include <scsi/scsi.h>
85#include <scsi/scsi_ioctl.h> 86#include <scsi/scsi_ioctl.h>
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5f952187fc53..207f8006fd6c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1009,8 +1009,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1009 /* fallthrough */ 1009 /* fallthrough */
1010 default: 1010 default:
1011 if (filp->f_pos == 2) { 1011 if (filp->f_pos == 2) {
1012 list_del(q); 1012 list_move(q, &parent_sd->s_children);
1013 list_add(q, &parent_sd->s_children);
1014 } 1013 }
1015 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 1014 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
1016 struct configfs_dirent *next; 1015 struct configfs_dirent *next;
@@ -1033,8 +1032,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1033 dt_type(next)) < 0) 1032 dt_type(next)) < 0)
1034 return 0; 1033 return 0;
1035 1034
1036 list_del(q); 1035 list_move(q, p);
1037 list_add(q, p);
1038 p = q; 1036 p = q;
1039 filp->f_pos++; 1037 filp->f_pos++;
1040 } 1038 }
diff --git a/fs/dcache.c b/fs/dcache.c
index b85fda360533..48b44a714b35 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -522,8 +522,7 @@ void shrink_dcache_sb(struct super_block * sb)
522 dentry = list_entry(tmp, struct dentry, d_lru); 522 dentry = list_entry(tmp, struct dentry, d_lru);
523 if (dentry->d_sb != sb) 523 if (dentry->d_sb != sb)
524 continue; 524 continue;
525 list_del(tmp); 525 list_move(tmp, &dentry_unused);
526 list_add(tmp, &dentry_unused);
527 } 526 }
528 527
529 /* 528 /*
@@ -638,7 +637,7 @@ resume:
638 * of the unused list for prune_dcache 637 * of the unused list for prune_dcache
639 */ 638 */
640 if (!atomic_read(&dentry->d_count)) { 639 if (!atomic_read(&dentry->d_count)) {
641 list_add(&dentry->d_lru, dentry_unused.prev); 640 list_add_tail(&dentry->d_lru, &dentry_unused);
642 dentry_stat.nr_unused++; 641 dentry_stat.nr_unused++;
643 found++; 642 found++;
644 } 643 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 81d87a413c68..0122a279106a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -250,7 +250,7 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block
250/* Add a dquot to the tail of the free list */ 250/* Add a dquot to the tail of the free list */
251static inline void put_dquot_last(struct dquot *dquot) 251static inline void put_dquot_last(struct dquot *dquot)
252{ 252{
253 list_add(&dquot->dq_free, free_dquots.prev); 253 list_add_tail(&dquot->dq_free, &free_dquots);
254 dqstats.free_dquots++; 254 dqstats.free_dquots++;
255} 255}
256 256
@@ -266,7 +266,7 @@ static inline void put_inuse(struct dquot *dquot)
266{ 266{
267 /* We add to the back of inuse list so we don't have to restart 267 /* We add to the back of inuse list so we don't have to restart
268 * when traversing this list and we block */ 268 * when traversing this list and we block */
269 list_add(&dquot->dq_inuse, inuse_list.prev); 269 list_add_tail(&dquot->dq_inuse, &inuse_list);
270 dqstats.allocated_dquots++; 270 dqstats.allocated_dquots++;
271} 271}
272 272
diff --git a/fs/exec.c b/fs/exec.c
index 0b88bf646143..c8494f513eaf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk)
666 * and to assume its PID: 666 * and to assume its PID:
667 */ 667 */
668 if (!thread_group_leader(current)) { 668 if (!thread_group_leader(current)) {
669 struct dentry *proc_dentry1, *proc_dentry2;
670
671 /* 669 /*
672 * Wait for the thread group leader to be a zombie. 670 * Wait for the thread group leader to be a zombie.
673 * It should already be zombie at this point, most 671 * It should already be zombie at this point, most
@@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk)
689 */ 687 */
690 current->start_time = leader->start_time; 688 current->start_time = leader->start_time;
691 689
692 spin_lock(&leader->proc_lock);
693 spin_lock(&current->proc_lock);
694 proc_dentry1 = proc_pid_unhash(current);
695 proc_dentry2 = proc_pid_unhash(leader);
696 write_lock_irq(&tasklist_lock); 690 write_lock_irq(&tasklist_lock);
697 691
698 BUG_ON(leader->tgid != current->tgid); 692 BUG_ON(leader->tgid != current->tgid);
@@ -713,7 +707,7 @@ static int de_thread(struct task_struct *tsk)
713 attach_pid(current, PIDTYPE_PID, current->pid); 707 attach_pid(current, PIDTYPE_PID, current->pid);
714 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 708 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
715 attach_pid(current, PIDTYPE_SID, current->signal->session); 709 attach_pid(current, PIDTYPE_SID, current->signal->session);
716 list_add_tail_rcu(&current->tasks, &init_task.tasks); 710 list_replace_rcu(&leader->tasks, &current->tasks);
717 711
718 current->group_leader = current; 712 current->group_leader = current;
719 leader->group_leader = current; 713 leader->group_leader = current;
@@ -721,7 +715,6 @@ static int de_thread(struct task_struct *tsk)
721 /* Reduce leader to a thread */ 715 /* Reduce leader to a thread */
722 detach_pid(leader, PIDTYPE_PGID); 716 detach_pid(leader, PIDTYPE_PGID);
723 detach_pid(leader, PIDTYPE_SID); 717 detach_pid(leader, PIDTYPE_SID);
724 list_del_init(&leader->tasks);
725 718
726 current->exit_signal = SIGCHLD; 719 current->exit_signal = SIGCHLD;
727 720
@@ -729,10 +722,6 @@ static int de_thread(struct task_struct *tsk)
729 leader->exit_state = EXIT_DEAD; 722 leader->exit_state = EXIT_DEAD;
730 723
731 write_unlock_irq(&tasklist_lock); 724 write_unlock_irq(&tasklist_lock);
732 spin_unlock(&leader->proc_lock);
733 spin_unlock(&current->proc_lock);
734 proc_pid_flush(proc_dentry1);
735 proc_pid_flush(proc_dentry2);
736 } 725 }
737 726
738 /* 727 /*
@@ -1379,67 +1368,102 @@ static void format_corename(char *corename, const char *pattern, long signr)
1379 *out_ptr = 0; 1368 *out_ptr = 0;
1380} 1369}
1381 1370
1382static void zap_threads (struct mm_struct *mm) 1371static void zap_process(struct task_struct *start)
1383{ 1372{
1384 struct task_struct *g, *p; 1373 struct task_struct *t;
1385 struct task_struct *tsk = current;
1386 struct completion *vfork_done = tsk->vfork_done;
1387 int traced = 0;
1388 1374
1389 /* 1375 start->signal->flags = SIGNAL_GROUP_EXIT;
1390 * Make sure nobody is waiting for us to release the VM, 1376 start->signal->group_stop_count = 0;
1391 * otherwise we can deadlock when we wait on each other
1392 */
1393 if (vfork_done) {
1394 tsk->vfork_done = NULL;
1395 complete(vfork_done);
1396 }
1397 1377
1398 read_lock(&tasklist_lock); 1378 t = start;
1399 do_each_thread(g,p) 1379 do {
1400 if (mm == p->mm && p != tsk) { 1380 if (t != current && t->mm) {
1401 force_sig_specific(SIGKILL, p); 1381 t->mm->core_waiters++;
1402 mm->core_waiters++; 1382 sigaddset(&t->pending.signal, SIGKILL);
1403 if (unlikely(p->ptrace) && 1383 signal_wake_up(t, 1);
1404 unlikely(p->parent->mm == mm))
1405 traced = 1;
1406 } 1384 }
1407 while_each_thread(g,p); 1385 } while ((t = next_thread(t)) != start);
1386}
1408 1387
1409 read_unlock(&tasklist_lock); 1388static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1389 int exit_code)
1390{
1391 struct task_struct *g, *p;
1392 unsigned long flags;
1393 int err = -EAGAIN;
1394
1395 spin_lock_irq(&tsk->sighand->siglock);
1396 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
1397 tsk->signal->group_exit_code = exit_code;
1398 zap_process(tsk);
1399 err = 0;
1400 }
1401 spin_unlock_irq(&tsk->sighand->siglock);
1402 if (err)
1403 return err;
1410 1404
1411 if (unlikely(traced)) { 1405 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
1412 /* 1406 goto done;
1413 * We are zapping a thread and the thread it ptraces. 1407
1414 * If the tracee went into a ptrace stop for exit tracing, 1408 rcu_read_lock();
1415 * we could deadlock since the tracer is waiting for this 1409 for_each_process(g) {
1416 * coredump to finish. Detach them so they can both die. 1410 if (g == tsk->group_leader)
1417 */ 1411 continue;
1418 write_lock_irq(&tasklist_lock); 1412
1419 do_each_thread(g,p) { 1413 p = g;
1420 if (mm == p->mm && p != tsk && 1414 do {
1421 p->ptrace && p->parent->mm == mm) { 1415 if (p->mm) {
1422 __ptrace_detach(p, 0); 1416 if (p->mm == mm) {
1417 /*
1418 * p->sighand can't disappear, but
1419 * may be changed by de_thread()
1420 */
1421 lock_task_sighand(p, &flags);
1422 zap_process(p);
1423 unlock_task_sighand(p, &flags);
1424 }
1425 break;
1423 } 1426 }
1424 } while_each_thread(g,p); 1427 } while ((p = next_thread(p)) != g);
1425 write_unlock_irq(&tasklist_lock);
1426 } 1428 }
1429 rcu_read_unlock();
1430done:
1431 return mm->core_waiters;
1427} 1432}
1428 1433
1429static void coredump_wait(struct mm_struct *mm) 1434static int coredump_wait(int exit_code)
1430{ 1435{
1431 DECLARE_COMPLETION(startup_done); 1436 struct task_struct *tsk = current;
1437 struct mm_struct *mm = tsk->mm;
1438 struct completion startup_done;
1439 struct completion *vfork_done;
1432 int core_waiters; 1440 int core_waiters;
1433 1441
1442 init_completion(&mm->core_done);
1443 init_completion(&startup_done);
1434 mm->core_startup_done = &startup_done; 1444 mm->core_startup_done = &startup_done;
1435 1445
1436 zap_threads(mm); 1446 core_waiters = zap_threads(tsk, mm, exit_code);
1437 core_waiters = mm->core_waiters;
1438 up_write(&mm->mmap_sem); 1447 up_write(&mm->mmap_sem);
1439 1448
1449 if (unlikely(core_waiters < 0))
1450 goto fail;
1451
1452 /*
1453 * Make sure nobody is waiting for us to release the VM,
1454 * otherwise we can deadlock when we wait on each other
1455 */
1456 vfork_done = tsk->vfork_done;
1457 if (vfork_done) {
1458 tsk->vfork_done = NULL;
1459 complete(vfork_done);
1460 }
1461
1440 if (core_waiters) 1462 if (core_waiters)
1441 wait_for_completion(&startup_done); 1463 wait_for_completion(&startup_done);
1464fail:
1442 BUG_ON(mm->core_waiters); 1465 BUG_ON(mm->core_waiters);
1466 return core_waiters;
1443} 1467}
1444 1468
1445int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1469int do_coredump(long signr, int exit_code, struct pt_regs * regs)
@@ -1473,22 +1497,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1473 } 1497 }
1474 mm->dumpable = 0; 1498 mm->dumpable = 0;
1475 1499
1476 retval = -EAGAIN; 1500 retval = coredump_wait(exit_code);
1477 spin_lock_irq(&current->sighand->siglock); 1501 if (retval < 0)
1478 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
1479 current->signal->flags = SIGNAL_GROUP_EXIT;
1480 current->signal->group_exit_code = exit_code;
1481 current->signal->group_stop_count = 0;
1482 retval = 0;
1483 }
1484 spin_unlock_irq(&current->sighand->siglock);
1485 if (retval) {
1486 up_write(&mm->mmap_sem);
1487 goto fail; 1502 goto fail;
1488 }
1489
1490 init_completion(&mm->core_done);
1491 coredump_wait(mm);
1492 1503
1493 /* 1504 /*
1494 * Clear any false indication of pending signals that might 1505 * Clear any false indication of pending signals that might
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b2891cc29db1..b7483360a2db 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -630,7 +630,7 @@ enum {
630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 630 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 631 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 632 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 633 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 634 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 635 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 636 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -666,6 +666,7 @@ static match_table_t tokens = {
666 {Opt_noreservation, "noreservation"}, 666 {Opt_noreservation, "noreservation"},
667 {Opt_noload, "noload"}, 667 {Opt_noload, "noload"},
668 {Opt_nobh, "nobh"}, 668 {Opt_nobh, "nobh"},
669 {Opt_bh, "bh"},
669 {Opt_commit, "commit=%u"}, 670 {Opt_commit, "commit=%u"},
670 {Opt_journal_update, "journal=update"}, 671 {Opt_journal_update, "journal=update"},
671 {Opt_journal_inum, "journal=%u"}, 672 {Opt_journal_inum, "journal=%u"},
@@ -1014,6 +1015,9 @@ clear_qf_name:
1014 case Opt_nobh: 1015 case Opt_nobh:
1015 set_opt(sbi->s_mount_opt, NOBH); 1016 set_opt(sbi->s_mount_opt, NOBH);
1016 break; 1017 break;
1018 case Opt_bh:
1019 clear_opt(sbi->s_mount_opt, NOBH);
1020 break;
1017 default: 1021 default:
1018 printk (KERN_ERR 1022 printk (KERN_ERR
1019 "EXT3-fs: Unrecognized mount option \"%s\" " 1023 "EXT3-fs: Unrecognized mount option \"%s\" "
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 1862e8bc101d..b8886f048eaa 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -53,8 +53,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
53 if (!instr) { 53 if (!instr) {
54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
55 spin_lock(&c->erase_completion_lock); 55 spin_lock(&c->erase_completion_lock);
56 list_del(&jeb->list); 56 list_move(&jeb->list, &c->erase_pending_list);
57 list_add(&jeb->list, &c->erase_pending_list);
58 c->erasing_size -= c->sector_size; 57 c->erasing_size -= c->sector_size;
59 c->dirty_size += c->sector_size; 58 c->dirty_size += c->sector_size;
60 jeb->dirty_size = c->sector_size; 59 jeb->dirty_size = c->sector_size;
@@ -86,8 +85,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
86 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
87 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
88 spin_lock(&c->erase_completion_lock); 87 spin_lock(&c->erase_completion_lock);
89 list_del(&jeb->list); 88 list_move(&jeb->list, &c->erase_pending_list);
90 list_add(&jeb->list, &c->erase_pending_list);
91 c->erasing_size -= c->sector_size; 89 c->erasing_size -= c->sector_size;
92 c->dirty_size += c->sector_size; 90 c->dirty_size += c->sector_size;
93 jeb->dirty_size = c->sector_size; 91 jeb->dirty_size = c->sector_size;
@@ -161,8 +159,7 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
161{ 159{
162 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 160 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
163 spin_lock(&c->erase_completion_lock); 161 spin_lock(&c->erase_completion_lock);
164 list_del(&jeb->list); 162 list_move_tail(&jeb->list, &c->erase_complete_list);
165 list_add_tail(&jeb->list, &c->erase_complete_list);
166 spin_unlock(&c->erase_completion_lock); 163 spin_unlock(&c->erase_completion_lock);
167 /* Ensure that kupdated calls us again to mark them clean */ 164 /* Ensure that kupdated calls us again to mark them clean */
168 jffs2_erase_pending_trigger(c); 165 jffs2_erase_pending_trigger(c);
@@ -178,8 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
178 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 175 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
179 /* We'd like to give this block another try. */ 176 /* We'd like to give this block another try. */
180 spin_lock(&c->erase_completion_lock); 177 spin_lock(&c->erase_completion_lock);
181 list_del(&jeb->list); 178 list_move(&jeb->list, &c->erase_pending_list);
182 list_add(&jeb->list, &c->erase_pending_list);
183 c->erasing_size -= c->sector_size; 179 c->erasing_size -= c->sector_size;
184 c->dirty_size += c->sector_size; 180 c->dirty_size += c->sector_size;
185 jeb->dirty_size = c->sector_size; 181 jeb->dirty_size = c->sector_size;
@@ -191,8 +187,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
191 spin_lock(&c->erase_completion_lock); 187 spin_lock(&c->erase_completion_lock);
192 c->erasing_size -= c->sector_size; 188 c->erasing_size -= c->sector_size;
193 c->bad_size += c->sector_size; 189 c->bad_size += c->sector_size;
194 list_del(&jeb->list); 190 list_move(&jeb->list, &c->bad_list);
195 list_add(&jeb->list, &c->bad_list);
196 c->nr_erasing_blocks--; 191 c->nr_erasing_blocks--;
197 spin_unlock(&c->erase_completion_lock); 192 spin_unlock(&c->erase_completion_lock);
198 wake_up(&c->erase_wait); 193 wake_up(&c->erase_wait);
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 8bedfd2ff689..ac0c350ed7d7 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -211,8 +211,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
211 struct jffs2_eraseblock *ejeb; 211 struct jffs2_eraseblock *ejeb;
212 212
213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 213 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
214 list_del(&ejeb->list); 214 list_move_tail(&ejeb->list, &c->erase_pending_list);
215 list_add_tail(&ejeb->list, &c->erase_pending_list);
216 c->nr_erasing_blocks++; 215 c->nr_erasing_blocks++;
217 jffs2_erase_pending_trigger(c); 216 jffs2_erase_pending_trigger(c);
218 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 217 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a7f153f79ecb..b9b700730dfe 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -495,8 +495,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
495 /* Fix up the original jeb now it's on the bad_list */ 495 /* Fix up the original jeb now it's on the bad_list */
496 if (first_raw == jeb->first_node) { 496 if (first_raw == jeb->first_node) {
497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 497 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
498 list_del(&jeb->list); 498 list_move(&jeb->list, &c->erase_pending_list);
499 list_add(&jeb->list, &c->erase_pending_list);
500 c->nr_erasing_blocks++; 499 c->nr_erasing_blocks++;
501 jffs2_erase_pending_trigger(c); 500 jffs2_erase_pending_trigger(c);
502 } 501 }
diff --git a/fs/libfs.c b/fs/libfs.c
index fc785d8befb9..ac02ea602c3d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -149,10 +149,9 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
149 /* fallthrough */ 149 /* fallthrough */
150 default: 150 default:
151 spin_lock(&dcache_lock); 151 spin_lock(&dcache_lock);
152 if (filp->f_pos == 2) { 152 if (filp->f_pos == 2)
153 list_del(q); 153 list_move(q, &dentry->d_subdirs);
154 list_add(q, &dentry->d_subdirs); 154
155 }
156 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
157 struct dentry *next; 156 struct dentry *next;
158 next = list_entry(p, struct dentry, d_u.d_child); 157 next = list_entry(p, struct dentry, d_u.d_child);
@@ -164,8 +163,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
164 return 0; 163 return 0;
165 spin_lock(&dcache_lock); 164 spin_lock(&dcache_lock);
166 /* next is still alive */ 165 /* next is still alive */
167 list_del(q); 166 list_move(q, p);
168 list_add(q, p);
169 p = q; 167 p = q;
170 filp->f_pos++; 168 filp->f_pos++;
171 } 169 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 866430bb024d..b3ed212ea416 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -526,10 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
526{ 526{
527 struct vfsmount *p; 527 struct vfsmount *p;
528 528
529 for (p = mnt; p; p = next_mnt(p, mnt)) { 529 for (p = mnt; p; p = next_mnt(p, mnt))
530 list_del(&p->mnt_hash); 530 list_move(&p->mnt_hash, kill);
531 list_add(&p->mnt_hash, kill);
532 }
533 531
534 if (propagate) 532 if (propagate)
535 propagate_umount(kill); 533 propagate_umount(kill);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 96c7578cbe1e..1630b5670dc2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -529,8 +529,7 @@ move_to_confirmed(struct nfs4_client *clp)
529 529
530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 530 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
531 list_del_init(&clp->cl_strhash); 531 list_del_init(&clp->cl_strhash);
532 list_del_init(&clp->cl_idhash); 532 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
533 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
534 strhashval = clientstr_hashval(clp->cl_recdir); 533 strhashval = clientstr_hashval(clp->cl_recdir);
535 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 534 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
536 renew_client(clp); 535 renew_client(clp);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d852ebb538e3..fdf7cf3dfadc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -103,8 +103,7 @@ nfsd_cache_shutdown(void)
103static void 103static void
104lru_put_end(struct svc_cacherep *rp) 104lru_put_end(struct svc_cacherep *rp)
105{ 105{
106 list_del(&rp->c_lru); 106 list_move_tail(&rp->c_lru, &lru_head);
107 list_add_tail(&rp->c_lru, &lru_head);
108} 107}
109 108
110/* 109/*
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 355593dd8ef8..87ee29cad50b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -381,8 +381,7 @@ do_ast:
381 ret = DLM_NORMAL; 381 ret = DLM_NORMAL;
382 if (past->type == DLM_AST) { 382 if (past->type == DLM_AST) {
383 /* do not alter lock refcount. switching lists. */ 383 /* do not alter lock refcount. switching lists. */
384 list_del_init(&lock->list); 384 list_move_tail(&lock->list, &res->granted);
385 list_add_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 385 mlog(0, "ast: adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 386 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 387 if (lock->ml.convert_type != LKM_IVMODE) {
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 8285228d9e37..70888b31e751 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -231,8 +231,7 @@ switch_queues:
231 231
232 lock->ml.convert_type = type; 232 lock->ml.convert_type = type;
233 /* do not alter lock refcount. switching lists. */ 233 /* do not alter lock refcount. switching lists. */
234 list_del_init(&lock->list); 234 list_move_tail(&lock->list, &res->converting);
235 list_add_tail(&lock->list, &res->converting);
236 235
237unlock_exit: 236unlock_exit:
238 spin_unlock(&lock->spinlock); 237 spin_unlock(&lock->spinlock);
@@ -248,8 +247,7 @@ void dlm_revert_pending_convert(struct dlm_lock_resource *res,
248 struct dlm_lock *lock) 247 struct dlm_lock *lock)
249{ 248{
250 /* do not alter lock refcount. switching lists. */ 249 /* do not alter lock refcount. switching lists. */
251 list_del_init(&lock->list); 250 list_move_tail(&lock->list, &res->granted);
252 list_add_tail(&lock->list, &res->granted);
253 lock->ml.convert_type = LKM_IVMODE; 251 lock->ml.convert_type = LKM_IVMODE;
254 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 252 lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
255} 253}
@@ -294,8 +292,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
294 res->state |= DLM_LOCK_RES_IN_PROGRESS; 292 res->state |= DLM_LOCK_RES_IN_PROGRESS;
295 /* move lock to local convert queue */ 293 /* move lock to local convert queue */
296 /* do not alter lock refcount. switching lists. */ 294 /* do not alter lock refcount. switching lists. */
297 list_del_init(&lock->list); 295 list_move_tail(&lock->list, &res->converting);
298 list_add_tail(&lock->list, &res->converting);
299 lock->convert_pending = 1; 296 lock->convert_pending = 1;
300 lock->ml.convert_type = type; 297 lock->ml.convert_type = type;
301 298
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 6fea28318d6d..55cda25ae11b 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -239,8 +239,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
239 mlog(0, "%s: $RECOVERY lock for this node (%u) is " 239 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
240 "mastered by %u; got lock, manually granting (no ast)\n", 240 "mastered by %u; got lock, manually granting (no ast)\n",
241 dlm->name, dlm->node_num, res->owner); 241 dlm->name, dlm->node_num, res->owner);
242 list_del_init(&lock->list); 242 list_move_tail(&lock->list, &res->granted);
243 list_add_tail(&lock->list, &res->granted);
244 } 243 }
245 spin_unlock(&res->spinlock); 244 spin_unlock(&res->spinlock);
246 245
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 805cbabac051..9962190e7416 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -905,13 +905,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
905 mlog(0, "found lockres owned by dead node while " 905 mlog(0, "found lockres owned by dead node while "
906 "doing recovery for node %u. sending it.\n", 906 "doing recovery for node %u. sending it.\n",
907 dead_node); 907 dead_node);
908 list_del_init(&res->recovering); 908 list_move_tail(&res->recovering, list);
909 list_add_tail(&res->recovering, list);
910 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 909 } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
911 mlog(0, "found UNKNOWN owner while doing recovery " 910 mlog(0, "found UNKNOWN owner while doing recovery "
912 "for node %u. sending it.\n", dead_node); 911 "for node %u. sending it.\n", dead_node);
913 list_del_init(&res->recovering); 912 list_move_tail(&res->recovering, list);
914 list_add_tail(&res->recovering, list);
915 } 913 }
916 } 914 }
917 spin_unlock(&dlm->spinlock); 915 spin_unlock(&dlm->spinlock);
@@ -1529,8 +1527,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1529 1527
1530 /* move the lock to its proper place */ 1528 /* move the lock to its proper place */
1531 /* do not alter lock refcount. switching lists. */ 1529 /* do not alter lock refcount. switching lists. */
1532 list_del_init(&lock->list); 1530 list_move_tail(&lock->list, queue);
1533 list_add_tail(&lock->list, queue);
1534 spin_unlock(&res->spinlock); 1531 spin_unlock(&res->spinlock);
1535 1532
1536 mlog(0, "just reordered a local lock!\n"); 1533 mlog(0, "just reordered a local lock!\n");
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 5be9d14f12cb..44d3b57ae8a8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -318,8 +318,7 @@ converting:
318 318
319 target->ml.type = target->ml.convert_type; 319 target->ml.type = target->ml.convert_type;
320 target->ml.convert_type = LKM_IVMODE; 320 target->ml.convert_type = LKM_IVMODE;
321 list_del_init(&target->list); 321 list_move_tail(&target->list, &res->granted);
322 list_add_tail(&target->list, &res->granted);
323 322
324 BUG_ON(!target->lksb); 323 BUG_ON(!target->lksb);
325 target->lksb->status = DLM_NORMAL; 324 target->lksb->status = DLM_NORMAL;
@@ -380,8 +379,7 @@ blocked:
380 target->ml.type, target->ml.node); 379 target->ml.type, target->ml.node);
381 380
382 // target->ml.type is already correct 381 // target->ml.type is already correct
383 list_del_init(&target->list); 382 list_move_tail(&target->list, &res->granted);
384 list_add_tail(&target->list, &res->granted);
385 383
386 BUG_ON(!target->lksb); 384 BUG_ON(!target->lksb);
387 target->lksb->status = DLM_NORMAL; 385 target->lksb->status = DLM_NORMAL;
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 7b1a27542674..ac89c509daf9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -271,8 +271,7 @@ void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
271void dlm_commit_pending_cancel(struct dlm_lock_resource *res, 271void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
272 struct dlm_lock *lock) 272 struct dlm_lock *lock)
273{ 273{
274 list_del_init(&lock->list); 274 list_move_tail(&lock->list, &res->granted);
275 list_add_tail(&lock->list, &res->granted);
276 lock->ml.convert_type = LKM_IVMODE; 275 lock->ml.convert_type = LKM_IVMODE;
277} 276}
278 277
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index eebc3cfa6be8..3fe8781c22cb 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -222,8 +222,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list)); 222 BUG_ON(!list_empty(&OCFS2_I(inode)->ip_handle_list));
223 223
224 OCFS2_I(inode)->ip_handle = handle; 224 OCFS2_I(inode)->ip_handle = handle;
225 list_del(&(OCFS2_I(inode)->ip_handle_list)); 225 list_move_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
226 list_add_tail(&(OCFS2_I(inode)->ip_handle_list), &(handle->inode_list));
227} 226}
228 227
229static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) 228static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index efc7c91128af..93a56bd4a2b7 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1,5 +1,4 @@
1/* $Id: inode.c,v 1.15 2001/11/12 09:43:39 davem Exp $ 1/* inode.c: /proc/openprom handling routines
2 * openpromfs.c: /proc/openprom handling routines
3 * 2 *
4 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) 3 * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
5 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 4 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
@@ -12,762 +11,245 @@
12#include <linux/openprom_fs.h> 11#include <linux/openprom_fs.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
15#include <linux/smp_lock.h> 14#include <linux/seq_file.h>
16 15
17#include <asm/openprom.h> 16#include <asm/openprom.h>
18#include <asm/oplib.h> 17#include <asm/oplib.h>
18#include <asm/prom.h>
19#include <asm/uaccess.h> 19#include <asm/uaccess.h>
20 20
21#define ALIASES_NNODES 64 21static DEFINE_MUTEX(op_mutex);
22
23typedef struct {
24 u16 parent;
25 u16 next;
26 u16 child;
27 u16 first_prop;
28 u32 node;
29} openpromfs_node;
30
31typedef struct {
32#define OPP_STRING 0x10
33#define OPP_STRINGLIST 0x20
34#define OPP_BINARY 0x40
35#define OPP_HEXSTRING 0x80
36#define OPP_DIRTY 0x01
37#define OPP_QUOTED 0x02
38#define OPP_NOTQUOTED 0x04
39#define OPP_ASCIIZ 0x08
40 u32 flag;
41 u32 alloclen;
42 u32 len;
43 char *value;
44 char name[8];
45} openprom_property;
46
47static openpromfs_node *nodes;
48static int alloced;
49static u16 last_node;
50static u16 first_prop;
51static u16 options = 0xffff;
52static u16 aliases = 0xffff;
53static int aliases_nodes;
54static char *alias_names [ALIASES_NNODES];
55
56#define OPENPROM_ROOT_INO 16
57#define OPENPROM_FIRST_INO OPENPROM_ROOT_INO
58#define NODE(ino) nodes[ino - OPENPROM_FIRST_INO]
59#define NODE2INO(node) (node + OPENPROM_FIRST_INO)
60#define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node)
61
62static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *);
63static int openpromfs_readdir(struct file *, void *, filldir_t);
64static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd);
65static int openpromfs_unlink (struct inode *, struct dentry *dentry);
66 22
67static inline u16 ptr_nod(void *p) 23#define OPENPROM_ROOT_INO 0
68{
69 return (long)p & 0xFFFF;
70}
71 24
72static ssize_t nodenum_read(struct file *file, char __user *buf, 25enum op_inode_type {
73 size_t count, loff_t *ppos) 26 op_inode_node,
27 op_inode_prop,
28};
29
30union op_inode_data {
31 struct device_node *node;
32 struct property *prop;
33};
34
35struct op_inode_info {
36 struct inode vfs_inode;
37 enum op_inode_type type;
38 union op_inode_data u;
39};
40
41static inline struct op_inode_info *OP_I(struct inode *inode)
74{ 42{
75 struct inode *inode = file->f_dentry->d_inode; 43 return container_of(inode, struct op_inode_info, vfs_inode);
76 char buffer[10];
77
78 if (count < 0 || !inode->u.generic_ip)
79 return -EINVAL;
80 sprintf (buffer, "%8.8lx\n", (long)inode->u.generic_ip);
81 if (file->f_pos >= 9)
82 return 0;
83 if (count > 9 - file->f_pos)
84 count = 9 - file->f_pos;
85 if (copy_to_user(buf, buffer + file->f_pos, count))
86 return -EFAULT;
87 *ppos += count;
88 return count;
89} 44}
90 45
91static ssize_t property_read(struct file *filp, char __user *buf, 46static int is_string(unsigned char *p, int len)
92 size_t count, loff_t *ppos)
93{ 47{
94 struct inode *inode = filp->f_dentry->d_inode; 48 int i;
95 int i, j, k;
96 u32 node;
97 char *p, *s;
98 u32 *q;
99 openprom_property *op;
100 char buffer[64];
101
102 if (!filp->private_data) {
103 node = nodes[ptr_nod(inode->u.generic_ip)].node;
104 i = ((u32)(long)inode->u.generic_ip) >> 16;
105 if (ptr_nod(inode->u.generic_ip) == aliases) {
106 if (i >= aliases_nodes)
107 p = NULL;
108 else
109 p = alias_names [i];
110 } else
111 for (p = prom_firstprop (node, buffer);
112 i && p && *p;
113 p = prom_nextprop (node, p, buffer), i--)
114 /* nothing */ ;
115 if (!p || !*p)
116 return -EIO;
117 i = prom_getproplen (node, p);
118 if (i < 0) {
119 if (ptr_nod(inode->u.generic_ip) == aliases)
120 i = 0;
121 else
122 return -EIO;
123 }
124 k = i;
125 if (i < 64) i = 64;
126 filp->private_data = kmalloc (sizeof (openprom_property)
127 + (j = strlen (p)) + 2 * i,
128 GFP_KERNEL);
129 if (!filp->private_data)
130 return -ENOMEM;
131 op = filp->private_data;
132 op->flag = 0;
133 op->alloclen = 2 * i;
134 strcpy (op->name, p);
135 op->value = (char *)(((unsigned long)(op->name + j + 4)) & ~3);
136 op->len = k;
137 if (k && prom_getproperty (node, p, op->value, i) < 0)
138 return -EIO;
139 op->value [k] = 0;
140 if (k) {
141 for (s = NULL, p = op->value; p < op->value + k; p++) {
142 if ((*p >= ' ' && *p <= '~') || *p == '\n') {
143 op->flag |= OPP_STRING;
144 s = p;
145 continue;
146 }
147 if (p > op->value && !*p && s == p - 1) {
148 if (p < op->value + k - 1)
149 op->flag |= OPP_STRINGLIST;
150 else
151 op->flag |= OPP_ASCIIZ;
152 continue;
153 }
154 if (k == 1 && !*p) {
155 op->flag |= (OPP_STRING|OPP_ASCIIZ);
156 break;
157 }
158 op->flag &= ~(OPP_STRING|OPP_STRINGLIST);
159 if (k & 3)
160 op->flag |= OPP_HEXSTRING;
161 else
162 op->flag |= OPP_BINARY;
163 break;
164 }
165 if (op->flag & OPP_STRINGLIST)
166 op->flag &= ~(OPP_STRING);
167 if (op->flag & OPP_ASCIIZ)
168 op->len--;
169 }
170 } else
171 op = filp->private_data;
172 if (!count || !(op->len || (op->flag & OPP_ASCIIZ)))
173 return 0;
174 if (*ppos >= 0xffffff || count >= 0xffffff)
175 return -EINVAL;
176 if (op->flag & OPP_STRINGLIST) {
177 for (k = 0, p = op->value; p < op->value + op->len; p++)
178 if (!*p)
179 k++;
180 i = op->len + 4 * k + 3;
181 } else if (op->flag & OPP_STRING) {
182 i = op->len + 3;
183 } else if (op->flag & OPP_BINARY) {
184 i = (op->len * 9) >> 2;
185 } else {
186 i = (op->len << 1) + 1;
187 }
188 k = *ppos;
189 if (k >= i) return 0;
190 if (count > i - k) count = i - k;
191 if (op->flag & OPP_STRING) {
192 if (!k) {
193 if (put_user('\'', buf))
194 return -EFAULT;
195 k++;
196 count--;
197 }
198 49
199 if (k + count >= i - 2) 50 for (i = 0; i < len; i++) {
200 j = i - 2 - k; 51 unsigned char val = p[i];
201 else
202 j = count;
203
204 if (j >= 0) {
205 if (copy_to_user(buf + k - *ppos,
206 op->value + k - 1, j))
207 return -EFAULT;
208 count -= j;
209 k += j;
210 }
211 52
212 if (count) { 53 if ((i && !val) ||
213 if (put_user('\'', &buf [k++ - *ppos])) 54 (val >= ' ' && val <= '~'))
214 return -EFAULT; 55 continue;
215 }
216 if (count > 1) {
217 if (put_user('\n', &buf [k++ - *ppos]))
218 return -EFAULT;
219 }
220 } else if (op->flag & OPP_STRINGLIST) {
221 char *tmp;
222
223 tmp = kmalloc (i, GFP_KERNEL);
224 if (!tmp)
225 return -ENOMEM;
226
227 s = tmp;
228 *s++ = '\'';
229 for (p = op->value; p < op->value + op->len; p++) {
230 if (!*p) {
231 strcpy(s, "' + '");
232 s += 5;
233 continue;
234 }
235 *s++ = *p;
236 }
237 strcpy(s, "'\n");
238
239 if (copy_to_user(buf, tmp + k, count))
240 return -EFAULT;
241
242 kfree(tmp);
243 k += count;
244
245 } else if (op->flag & OPP_BINARY) {
246 char buffer[10];
247 u32 *first, *last;
248 int first_off, last_cnt;
249
250 first = ((u32 *)op->value) + k / 9;
251 first_off = k % 9;
252 last = ((u32 *)op->value) + (k + count - 1) / 9;
253 last_cnt = (k + count) % 9;
254 if (!last_cnt) last_cnt = 9;
255
256 if (first == last) {
257 sprintf (buffer, "%08x.", *first);
258 if (copy_to_user(buf, buffer + first_off,
259 last_cnt - first_off))
260 return -EFAULT;
261 buf += last_cnt - first_off;
262 } else {
263 for (q = first; q <= last; q++) {
264 sprintf (buffer, "%08x.", *q);
265 if (q == first) {
266 if (copy_to_user(buf, buffer + first_off,
267 9 - first_off))
268 return -EFAULT;
269 buf += 9 - first_off;
270 } else if (q == last) {
271 if (copy_to_user(buf, buffer, last_cnt))
272 return -EFAULT;
273 buf += last_cnt;
274 } else {
275 if (copy_to_user(buf, buffer, 9))
276 return -EFAULT;
277 buf += 9;
278 }
279 }
280 }
281 56
282 if (last == (u32 *)(op->value + op->len - 4) && last_cnt == 9) { 57 return 0;
283 if (put_user('\n', (buf - 1))) 58 }
284 return -EFAULT;
285 }
286 59
287 k += count; 60 return 1;
61}
288 62
289 } else if (op->flag & OPP_HEXSTRING) { 63static int property_show(struct seq_file *f, void *v)
290 char buffer[3]; 64{
65 struct property *prop = f->private;
66 void *pval;
67 int len;
291 68
292 if ((k < i - 1) && (k & 1)) { 69 len = prop->length;
293 sprintf (buffer, "%02x", 70 pval = prop->value;
294 (unsigned char) *(op->value + (k >> 1)) & 0xff);
295 if (put_user(buffer[1], &buf[k++ - *ppos]))
296 return -EFAULT;
297 count--;
298 }
299 71
300 for (; (count > 1) && (k < i - 1); k += 2) { 72 if (is_string(pval, len)) {
301 sprintf (buffer, "%02x", 73 while (len > 0) {
302 (unsigned char) *(op->value + (k >> 1)) & 0xff); 74 int n = strlen(pval);
303 if (copy_to_user(buf + k - *ppos, buffer, 2))
304 return -EFAULT;
305 count -= 2;
306 }
307 75
308 if (count && (k < i - 1)) { 76 seq_printf(f, "%s", (char *) pval);
309 sprintf (buffer, "%02x",
310 (unsigned char) *(op->value + (k >> 1)) & 0xff);
311 if (put_user(buffer[0], &buf[k++ - *ppos]))
312 return -EFAULT;
313 count--;
314 }
315 77
316 if (count) { 78 /* Skip over the NULL byte too. */
317 if (put_user('\n', &buf [k++ - *ppos])) 79 pval += n + 1;
318 return -EFAULT; 80 len -= n + 1;
319 }
320 }
321 count = k - *ppos;
322 *ppos = k;
323 return count;
324}
325 81
326static ssize_t property_write(struct file *filp, const char __user *buf, 82 if (len > 0)
327 size_t count, loff_t *ppos) 83 seq_printf(f, " + ");
328{
329 int i, j, k;
330 char *p;
331 u32 *q;
332 void *b;
333 openprom_property *op;
334
335 if (*ppos >= 0xffffff || count >= 0xffffff)
336 return -EINVAL;
337 if (!filp->private_data) {
338 i = property_read (filp, NULL, 0, NULL);
339 if (i)
340 return i;
341 }
342 k = *ppos;
343 op = filp->private_data;
344 if (!(op->flag & OPP_STRING)) {
345 u32 *first, *last;
346 int first_off, last_cnt;
347 u32 mask, mask2;
348 char tmp [9];
349 int forcelen = 0;
350
351 j = k % 9;
352 for (i = 0; i < count; i++, j++) {
353 if (j == 9) j = 0;
354 if (!j) {
355 char ctmp;
356 if (get_user(ctmp, &buf[i]))
357 return -EFAULT;
358 if (ctmp != '.') {
359 if (ctmp != '\n') {
360 if (op->flag & OPP_BINARY)
361 return -EINVAL;
362 else
363 goto write_try_string;
364 } else {
365 count = i + 1;
366 forcelen = 1;
367 break;
368 }
369 }
370 } else {
371 char ctmp;
372 if (get_user(ctmp, &buf[i]))
373 return -EFAULT;
374 if (ctmp < '0' ||
375 (ctmp > '9' && ctmp < 'A') ||
376 (ctmp > 'F' && ctmp < 'a') ||
377 ctmp > 'f') {
378 if (op->flag & OPP_BINARY)
379 return -EINVAL;
380 else
381 goto write_try_string;
382 }
383 }
384 }
385 op->flag |= OPP_BINARY;
386 tmp [8] = 0;
387 i = ((count + k + 8) / 9) << 2;
388 if (op->alloclen <= i) {
389 b = kmalloc (sizeof (openprom_property) + 2 * i,
390 GFP_KERNEL);
391 if (!b)
392 return -ENOMEM;
393 memcpy (b, filp->private_data,
394 sizeof (openprom_property)
395 + strlen (op->name) + op->alloclen);
396 memset (b + sizeof (openprom_property)
397 + strlen (op->name) + op->alloclen,
398 0, 2 * i - op->alloclen);
399 op = b;
400 op->alloclen = 2*i;
401 b = filp->private_data;
402 filp->private_data = op;
403 kfree (b);
404 } 84 }
405 first = ((u32 *)op->value) + (k / 9); 85 } else {
406 first_off = k % 9; 86 if (len & 3) {
407 last = (u32 *)(op->value + i); 87 while (len) {
408 last_cnt = (k + count) % 9; 88 len--;
409 if (first + 1 == last) { 89 if (len)
410 memset (tmp, '0', 8); 90 seq_printf(f, "%02x.",
411 if (copy_from_user(tmp + first_off, buf, 91 *(unsigned char *) pval);
412 (count + first_off > 8) ? 92 else
413 8 - first_off : count)) 93 seq_printf(f, "%02x",
414 return -EFAULT; 94 *(unsigned char *) pval);
415 mask = 0xffffffff; 95 pval++;
416 mask2 = 0xffffffff;
417 for (j = 0; j < first_off; j++)
418 mask >>= 1;
419 for (j = 8 - count - first_off; j > 0; j--)
420 mask2 <<= 1;
421 mask &= mask2;
422 if (mask) {
423 *first &= ~mask;
424 *first |= simple_strtoul (tmp, NULL, 16);
425 op->flag |= OPP_DIRTY;
426 } 96 }
427 } else { 97 } else {
428 op->flag |= OPP_DIRTY; 98 while (len >= 4) {
429 for (q = first; q < last; q++) { 99 len -= 4;
430 if (q == first) { 100
431 if (first_off < 8) { 101 if (len)
432 memset (tmp, '0', 8); 102 seq_printf(f, "%08x.",
433 if (copy_from_user(tmp + first_off, 103 *(unsigned int *) pval);
434 buf, 104 else
435 8 - first_off)) 105 seq_printf(f, "%08x",
436 return -EFAULT; 106 *(unsigned int *) pval);
437 mask = 0xffffffff; 107 pval += 4;
438 for (j = 0; j < first_off; j++)
439 mask >>= 1;
440 *q &= ~mask;
441 *q |= simple_strtoul (tmp,NULL,16);
442 }
443 buf += 9;
444 } else if ((q == last - 1) && last_cnt
445 && (last_cnt < 8)) {
446 memset (tmp, '0', 8);
447 if (copy_from_user(tmp, buf, last_cnt))
448 return -EFAULT;
449 mask = 0xffffffff;
450 for (j = 0; j < 8 - last_cnt; j++)
451 mask <<= 1;
452 *q &= ~mask;
453 *q |= simple_strtoul (tmp, NULL, 16);
454 buf += last_cnt;
455 } else {
456 char tchars[2 * sizeof(long) + 1];
457
458 if (copy_from_user(tchars, buf, sizeof(tchars) - 1))
459 return -EFAULT;
460 tchars[sizeof(tchars) - 1] = '\0';
461 *q = simple_strtoul (tchars, NULL, 16);
462 buf += 9;
463 }
464 }
465 }
466 if (!forcelen) {
467 if (op->len < i)
468 op->len = i;
469 } else
470 op->len = i;
471 *ppos += count;
472 }
473write_try_string:
474 if (!(op->flag & OPP_BINARY)) {
475 if (!(op->flag & (OPP_QUOTED | OPP_NOTQUOTED))) {
476 char ctmp;
477
478 /* No way, if somebody starts writing from the middle,
479 * we don't know whether he uses quotes around or not
480 */
481 if (k > 0)
482 return -EINVAL;
483 if (get_user(ctmp, buf))
484 return -EFAULT;
485 if (ctmp == '\'') {
486 op->flag |= OPP_QUOTED;
487 buf++;
488 count--;
489 (*ppos)++;
490 if (!count) {
491 op->flag |= OPP_STRING;
492 return 1;
493 }
494 } else
495 op->flag |= OPP_NOTQUOTED;
496 }
497 op->flag |= OPP_STRING;
498 if (op->alloclen <= count + *ppos) {
499 b = kmalloc (sizeof (openprom_property)
500 + 2 * (count + *ppos), GFP_KERNEL);
501 if (!b)
502 return -ENOMEM;
503 memcpy (b, filp->private_data,
504 sizeof (openprom_property)
505 + strlen (op->name) + op->alloclen);
506 memset (b + sizeof (openprom_property)
507 + strlen (op->name) + op->alloclen,
508 0, 2*(count - *ppos) - op->alloclen);
509 op = b;
510 op->alloclen = 2*(count + *ppos);
511 b = filp->private_data;
512 filp->private_data = op;
513 kfree (b);
514 }
515 p = op->value + *ppos - ((op->flag & OPP_QUOTED) ? 1 : 0);
516 if (copy_from_user(p, buf, count))
517 return -EFAULT;
518 op->flag |= OPP_DIRTY;
519 for (i = 0; i < count; i++, p++)
520 if (*p == '\n') {
521 *p = 0;
522 break;
523 } 108 }
524 if (i < count) {
525 op->len = p - op->value;
526 *ppos += i + 1;
527 if ((p > op->value) && (op->flag & OPP_QUOTED)
528 && (*(p - 1) == '\''))
529 op->len--;
530 } else {
531 if (p - op->value > op->len)
532 op->len = p - op->value;
533 *ppos += count;
534 } 109 }
535 } 110 }
536 return *ppos - k; 111 seq_printf(f, "\n");
112
113 return 0;
537} 114}
538 115
539int property_release (struct inode *inode, struct file *filp) 116static void *property_start(struct seq_file *f, loff_t *pos)
540{ 117{
541 openprom_property *op = filp->private_data; 118 if (*pos == 0)
542 int error; 119 return pos;
543 u32 node; 120 return NULL;
544 121}
545 if (!op) 122
546 return 0; 123static void *property_next(struct seq_file *f, void *v, loff_t *pos)
547 lock_kernel(); 124{
548 node = nodes[ptr_nod(inode->u.generic_ip)].node; 125 (*pos)++;
549 if (ptr_nod(inode->u.generic_ip) == aliases) { 126 return NULL;
550 if ((op->flag & OPP_DIRTY) && (op->flag & OPP_STRING)) { 127}
551 char *p = op->name; 128
552 int i = (op->value - op->name) - strlen (op->name) - 1; 129static void property_stop(struct seq_file *f, void *v)
553 op->value [op->len] = 0; 130{
554 *(op->value - 1) = ' '; 131 /* Nothing to do */
555 if (i) { 132}
556 for (p = op->value - i - 2; p >= op->name; p--) 133
557 p[i] = *p; 134static struct seq_operations property_op = {
558 p = op->name + i; 135 .start = property_start,
559 } 136 .next = property_next,
560 memcpy (p - 8, "nvalias ", 8); 137 .stop = property_stop,
561 prom_feval (p - 8); 138 .show = property_show
562 } 139};
563 } else if (op->flag & OPP_DIRTY) { 140
564 if (op->flag & OPP_STRING) { 141static int property_open(struct inode *inode, struct file *file)
565 op->value [op->len] = 0; 142{
566 error = prom_setprop (node, op->name, 143 struct op_inode_info *oi = OP_I(inode);
567 op->value, op->len + 1); 144 int ret;
568 if (error <= 0) 145
569 printk (KERN_WARNING "openpromfs: " 146 BUG_ON(oi->type != op_inode_prop);
570 "Couldn't write property %s\n", 147
571 op->name); 148 ret = seq_open(file, &property_op);
572 } else if ((op->flag & OPP_BINARY) || !op->len) { 149 if (!ret) {
573 error = prom_setprop (node, op->name, 150 struct seq_file *m = file->private_data;
574 op->value, op->len); 151 m->private = oi->u.prop;
575 if (error <= 0)
576 printk (KERN_WARNING "openpromfs: "
577 "Couldn't write property %s\n",
578 op->name);
579 } else {
580 printk (KERN_WARNING "openpromfs: "
581 "Unknown property type of %s\n",
582 op->name);
583 }
584 } 152 }
585 unlock_kernel(); 153 return ret;
586 kfree (filp->private_data);
587 return 0;
588} 154}
589 155
590static const struct file_operations openpromfs_prop_ops = { 156static const struct file_operations openpromfs_prop_ops = {
591 .read = property_read, 157 .open = property_open,
592 .write = property_write, 158 .read = seq_read,
593 .release = property_release, 159 .llseek = seq_lseek,
160 .release = seq_release,
594}; 161};
595 162
596static const struct file_operations openpromfs_nodenum_ops = { 163static int openpromfs_readdir(struct file *, void *, filldir_t);
597 .read = nodenum_read,
598};
599 164
600static const struct file_operations openprom_operations = { 165static const struct file_operations openprom_operations = {
601 .read = generic_read_dir, 166 .read = generic_read_dir,
602 .readdir = openpromfs_readdir, 167 .readdir = openpromfs_readdir,
603}; 168};
604 169
605static struct inode_operations openprom_alias_inode_operations = { 170static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
606 .create = openpromfs_create,
607 .lookup = openpromfs_lookup,
608 .unlink = openpromfs_unlink,
609};
610 171
611static struct inode_operations openprom_inode_operations = { 172static struct inode_operations openprom_inode_operations = {
612 .lookup = openpromfs_lookup, 173 .lookup = openpromfs_lookup,
613}; 174};
614 175
615static int lookup_children(u16 n, const char * name, int len) 176static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
616{ 177{
617 int ret; 178 struct op_inode_info *ent_oi, *oi = OP_I(dir);
618 u16 node; 179 struct device_node *dp, *child;
619 for (; n != 0xffff; n = nodes[n].next) { 180 struct property *prop;
620 node = nodes[n].child; 181 enum op_inode_type ent_type;
621 if (node != 0xffff) { 182 union op_inode_data ent_data;
622 char buffer[128];
623 int i;
624 char *p;
625
626 while (node != 0xffff) {
627 if (prom_getname (nodes[node].node,
628 buffer, 128) >= 0) {
629 i = strlen (buffer);
630 if ((len == i)
631 && !strncmp (buffer, name, len))
632 return NODE2INO(node);
633 p = strchr (buffer, '@');
634 if (p && (len == p - buffer)
635 && !strncmp (buffer, name, len))
636 return NODE2INO(node);
637 }
638 node = nodes[node].next;
639 }
640 } else
641 continue;
642 ret = lookup_children (nodes[n].child, name, len);
643 if (ret) return ret;
644 }
645 return 0;
646}
647
648static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
649{
650 int ino = 0;
651#define OPFSL_DIR 0
652#define OPFSL_PROPERTY 1
653#define OPFSL_NODENUM 2
654 int type = 0;
655 char buffer[128];
656 char *p;
657 const char *name; 183 const char *name;
658 u32 n;
659 u16 dirnode;
660 unsigned int len;
661 int i;
662 struct inode *inode; 184 struct inode *inode;
663 char buffer2[64]; 185 unsigned int ino;
186 int len;
664 187
665 inode = NULL; 188 BUG_ON(oi->type != op_inode_node);
189
190 dp = oi->u.node;
191
666 name = dentry->d_name.name; 192 name = dentry->d_name.name;
667 len = dentry->d_name.len; 193 len = dentry->d_name.len;
668 lock_kernel(); 194
669 if (name [0] == '.' && len == 5 && !strncmp (name + 1, "node", 4)) { 195 mutex_lock(&op_mutex);
670 ino = NODEP2INO(NODE(dir->i_ino).first_prop); 196
671 type = OPFSL_NODENUM; 197 child = dp->child;
672 } 198 while (child) {
673 if (!ino) { 199 int n = strlen(child->path_component_name);
674 u16 node = NODE(dir->i_ino).child; 200
675 while (node != 0xffff) { 201 if (len == n &&
676 if (prom_getname (nodes[node].node, buffer, 128) >= 0) { 202 !strncmp(child->path_component_name, name, len)) {
677 i = strlen (buffer); 203 ent_type = op_inode_node;
678 if (len == i && !strncmp (buffer, name, len)) { 204 ent_data.node = child;
679 ino = NODE2INO(node); 205 ino = child->unique_id;
680 type = OPFSL_DIR; 206 goto found;
681 break;
682 }
683 p = strchr (buffer, '@');
684 if (p && (len == p - buffer)
685 && !strncmp (buffer, name, len)) {
686 ino = NODE2INO(node);
687 type = OPFSL_DIR;
688 break;
689 }
690 }
691 node = nodes[node].next;
692 }
693 }
694 n = NODE(dir->i_ino).node;
695 dirnode = dir->i_ino - OPENPROM_FIRST_INO;
696 if (!ino) {
697 int j = NODEP2INO(NODE(dir->i_ino).first_prop);
698 if (dirnode != aliases) {
699 for (p = prom_firstprop (n, buffer2);
700 p && *p;
701 p = prom_nextprop (n, p, buffer2)) {
702 j++;
703 if ((len == strlen (p))
704 && !strncmp (p, name, len)) {
705 ino = j;
706 type = OPFSL_PROPERTY;
707 break;
708 }
709 }
710 } else {
711 int k;
712 for (k = 0; k < aliases_nodes; k++) {
713 j++;
714 if (alias_names [k]
715 && (len == strlen (alias_names [k]))
716 && !strncmp (alias_names [k], name, len)) {
717 ino = j;
718 type = OPFSL_PROPERTY;
719 break;
720 }
721 }
722 } 207 }
208 child = child->sibling;
723 } 209 }
724 if (!ino) { 210
725 ino = lookup_children (NODE(dir->i_ino).child, name, len); 211 prop = dp->properties;
726 if (ino) 212 while (prop) {
727 type = OPFSL_DIR; 213 int n = strlen(prop->name);
728 else { 214
729 unlock_kernel(); 215 if (len == n && !strncmp(prop->name, name, len)) {
730 return ERR_PTR(-ENOENT); 216 ent_type = op_inode_prop;
217 ent_data.prop = prop;
218 ino = prop->unique_id;
219 goto found;
731 } 220 }
221
222 prop = prop->next;
732 } 223 }
733 inode = iget (dir->i_sb, ino); 224
734 unlock_kernel(); 225 mutex_unlock(&op_mutex);
226 return ERR_PTR(-ENOENT);
227
228found:
229 inode = iget(dir->i_sb, ino);
230 mutex_unlock(&op_mutex);
735 if (!inode) 231 if (!inode)
736 return ERR_PTR(-EINVAL); 232 return ERR_PTR(-EINVAL);
737 switch (type) { 233 ent_oi = OP_I(inode);
738 case OPFSL_DIR: 234 ent_oi->type = ent_type;
235 ent_oi->u = ent_data;
236
237 switch (ent_type) {
238 case op_inode_node:
739 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 239 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
740 if (ino == OPENPROM_FIRST_INO + aliases) { 240 inode->i_op = &openprom_inode_operations;
741 inode->i_mode |= S_IWUSR;
742 inode->i_op = &openprom_alias_inode_operations;
743 } else
744 inode->i_op = &openprom_inode_operations;
745 inode->i_fop = &openprom_operations; 241 inode->i_fop = &openprom_operations;
746 inode->i_nlink = 2; 242 inode->i_nlink = 2;
747 break; 243 break;
748 case OPFSL_NODENUM: 244 case op_inode_prop:
749 inode->i_mode = S_IFREG | S_IRUGO; 245 if (!strcmp(dp->name, "options") && (len == 17) &&
750 inode->i_fop = &openpromfs_nodenum_ops; 246 !strncmp (name, "security-password", 17))
751 inode->i_nlink = 1;
752 inode->u.generic_ip = (void *)(long)(n);
753 break;
754 case OPFSL_PROPERTY:
755 if ((dirnode == options) && (len == 17)
756 && !strncmp (name, "security-password", 17))
757 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; 247 inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
758 else { 248 else
759 inode->i_mode = S_IFREG | S_IRUGO; 249 inode->i_mode = S_IFREG | S_IRUGO;
760 if (dirnode == options || dirnode == aliases) {
761 if (len != 4 || strncmp (name, "name", 4))
762 inode->i_mode |= S_IWUSR;
763 }
764 }
765 inode->i_fop = &openpromfs_prop_ops; 250 inode->i_fop = &openpromfs_prop_ops;
766 inode->i_nlink = 1; 251 inode->i_nlink = 1;
767 if (inode->i_size < 0) 252 inode->i_size = ent_oi->u.prop->length;
768 inode->i_size = 0;
769 inode->u.generic_ip = (void *)(long)(((u16)dirnode) |
770 (((u16)(ino - NODEP2INO(NODE(dir->i_ino).first_prop) - 1)) << 16));
771 break; 253 break;
772 } 254 }
773 255
@@ -781,237 +263,89 @@ static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentr
781static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
782{ 264{
783 struct inode *inode = filp->f_dentry->d_inode; 265 struct inode *inode = filp->f_dentry->d_inode;
266 struct op_inode_info *oi = OP_I(inode);
267 struct device_node *dp = oi->u.node;
268 struct device_node *child;
269 struct property *prop;
784 unsigned int ino; 270 unsigned int ino;
785 u32 n; 271 int i;
786 int i, j; 272
787 char buffer[128]; 273 mutex_lock(&op_mutex);
788 u16 node;
789 char *p;
790 char buffer2[64];
791
792 lock_kernel();
793 274
794 ino = inode->i_ino; 275 ino = inode->i_ino;
795 i = filp->f_pos; 276 i = filp->f_pos;
796 switch (i) { 277 switch (i) {
797 case 0: 278 case 0:
798 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) goto out; 279 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
280 goto out;
799 i++; 281 i++;
800 filp->f_pos++; 282 filp->f_pos++;
801 /* fall thru */ 283 /* fall thru */
802 case 1: 284 case 1:
803 if (filldir(dirent, "..", 2, i, 285 if (filldir(dirent, "..", 2, i,
804 (NODE(ino).parent == 0xffff) ? 286 (dp->parent == NULL ?
805 OPENPROM_ROOT_INO : NODE2INO(NODE(ino).parent), DT_DIR) < 0) 287 OPENPROM_ROOT_INO :
288 dp->parent->unique_id), DT_DIR) < 0)
806 goto out; 289 goto out;
807 i++; 290 i++;
808 filp->f_pos++; 291 filp->f_pos++;
809 /* fall thru */ 292 /* fall thru */
810 default: 293 default:
811 i -= 2; 294 i -= 2;
812 node = NODE(ino).child; 295
813 while (i && node != 0xffff) { 296 /* First, the children nodes as directories. */
814 node = nodes[node].next; 297 child = dp->child;
298 while (i && child) {
299 child = child->sibling;
815 i--; 300 i--;
816 } 301 }
817 while (node != 0xffff) { 302 while (child) {
818 if (prom_getname (nodes[node].node, buffer, 128) < 0) 303 if (filldir(dirent,
819 goto out; 304 child->path_component_name,
820 if (filldir(dirent, buffer, strlen(buffer), 305 strlen(child->path_component_name),
821 filp->f_pos, NODE2INO(node), DT_DIR) < 0) 306 filp->f_pos, child->unique_id, DT_DIR) < 0)
822 goto out; 307 goto out;
308
823 filp->f_pos++; 309 filp->f_pos++;
824 node = nodes[node].next; 310 child = child->sibling;
825 } 311 }
826 j = NODEP2INO(NODE(ino).first_prop); 312
827 if (!i) { 313 /* Next, the properties as files. */
828 if (filldir(dirent, ".node", 5, filp->f_pos, j, DT_REG) < 0) 314 prop = dp->properties;
315 while (i && prop) {
316 prop = prop->next;
317 i--;
318 }
319 while (prop) {
320 if (filldir(dirent, prop->name, strlen(prop->name),
321 filp->f_pos, prop->unique_id, DT_REG) < 0)
829 goto out; 322 goto out;
323
830 filp->f_pos++; 324 filp->f_pos++;
831 } else 325 prop = prop->next;
832 i--;
833 n = NODE(ino).node;
834 if (ino == OPENPROM_FIRST_INO + aliases) {
835 for (j++; i < aliases_nodes; i++, j++) {
836 if (alias_names [i]) {
837 if (filldir (dirent, alias_names [i],
838 strlen (alias_names [i]),
839 filp->f_pos, j, DT_REG) < 0) goto out;
840 filp->f_pos++;
841 }
842 }
843 } else {
844 for (p = prom_firstprop (n, buffer2);
845 p && *p;
846 p = prom_nextprop (n, p, buffer2)) {
847 j++;
848 if (i) i--;
849 else {
850 if (filldir(dirent, p, strlen(p),
851 filp->f_pos, j, DT_REG) < 0)
852 goto out;
853 filp->f_pos++;
854 }
855 }
856 } 326 }
857 } 327 }
858out: 328out:
859 unlock_kernel(); 329 mutex_unlock(&op_mutex);
860 return 0;
861}
862
863static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode,
864 struct nameidata *nd)
865{
866 char *p;
867 struct inode *inode;
868
869 if (!dir)
870 return -ENOENT;
871 if (dentry->d_name.len > 256)
872 return -EINVAL;
873 p = kmalloc (dentry->d_name.len + 1, GFP_KERNEL);
874 if (!p)
875 return -ENOMEM;
876 strncpy (p, dentry->d_name.name, dentry->d_name.len);
877 p [dentry->d_name.len] = 0;
878 lock_kernel();
879 if (aliases_nodes == ALIASES_NNODES) {
880 kfree(p);
881 unlock_kernel();
882 return -EIO;
883 }
884 alias_names [aliases_nodes++] = p;
885 inode = iget (dir->i_sb,
886 NODEP2INO(NODE(dir->i_ino).first_prop) + aliases_nodes);
887 if (!inode) {
888 unlock_kernel();
889 return -EINVAL;
890 }
891 inode->i_mode = S_IFREG | S_IRUGO | S_IWUSR;
892 inode->i_fop = &openpromfs_prop_ops;
893 inode->i_nlink = 1;
894 if (inode->i_size < 0) inode->i_size = 0;
895 inode->u.generic_ip = (void *)(long)(((u16)aliases) |
896 (((u16)(aliases_nodes - 1)) << 16));
897 unlock_kernel();
898 d_instantiate(dentry, inode);
899 return 0; 330 return 0;
900} 331}
901 332
902static int openpromfs_unlink (struct inode *dir, struct dentry *dentry) 333static kmem_cache_t *op_inode_cachep;
903{
904 unsigned int len;
905 char *p;
906 const char *name;
907 int i;
908
909 name = dentry->d_name.name;
910 len = dentry->d_name.len;
911 lock_kernel();
912 for (i = 0; i < aliases_nodes; i++)
913 if ((strlen (alias_names [i]) == len)
914 && !strncmp (name, alias_names[i], len)) {
915 char buffer[512];
916
917 p = alias_names [i];
918 alias_names [i] = NULL;
919 kfree (p);
920 strcpy (buffer, "nvunalias ");
921 memcpy (buffer + 10, name, len);
922 buffer [10 + len] = 0;
923 prom_feval (buffer);
924 }
925 unlock_kernel();
926 return 0;
927}
928 334
929/* {{{ init section */ 335static struct inode *openprom_alloc_inode(struct super_block *sb)
930static int __init check_space (u16 n)
931{ 336{
932 unsigned long pages; 337 struct op_inode_info *oi;
933 338
934 if ((1 << alloced) * PAGE_SIZE < (n + 2) * sizeof(openpromfs_node)) { 339 oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
935 pages = __get_free_pages (GFP_KERNEL, alloced + 1); 340 if (!oi)
936 if (!pages) 341 return NULL;
937 return -1;
938 342
939 if (nodes) { 343 return &oi->vfs_inode;
940 memcpy ((char *)pages, nodes,
941 (1 << alloced) * PAGE_SIZE);
942 free_pages ((unsigned long)nodes, alloced);
943 }
944 alloced++;
945 nodes = (openpromfs_node *)pages;
946 }
947 return 0;
948} 344}
949 345
950static u16 __init get_nodes (u16 parent, u32 node) 346static void openprom_destroy_inode(struct inode *inode)
951{ 347{
952 char *p; 348 kmem_cache_free(op_inode_cachep, OP_I(inode));
953 u16 n = last_node++, i;
954 char buffer[64];
955
956 if (check_space (n) < 0)
957 return 0xffff;
958 nodes[n].parent = parent;
959 nodes[n].node = node;
960 nodes[n].next = 0xffff;
961 nodes[n].child = 0xffff;
962 nodes[n].first_prop = first_prop++;
963 if (!parent) {
964 char buffer[8];
965 int j;
966
967 if ((j = prom_getproperty (node, "name", buffer, 8)) >= 0) {
968 buffer[j] = 0;
969 if (!strcmp (buffer, "options"))
970 options = n;
971 else if (!strcmp (buffer, "aliases"))
972 aliases = n;
973 }
974 }
975 if (n != aliases)
976 for (p = prom_firstprop (node, buffer);
977 p && p != (char *)-1 && *p;
978 p = prom_nextprop (node, p, buffer))
979 first_prop++;
980 else {
981 char *q;
982 for (p = prom_firstprop (node, buffer);
983 p && p != (char *)-1 && *p;
984 p = prom_nextprop (node, p, buffer)) {
985 if (aliases_nodes == ALIASES_NNODES)
986 break;
987 for (i = 0; i < aliases_nodes; i++)
988 if (!strcmp (p, alias_names [i]))
989 break;
990 if (i < aliases_nodes)
991 continue;
992 q = kmalloc (strlen (p) + 1, GFP_KERNEL);
993 if (!q)
994 return 0xffff;
995 strcpy (q, p);
996 alias_names [aliases_nodes++] = q;
997 }
998 first_prop += ALIASES_NNODES;
999 }
1000 node = prom_getchild (node);
1001 if (node) {
1002 parent = get_nodes (n, node);
1003 if (parent == 0xffff)
1004 return 0xffff;
1005 nodes[n].child = parent;
1006 while ((node = prom_getsibling (node)) != 0) {
1007 i = get_nodes (n, node);
1008 if (i == 0xffff)
1009 return 0xffff;
1010 nodes[parent].next = i;
1011 parent = i;
1012 }
1013 }
1014 return n;
1015} 349}
1016 350
1017static void openprom_read_inode(struct inode * inode) 351static void openprom_read_inode(struct inode * inode)
@@ -1031,6 +365,8 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
1031} 365}
1032 366
1033static struct super_operations openprom_sops = { 367static struct super_operations openprom_sops = {
368 .alloc_inode = openprom_alloc_inode,
369 .destroy_inode = openprom_destroy_inode,
1034 .read_inode = openprom_read_inode, 370 .read_inode = openprom_read_inode,
1035 .statfs = simple_statfs, 371 .statfs = simple_statfs,
1036 .remount_fs = openprom_remount, 372 .remount_fs = openprom_remount,
@@ -1038,7 +374,8 @@ static struct super_operations openprom_sops = {
1038 374
1039static int openprom_fill_super(struct super_block *s, void *data, int silent) 375static int openprom_fill_super(struct super_block *s, void *data, int silent)
1040{ 376{
1041 struct inode * root_inode; 377 struct inode *root_inode;
378 struct op_inode_info *oi;
1042 379
1043 s->s_flags |= MS_NOATIME; 380 s->s_flags |= MS_NOATIME;
1044 s->s_blocksize = 1024; 381 s->s_blocksize = 1024;
@@ -1049,6 +386,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
1049 root_inode = iget(s, OPENPROM_ROOT_INO); 386 root_inode = iget(s, OPENPROM_ROOT_INO);
1050 if (!root_inode) 387 if (!root_inode)
1051 goto out_no_root; 388 goto out_no_root;
389
390 oi = OP_I(root_inode);
391 oi->type = op_inode_node;
392 oi->u.node = of_find_node_by_path("/");
393
1052 s->s_root = d_alloc_root(root_inode); 394 s->s_root = d_alloc_root(root_inode);
1053 if (!s->s_root) 395 if (!s->s_root)
1054 goto out_no_root; 396 goto out_no_root;
@@ -1073,29 +415,39 @@ static struct file_system_type openprom_fs_type = {
1073 .kill_sb = kill_anon_super, 415 .kill_sb = kill_anon_super,
1074}; 416};
1075 417
418static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
419{
420 struct op_inode_info *oi = (struct op_inode_info *) data;
421
422 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
423 SLAB_CTOR_CONSTRUCTOR)
424 inode_init_once(&oi->vfs_inode);
425}
426
1076static int __init init_openprom_fs(void) 427static int __init init_openprom_fs(void)
1077{ 428{
1078 nodes = (openpromfs_node *)__get_free_pages(GFP_KERNEL, 0); 429 int err;
1079 if (!nodes) { 430
1080 printk (KERN_WARNING "openpromfs: can't get free page\n"); 431 op_inode_cachep = kmem_cache_create("op_inode_cache",
1081 return -EIO; 432 sizeof(struct op_inode_info),
1082 } 433 0,
1083 if (get_nodes (0xffff, prom_root_node) == 0xffff) { 434 (SLAB_RECLAIM_ACCOUNT |
1084 printk (KERN_WARNING "openpromfs: couldn't setup tree\n"); 435 SLAB_MEM_SPREAD),
1085 return -EIO; 436 op_inode_init_once, NULL);
1086 } 437 if (!op_inode_cachep)
1087 nodes[last_node].first_prop = first_prop; 438 return -ENOMEM;
1088 return register_filesystem(&openprom_fs_type); 439
440 err = register_filesystem(&openprom_fs_type);
441 if (err)
442 kmem_cache_destroy(op_inode_cachep);
443
444 return err;
1089} 445}
1090 446
1091static void __exit exit_openprom_fs(void) 447static void __exit exit_openprom_fs(void)
1092{ 448{
1093 int i;
1094 unregister_filesystem(&openprom_fs_type); 449 unregister_filesystem(&openprom_fs_type);
1095 free_pages ((unsigned long)nodes, alloced); 450 kmem_cache_destroy(op_inode_cachep);
1096 for (i = 0; i < aliases_nodes; i++)
1097 kfree (alias_names [i]);
1098 nodes = NULL;
1099} 451}
1100 452
1101module_init(init_openprom_fs) 453module_init(init_openprom_fs)
diff --git a/fs/pnode.c b/fs/pnode.c
index 37b568ed0e05..da42ee61c1df 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -53,8 +53,7 @@ static int do_make_slave(struct vfsmount *mnt)
53 if (master) { 53 if (master) {
54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) 54 list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
55 slave_mnt->mnt_master = master; 55 slave_mnt->mnt_master = master;
56 list_del(&mnt->mnt_slave); 56 list_move(&mnt->mnt_slave, &master->mnt_slave_list);
57 list_add(&mnt->mnt_slave, &master->mnt_slave_list);
58 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); 57 list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
59 INIT_LIST_HEAD(&mnt->mnt_slave_list); 58 INIT_LIST_HEAD(&mnt->mnt_slave_list);
60 } else { 59 } else {
@@ -283,10 +282,8 @@ static void __propagate_umount(struct vfsmount *mnt)
283 * umount the child only if the child has no 282 * umount the child only if the child has no
284 * other children 283 * other children
285 */ 284 */
286 if (child && list_empty(&child->mnt_mounts)) { 285 if (child && list_empty(&child->mnt_mounts))
287 list_del(&child->mnt_hash); 286 list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
288 list_add_tail(&child->mnt_hash, &mnt->mnt_hash);
289 }
290 } 287 }
291} 288}
292 289
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6afff725a8c9..6ba7785319de 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -74,6 +74,16 @@
74#include <linux/poll.h> 74#include <linux/poll.h>
75#include "internal.h" 75#include "internal.h"
76 76
77/* NOTE:
78 * Implementing inode permission operations in /proc is almost
79 * certainly an error. Permission checks need to happen during
80 * each system call not at open time. The reason is that most of
81 * what we wish to check for permissions in /proc varies at runtime.
82 *
83 * The classic example of a problem is opening file descriptors
84 * in /proc for a task before it execs a suid executable.
85 */
86
77/* 87/*
78 * For hysterical raisins we keep the same inumbers as in the old procfs. 88 * For hysterical raisins we keep the same inumbers as in the old procfs.
79 * Feel free to change the macro below - just keep the range distinct from 89 * Feel free to change the macro below - just keep the range distinct from
@@ -121,6 +131,8 @@ enum pid_directory_inos {
121 PROC_TGID_ATTR_PREV, 131 PROC_TGID_ATTR_PREV,
122 PROC_TGID_ATTR_EXEC, 132 PROC_TGID_ATTR_EXEC,
123 PROC_TGID_ATTR_FSCREATE, 133 PROC_TGID_ATTR_FSCREATE,
134 PROC_TGID_ATTR_KEYCREATE,
135 PROC_TGID_ATTR_SOCKCREATE,
124#endif 136#endif
125#ifdef CONFIG_AUDITSYSCALL 137#ifdef CONFIG_AUDITSYSCALL
126 PROC_TGID_LOGINUID, 138 PROC_TGID_LOGINUID,
@@ -162,6 +174,8 @@ enum pid_directory_inos {
162 PROC_TID_ATTR_PREV, 174 PROC_TID_ATTR_PREV,
163 PROC_TID_ATTR_EXEC, 175 PROC_TID_ATTR_EXEC,
164 PROC_TID_ATTR_FSCREATE, 176 PROC_TID_ATTR_FSCREATE,
177 PROC_TID_ATTR_KEYCREATE,
178 PROC_TID_ATTR_SOCKCREATE,
165#endif 179#endif
166#ifdef CONFIG_AUDITSYSCALL 180#ifdef CONFIG_AUDITSYSCALL
167 PROC_TID_LOGINUID, 181 PROC_TID_LOGINUID,
@@ -173,6 +187,9 @@ enum pid_directory_inos {
173 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 187 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
174}; 188};
175 189
190/* Worst case buffer size needed for holding an integer. */
191#define PROC_NUMBUF 10
192
176struct pid_entry { 193struct pid_entry {
177 int type; 194 int type;
178 int len; 195 int len;
@@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = {
275 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 292 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
276 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 293 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
277 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 294 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
295 E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
296 E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
278 {0,0,NULL,0} 297 {0,0,NULL,0}
279}; 298};
280static struct pid_entry tid_attr_stuff[] = { 299static struct pid_entry tid_attr_stuff[] = {
@@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = {
282 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 301 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
283 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 302 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
284 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 303 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
304 E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
305 E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
285 {0,0,NULL,0} 306 {0,0,NULL,0}
286}; 307};
287#endif 308#endif
@@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = {
290 311
291static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 312static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
292{ 313{
293 struct task_struct *task = proc_task(inode); 314 struct task_struct *task = get_proc_task(inode);
294 struct files_struct *files; 315 struct files_struct *files = NULL;
295 struct file *file; 316 struct file *file;
296 int fd = proc_type(inode) - PROC_TID_FD_DIR; 317 int fd = proc_fd(inode);
297 318
298 files = get_files_struct(task); 319 if (task) {
320 files = get_files_struct(task);
321 put_task_struct(task);
322 }
299 if (files) { 323 if (files) {
300 /* 324 /*
301 * We are not taking a ref to the file structure, so we must 325 * We are not taking a ref to the file structure, so we must
@@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
327 return fs; 351 return fs;
328} 352}
329 353
330static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 354static int get_nr_threads(struct task_struct *tsk)
331{ 355{
332 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 356 /* Must be called with the rcu_read_lock held */
333 int result = -ENOENT; 357 unsigned long flags;
334 if (fs) { 358 int count = 0;
335 read_lock(&fs->lock); 359
336 *mnt = mntget(fs->pwdmnt); 360 if (lock_task_sighand(tsk, &flags)) {
337 *dentry = dget(fs->pwd); 361 count = atomic_read(&tsk->signal->count);
338 read_unlock(&fs->lock); 362 unlock_task_sighand(tsk, &flags);
339 result = 0;
340 put_fs_struct(fs);
341 } 363 }
342 return result; 364 return count;
343} 365}
344 366
345static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 367static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
346{ 368{
347 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 369 struct task_struct *task = get_proc_task(inode);
370 struct fs_struct *fs = NULL;
348 int result = -ENOENT; 371 int result = -ENOENT;
372
373 if (task) {
374 fs = get_fs_struct(task);
375 put_task_struct(task);
376 }
349 if (fs) { 377 if (fs) {
350 read_lock(&fs->lock); 378 read_lock(&fs->lock);
351 *mnt = mntget(fs->rootmnt); 379 *mnt = mntget(fs->pwdmnt);
352 *dentry = dget(fs->root); 380 *dentry = dget(fs->pwd);
353 read_unlock(&fs->lock); 381 read_unlock(&fs->lock);
354 result = 0; 382 result = 0;
355 put_fs_struct(fs); 383 put_fs_struct(fs);
@@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
357 return result; 385 return result;
358} 386}
359 387
360 388static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
361/* Same as proc_root_link, but this addionally tries to get fs from other
362 * threads in the group */
363static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
364 struct vfsmount **mnt)
365{ 389{
366 struct fs_struct *fs; 390 struct task_struct *task = get_proc_task(inode);
391 struct fs_struct *fs = NULL;
367 int result = -ENOENT; 392 int result = -ENOENT;
368 struct task_struct *leader = proc_task(inode);
369 393
370 task_lock(leader); 394 if (task) {
371 fs = leader->fs; 395 fs = get_fs_struct(task);
372 if (fs) { 396 put_task_struct(task);
373 atomic_inc(&fs->count);
374 task_unlock(leader);
375 } else {
376 /* Try to get fs from other threads */
377 task_unlock(leader);
378 read_lock(&tasklist_lock);
379 if (pid_alive(leader)) {
380 struct task_struct *task = leader;
381
382 while ((task = next_thread(task)) != leader) {
383 task_lock(task);
384 fs = task->fs;
385 if (fs) {
386 atomic_inc(&fs->count);
387 task_unlock(task);
388 break;
389 }
390 task_unlock(task);
391 }
392 }
393 read_unlock(&tasklist_lock);
394 } 397 }
395
396 if (fs) { 398 if (fs) {
397 read_lock(&fs->lock); 399 read_lock(&fs->lock);
398 *mnt = mntget(fs->rootmnt); 400 *mnt = mntget(fs->rootmnt);
@@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
404 return result; 406 return result;
405} 407}
406 408
407
408#define MAY_PTRACE(task) \ 409#define MAY_PTRACE(task) \
409 (task == current || \ 410 (task == current || \
410 (task->parent == current && \ 411 (task->parent == current && \
@@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
535/************************************************************************/ 536/************************************************************************/
536 537
537/* permission checks */ 538/* permission checks */
538 539static int proc_fd_access_allowed(struct inode *inode)
539/* If the process being read is separated by chroot from the reading process,
540 * don't let the reader access the threads.
541 *
542 * note: this does dput(root) and mntput(vfsmnt) on exit.
543 */
544static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
545{
546 struct dentry *de, *base;
547 struct vfsmount *our_vfsmnt, *mnt;
548 int res = 0;
549
550 read_lock(&current->fs->lock);
551 our_vfsmnt = mntget(current->fs->rootmnt);
552 base = dget(current->fs->root);
553 read_unlock(&current->fs->lock);
554
555 spin_lock(&vfsmount_lock);
556 de = root;
557 mnt = vfsmnt;
558
559 while (mnt != our_vfsmnt) {
560 if (mnt == mnt->mnt_parent)
561 goto out;
562 de = mnt->mnt_mountpoint;
563 mnt = mnt->mnt_parent;
564 }
565
566 if (!is_subdir(de, base))
567 goto out;
568 spin_unlock(&vfsmount_lock);
569
570exit:
571 dput(base);
572 mntput(our_vfsmnt);
573 dput(root);
574 mntput(vfsmnt);
575 return res;
576out:
577 spin_unlock(&vfsmount_lock);
578 res = -EACCES;
579 goto exit;
580}
581
582static int proc_check_root(struct inode *inode)
583{
584 struct dentry *root;
585 struct vfsmount *vfsmnt;
586
587 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
588 return -ENOENT;
589 return proc_check_chroot(root, vfsmnt);
590}
591
592static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
593{
594 if (generic_permission(inode, mask, NULL) != 0)
595 return -EACCES;
596 return proc_check_root(inode);
597}
598
599static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
600{
601 struct dentry *root;
602 struct vfsmount *vfsmnt;
603
604 if (generic_permission(inode, mask, NULL) != 0)
605 return -EACCES;
606
607 if (proc_task_root_link(inode, &root, &vfsmnt))
608 return -ENOENT;
609
610 return proc_check_chroot(root, vfsmnt);
611}
612
613extern struct seq_operations proc_pid_maps_op;
614static int maps_open(struct inode *inode, struct file *file)
615{
616 struct task_struct *task = proc_task(inode);
617 int ret = seq_open(file, &proc_pid_maps_op);
618 if (!ret) {
619 struct seq_file *m = file->private_data;
620 m->private = task;
621 }
622 return ret;
623}
624
625static struct file_operations proc_maps_operations = {
626 .open = maps_open,
627 .read = seq_read,
628 .llseek = seq_lseek,
629 .release = seq_release,
630};
631
632#ifdef CONFIG_NUMA
633extern struct seq_operations proc_pid_numa_maps_op;
634static int numa_maps_open(struct inode *inode, struct file *file)
635{
636 struct task_struct *task = proc_task(inode);
637 int ret = seq_open(file, &proc_pid_numa_maps_op);
638 if (!ret) {
639 struct seq_file *m = file->private_data;
640 m->private = task;
641 }
642 return ret;
643}
644
645static struct file_operations proc_numa_maps_operations = {
646 .open = numa_maps_open,
647 .read = seq_read,
648 .llseek = seq_lseek,
649 .release = seq_release,
650};
651#endif
652
653#ifdef CONFIG_MMU
654extern struct seq_operations proc_pid_smaps_op;
655static int smaps_open(struct inode *inode, struct file *file)
656{ 540{
657 struct task_struct *task = proc_task(inode); 541 struct task_struct *task;
658 int ret = seq_open(file, &proc_pid_smaps_op); 542 int allowed = 0;
659 if (!ret) { 543 /* Allow access to a task's file descriptors if it is us or we
660 struct seq_file *m = file->private_data; 544 * may use ptrace attach to the process and find out that
661 m->private = task; 545 * information.
546 */
547 task = get_proc_task(inode);
548 if (task) {
549 allowed = ptrace_may_attach(task);
550 put_task_struct(task);
662 } 551 }
663 return ret; 552 return allowed;
664} 553}
665 554
666static struct file_operations proc_smaps_operations = {
667 .open = smaps_open,
668 .read = seq_read,
669 .llseek = seq_lseek,
670 .release = seq_release,
671};
672#endif
673
674extern struct seq_operations mounts_op; 555extern struct seq_operations mounts_op;
675struct proc_mounts { 556struct proc_mounts {
676 struct seq_file m; 557 struct seq_file m;
@@ -679,16 +560,19 @@ struct proc_mounts {
679 560
680static int mounts_open(struct inode *inode, struct file *file) 561static int mounts_open(struct inode *inode, struct file *file)
681{ 562{
682 struct task_struct *task = proc_task(inode); 563 struct task_struct *task = get_proc_task(inode);
683 struct namespace *namespace; 564 struct namespace *namespace = NULL;
684 struct proc_mounts *p; 565 struct proc_mounts *p;
685 int ret = -EINVAL; 566 int ret = -EINVAL;
686 567
687 task_lock(task); 568 if (task) {
688 namespace = task->namespace; 569 task_lock(task);
689 if (namespace) 570 namespace = task->namespace;
690 get_namespace(namespace); 571 if (namespace)
691 task_unlock(task); 572 get_namespace(namespace);
573 task_unlock(task);
574 put_task_struct(task);
575 }
692 576
693 if (namespace) { 577 if (namespace) {
694 ret = -ENOMEM; 578 ret = -ENOMEM;
@@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = {
745extern struct seq_operations mountstats_op; 629extern struct seq_operations mountstats_op;
746static int mountstats_open(struct inode *inode, struct file *file) 630static int mountstats_open(struct inode *inode, struct file *file)
747{ 631{
748 struct task_struct *task = proc_task(inode);
749 int ret = seq_open(file, &mountstats_op); 632 int ret = seq_open(file, &mountstats_op);
750 633
751 if (!ret) { 634 if (!ret) {
752 struct seq_file *m = file->private_data; 635 struct seq_file *m = file->private_data;
753 struct namespace *namespace; 636 struct namespace *namespace = NULL;
754 task_lock(task); 637 struct task_struct *task = get_proc_task(inode);
755 namespace = task->namespace; 638
756 if (namespace) 639 if (task) {
757 get_namespace(namespace); 640 task_lock(task);
758 task_unlock(task); 641 namespace = task->namespace;
642 if (namespace)
643 get_namespace(namespace);
644 task_unlock(task);
645 put_task_struct(task);
646 }
759 647
760 if (namespace) 648 if (namespace)
761 m->private = namespace; 649 m->private = namespace;
@@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
782 struct inode * inode = file->f_dentry->d_inode; 670 struct inode * inode = file->f_dentry->d_inode;
783 unsigned long page; 671 unsigned long page;
784 ssize_t length; 672 ssize_t length;
785 struct task_struct *task = proc_task(inode); 673 struct task_struct *task = get_proc_task(inode);
674
675 length = -ESRCH;
676 if (!task)
677 goto out_no_task;
786 678
787 if (count > PROC_BLOCK_SIZE) 679 if (count > PROC_BLOCK_SIZE)
788 count = PROC_BLOCK_SIZE; 680 count = PROC_BLOCK_SIZE;
681
682 length = -ENOMEM;
789 if (!(page = __get_free_page(GFP_KERNEL))) 683 if (!(page = __get_free_page(GFP_KERNEL)))
790 return -ENOMEM; 684 goto out;
791 685
792 length = PROC_I(inode)->op.proc_read(task, (char*)page); 686 length = PROC_I(inode)->op.proc_read(task, (char*)page);
793 687
794 if (length >= 0) 688 if (length >= 0)
795 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 689 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
796 free_page(page); 690 free_page(page);
691out:
692 put_task_struct(task);
693out_no_task:
797 return length; 694 return length;
798} 695}
799 696
@@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file)
810static ssize_t mem_read(struct file * file, char __user * buf, 707static ssize_t mem_read(struct file * file, char __user * buf,
811 size_t count, loff_t *ppos) 708 size_t count, loff_t *ppos)
812{ 709{
813 struct task_struct *task = proc_task(file->f_dentry->d_inode); 710 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
814 char *page; 711 char *page;
815 unsigned long src = *ppos; 712 unsigned long src = *ppos;
816 int ret = -ESRCH; 713 int ret = -ESRCH;
817 struct mm_struct *mm; 714 struct mm_struct *mm;
818 715
716 if (!task)
717 goto out_no_task;
718
819 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 719 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
820 goto out; 720 goto out;
821 721
@@ -865,6 +765,8 @@ out_put:
865out_free: 765out_free:
866 free_page((unsigned long) page); 766 free_page((unsigned long) page);
867out: 767out:
768 put_task_struct(task);
769out_no_task:
868 return ret; 770 return ret;
869} 771}
870 772
@@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf,
877{ 779{
878 int copied = 0; 780 int copied = 0;
879 char *page; 781 char *page;
880 struct task_struct *task = proc_task(file->f_dentry->d_inode); 782 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
881 unsigned long dst = *ppos; 783 unsigned long dst = *ppos;
882 784
785 copied = -ESRCH;
786 if (!task)
787 goto out_no_task;
788
883 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 789 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
884 return -ESRCH; 790 goto out;
885 791
792 copied = -ENOMEM;
886 page = (char *)__get_free_page(GFP_USER); 793 page = (char *)__get_free_page(GFP_USER);
887 if (!page) 794 if (!page)
888 return -ENOMEM; 795 goto out;
889 796
890 while (count > 0) { 797 while (count > 0) {
891 int this_len, retval; 798 int this_len, retval;
@@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf,
908 } 815 }
909 *ppos = dst; 816 *ppos = dst;
910 free_page((unsigned long) page); 817 free_page((unsigned long) page);
818out:
819 put_task_struct(task);
820out_no_task:
911 return copied; 821 return copied;
912} 822}
913#endif 823#endif
@@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = {
938static ssize_t oom_adjust_read(struct file *file, char __user *buf, 848static ssize_t oom_adjust_read(struct file *file, char __user *buf,
939 size_t count, loff_t *ppos) 849 size_t count, loff_t *ppos)
940{ 850{
941 struct task_struct *task = proc_task(file->f_dentry->d_inode); 851 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
942 char buffer[8]; 852 char buffer[PROC_NUMBUF];
943 size_t len; 853 size_t len;
944 int oom_adjust = task->oomkilladj; 854 int oom_adjust;
945 loff_t __ppos = *ppos; 855 loff_t __ppos = *ppos;
946 856
947 len = sprintf(buffer, "%i\n", oom_adjust); 857 if (!task)
858 return -ESRCH;
859 oom_adjust = task->oomkilladj;
860 put_task_struct(task);
861
862 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
948 if (__ppos >= len) 863 if (__ppos >= len)
949 return 0; 864 return 0;
950 if (count > len-__ppos) 865 if (count > len-__ppos)
@@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
958static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 873static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
959 size_t count, loff_t *ppos) 874 size_t count, loff_t *ppos)
960{ 875{
961 struct task_struct *task = proc_task(file->f_dentry->d_inode); 876 struct task_struct *task;
962 char buffer[8], *end; 877 char buffer[PROC_NUMBUF], *end;
963 int oom_adjust; 878 int oom_adjust;
964 879
965 if (!capable(CAP_SYS_RESOURCE)) 880 if (!capable(CAP_SYS_RESOURCE))
966 return -EPERM; 881 return -EPERM;
967 memset(buffer, 0, 8); 882 memset(buffer, 0, sizeof(buffer));
968 if (count > 6) 883 if (count > sizeof(buffer) - 1)
969 count = 6; 884 count = sizeof(buffer) - 1;
970 if (copy_from_user(buffer, buf, count)) 885 if (copy_from_user(buffer, buf, count))
971 return -EFAULT; 886 return -EFAULT;
972 oom_adjust = simple_strtol(buffer, &end, 0); 887 oom_adjust = simple_strtol(buffer, &end, 0);
@@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
974 return -EINVAL; 889 return -EINVAL;
975 if (*end == '\n') 890 if (*end == '\n')
976 end++; 891 end++;
892 task = get_proc_task(file->f_dentry->d_inode);
893 if (!task)
894 return -ESRCH;
977 task->oomkilladj = oom_adjust; 895 task->oomkilladj = oom_adjust;
896 put_task_struct(task);
978 if (end - buffer == 0) 897 if (end - buffer == 0)
979 return -EIO; 898 return -EIO;
980 return end - buffer; 899 return end - buffer;
@@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = {
985 .write = oom_adjust_write, 904 .write = oom_adjust_write,
986}; 905};
987 906
988static struct inode_operations proc_mem_inode_operations = {
989 .permission = proc_permission,
990};
991
992#ifdef CONFIG_AUDITSYSCALL 907#ifdef CONFIG_AUDITSYSCALL
993#define TMPBUFLEN 21 908#define TMPBUFLEN 21
994static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 909static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
995 size_t count, loff_t *ppos) 910 size_t count, loff_t *ppos)
996{ 911{
997 struct inode * inode = file->f_dentry->d_inode; 912 struct inode * inode = file->f_dentry->d_inode;
998 struct task_struct *task = proc_task(inode); 913 struct task_struct *task = get_proc_task(inode);
999 ssize_t length; 914 ssize_t length;
1000 char tmpbuf[TMPBUFLEN]; 915 char tmpbuf[TMPBUFLEN];
1001 916
917 if (!task)
918 return -ESRCH;
1002 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 919 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1003 audit_get_loginuid(task->audit_context)); 920 audit_get_loginuid(task->audit_context));
921 put_task_struct(task);
1004 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 922 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1005} 923}
1006 924
@@ -1010,13 +928,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1010 struct inode * inode = file->f_dentry->d_inode; 928 struct inode * inode = file->f_dentry->d_inode;
1011 char *page, *tmp; 929 char *page, *tmp;
1012 ssize_t length; 930 ssize_t length;
1013 struct task_struct *task = proc_task(inode);
1014 uid_t loginuid; 931 uid_t loginuid;
1015 932
1016 if (!capable(CAP_AUDIT_CONTROL)) 933 if (!capable(CAP_AUDIT_CONTROL))
1017 return -EPERM; 934 return -EPERM;
1018 935
1019 if (current != task) 936 if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
1020 return -EPERM; 937 return -EPERM;
1021 938
1022 if (count >= PAGE_SIZE) 939 if (count >= PAGE_SIZE)
@@ -1040,7 +957,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1040 goto out_free_page; 957 goto out_free_page;
1041 958
1042 } 959 }
1043 length = audit_set_loginuid(task, loginuid); 960 length = audit_set_loginuid(current, loginuid);
1044 if (likely(length == 0)) 961 if (likely(length == 0))
1045 length = count; 962 length = count;
1046 963
@@ -1059,13 +976,16 @@ static struct file_operations proc_loginuid_operations = {
1059static ssize_t seccomp_read(struct file *file, char __user *buf, 976static ssize_t seccomp_read(struct file *file, char __user *buf,
1060 size_t count, loff_t *ppos) 977 size_t count, loff_t *ppos)
1061{ 978{
1062 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 979 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1063 char __buf[20]; 980 char __buf[20];
1064 loff_t __ppos = *ppos; 981 loff_t __ppos = *ppos;
1065 size_t len; 982 size_t len;
1066 983
984 if (!tsk)
985 return -ESRCH;
1067 /* no need to print the trailing zero, so use only len */ 986 /* no need to print the trailing zero, so use only len */
1068 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 987 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
988 put_task_struct(tsk);
1069 if (__ppos >= len) 989 if (__ppos >= len)
1070 return 0; 990 return 0;
1071 if (count > len - __ppos) 991 if (count > len - __ppos)
@@ -1079,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
1079static ssize_t seccomp_write(struct file *file, const char __user *buf, 999static ssize_t seccomp_write(struct file *file, const char __user *buf,
1080 size_t count, loff_t *ppos) 1000 size_t count, loff_t *ppos)
1081{ 1001{
1082 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1002 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
1083 char __buf[20], *end; 1003 char __buf[20], *end;
1084 unsigned int seccomp_mode; 1004 unsigned int seccomp_mode;
1005 ssize_t result;
1006
1007 result = -ESRCH;
1008 if (!tsk)
1009 goto out_no_task;
1085 1010
1086 /* can set it only once to be even more secure */ 1011 /* can set it only once to be even more secure */
1012 result = -EPERM;
1087 if (unlikely(tsk->seccomp.mode)) 1013 if (unlikely(tsk->seccomp.mode))
1088 return -EPERM; 1014 goto out;
1089 1015
1016 result = -EFAULT;
1090 memset(__buf, 0, sizeof(__buf)); 1017 memset(__buf, 0, sizeof(__buf));
1091 count = min(count, sizeof(__buf) - 1); 1018 count = min(count, sizeof(__buf) - 1);
1092 if (copy_from_user(__buf, buf, count)) 1019 if (copy_from_user(__buf, buf, count))
1093 return -EFAULT; 1020 goto out;
1021
1094 seccomp_mode = simple_strtoul(__buf, &end, 0); 1022 seccomp_mode = simple_strtoul(__buf, &end, 0);
1095 if (*end == '\n') 1023 if (*end == '\n')
1096 end++; 1024 end++;
1025 result = -EINVAL;
1097 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1026 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
1098 tsk->seccomp.mode = seccomp_mode; 1027 tsk->seccomp.mode = seccomp_mode;
1099 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1028 set_tsk_thread_flag(tsk, TIF_SECCOMP);
1100 } else 1029 } else
1101 return -EINVAL; 1030 goto out;
1031 result = -EIO;
1102 if (unlikely(!(end - __buf))) 1032 if (unlikely(!(end - __buf)))
1103 return -EIO; 1033 goto out;
1104 return end - __buf; 1034 result = end - __buf;
1035out:
1036 put_task_struct(tsk);
1037out_no_task:
1038 return result;
1105} 1039}
1106 1040
1107static struct file_operations proc_seccomp_operations = { 1041static struct file_operations proc_seccomp_operations = {
@@ -1118,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1118 /* We don't need a base pointer in the /proc filesystem */ 1052 /* We don't need a base pointer in the /proc filesystem */
1119 path_release(nd); 1053 path_release(nd);
1120 1054
1121 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1055 /* Are we allowed to snoop on the tasks file descriptors? */
1122 goto out; 1056 if (!proc_fd_access_allowed(inode))
1123 error = proc_check_root(inode);
1124 if (error)
1125 goto out; 1057 goto out;
1126 1058
1127 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1059 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
@@ -1163,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1163 struct dentry *de; 1095 struct dentry *de;
1164 struct vfsmount *mnt = NULL; 1096 struct vfsmount *mnt = NULL;
1165 1097
1166 lock_kernel(); 1098 /* Are we allowed to snoop on the tasks file descriptors? */
1167 1099 if (!proc_fd_access_allowed(inode))
1168 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1169 goto out;
1170 error = proc_check_root(inode);
1171 if (error)
1172 goto out; 1100 goto out;
1173 1101
1174 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1102 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
@@ -1179,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1179 dput(de); 1107 dput(de);
1180 mntput(mnt); 1108 mntput(mnt);
1181out: 1109out:
1182 unlock_kernel();
1183 return error; 1110 return error;
1184} 1111}
1185 1112
@@ -1188,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = {
1188 .follow_link = proc_pid_follow_link 1115 .follow_link = proc_pid_follow_link
1189}; 1116};
1190 1117
1191#define NUMBUF 10
1192
1193static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1118static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1194{ 1119{
1195 struct inode *inode = filp->f_dentry->d_inode; 1120 struct dentry *dentry = filp->f_dentry;
1196 struct task_struct *p = proc_task(inode); 1121 struct inode *inode = dentry->d_inode;
1122 struct task_struct *p = get_proc_task(inode);
1197 unsigned int fd, tid, ino; 1123 unsigned int fd, tid, ino;
1198 int retval; 1124 int retval;
1199 char buf[NUMBUF]; 1125 char buf[PROC_NUMBUF];
1200 struct files_struct * files; 1126 struct files_struct * files;
1201 struct fdtable *fdt; 1127 struct fdtable *fdt;
1202 1128
1203 retval = -ENOENT; 1129 retval = -ENOENT;
1204 if (!pid_alive(p)) 1130 if (!p)
1205 goto out; 1131 goto out_no_task;
1206 retval = 0; 1132 retval = 0;
1207 tid = p->pid; 1133 tid = p->pid;
1208 1134
@@ -1213,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1213 goto out; 1139 goto out;
1214 filp->f_pos++; 1140 filp->f_pos++;
1215 case 1: 1141 case 1:
1216 ino = fake_ino(tid, PROC_TID_INO); 1142 ino = parent_ino(dentry);
1217 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1143 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1218 goto out; 1144 goto out;
1219 filp->f_pos++; 1145 filp->f_pos++;
@@ -1232,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1232 continue; 1158 continue;
1233 rcu_read_unlock(); 1159 rcu_read_unlock();
1234 1160
1235 j = NUMBUF; 1161 j = PROC_NUMBUF;
1236 i = fd; 1162 i = fd;
1237 do { 1163 do {
1238 j--; 1164 j--;
@@ -1241,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1241 } while (i); 1167 } while (i);
1242 1168
1243 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1169 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1244 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1170 if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1245 rcu_read_lock(); 1171 rcu_read_lock();
1246 break; 1172 break;
1247 } 1173 }
@@ -1251,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1251 put_files_struct(files); 1177 put_files_struct(files);
1252 } 1178 }
1253out: 1179out:
1180 put_task_struct(p);
1181out_no_task:
1254 return retval; 1182 return retval;
1255} 1183}
1256 1184
@@ -1262,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp,
1262 int pid; 1190 int pid;
1263 struct dentry *dentry = filp->f_dentry; 1191 struct dentry *dentry = filp->f_dentry;
1264 struct inode *inode = dentry->d_inode; 1192 struct inode *inode = dentry->d_inode;
1193 struct task_struct *task = get_proc_task(inode);
1265 struct pid_entry *p; 1194 struct pid_entry *p;
1266 ino_t ino; 1195 ino_t ino;
1267 int ret; 1196 int ret;
1268 1197
1269 ret = -ENOENT; 1198 ret = -ENOENT;
1270 if (!pid_alive(proc_task(inode))) 1199 if (!task)
1271 goto out; 1200 goto out;
1272 1201
1273 ret = 0; 1202 ret = 0;
1274 pid = proc_task(inode)->pid; 1203 pid = task->pid;
1204 put_task_struct(task);
1275 i = filp->f_pos; 1205 i = filp->f_pos;
1276 switch (i) { 1206 switch (i) {
1277 case 0: 1207 case 0:
@@ -1354,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1354 1284
1355 /* Common stuff */ 1285 /* Common stuff */
1356 ei = PROC_I(inode); 1286 ei = PROC_I(inode);
1357 ei->task = NULL;
1358 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1287 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1359 inode->i_ino = fake_ino(task->pid, ino); 1288 inode->i_ino = fake_ino(task->pid, ino);
1360 1289
1361 if (!pid_alive(task))
1362 goto out_unlock;
1363
1364 /* 1290 /*
1365 * grab the reference to task. 1291 * grab the reference to task.
1366 */ 1292 */
1367 get_task_struct(task); 1293 ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
1368 ei->task = task; 1294 if (!ei->pid)
1369 ei->type = ino; 1295 goto out_unlock;
1296
1370 inode->i_uid = 0; 1297 inode->i_uid = 0;
1371 inode->i_gid = 0; 1298 inode->i_gid = 0;
1372 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1299 if (task_dumpable(task)) {
1373 inode->i_uid = task->euid; 1300 inode->i_uid = task->euid;
1374 inode->i_gid = task->egid; 1301 inode->i_gid = task->egid;
1375 } 1302 }
@@ -1379,7 +1306,6 @@ out:
1379 return inode; 1306 return inode;
1380 1307
1381out_unlock: 1308out_unlock:
1382 ei->pde = NULL;
1383 iput(inode); 1309 iput(inode);
1384 return NULL; 1310 return NULL;
1385} 1311}
@@ -1393,13 +1319,21 @@ out_unlock:
1393 * 1319 *
1394 * Rewrite the inode's ownerships here because the owning task may have 1320 * Rewrite the inode's ownerships here because the owning task may have
1395 * performed a setuid(), etc. 1321 * performed a setuid(), etc.
1322 *
1323 * Before the /proc/pid/status file was created the only way to read
1324 * the effective uid of a /process was to stat /proc/pid. Reading
1325 * /proc/pid/status is slow enough that procps and other packages
1326 * kept stating /proc/pid. To keep the rules in /proc simple I have
1327 * made this apply to all per process world readable and executable
1328 * directories.
1396 */ 1329 */
1397static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1330static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1398{ 1331{
1399 struct inode *inode = dentry->d_inode; 1332 struct inode *inode = dentry->d_inode;
1400 struct task_struct *task = proc_task(inode); 1333 struct task_struct *task = get_proc_task(inode);
1401 if (pid_alive(task)) { 1334 if (task) {
1402 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1335 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1336 task_dumpable(task)) {
1403 inode->i_uid = task->euid; 1337 inode->i_uid = task->euid;
1404 inode->i_gid = task->egid; 1338 inode->i_gid = task->egid;
1405 } else { 1339 } else {
@@ -1407,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1407 inode->i_gid = 0; 1341 inode->i_gid = 0;
1408 } 1342 }
1409 security_task_to_inode(task, inode); 1343 security_task_to_inode(task, inode);
1344 put_task_struct(task);
1410 return 1; 1345 return 1;
1411 } 1346 }
1412 d_drop(dentry); 1347 d_drop(dentry);
1413 return 0; 1348 return 0;
1414} 1349}
1415 1350
1351static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1352{
1353 struct inode *inode = dentry->d_inode;
1354 struct task_struct *task;
1355 generic_fillattr(inode, stat);
1356
1357 rcu_read_lock();
1358 stat->uid = 0;
1359 stat->gid = 0;
1360 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1361 if (task) {
1362 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1363 task_dumpable(task)) {
1364 stat->uid = task->euid;
1365 stat->gid = task->egid;
1366 }
1367 }
1368 rcu_read_unlock();
1369 return 0;
1370}
1371
1416static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1372static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1417{ 1373{
1418 struct inode *inode = dentry->d_inode; 1374 struct inode *inode = dentry->d_inode;
1419 struct task_struct *task = proc_task(inode); 1375 struct task_struct *task = get_proc_task(inode);
1420 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1376 int fd = proc_fd(inode);
1421 struct files_struct *files; 1377 struct files_struct *files;
1422 1378
1423 files = get_files_struct(task); 1379 if (task) {
1424 if (files) { 1380 files = get_files_struct(task);
1425 rcu_read_lock(); 1381 if (files) {
1426 if (fcheck_files(files, fd)) { 1382 rcu_read_lock();
1383 if (fcheck_files(files, fd)) {
1384 rcu_read_unlock();
1385 put_files_struct(files);
1386 if (task_dumpable(task)) {
1387 inode->i_uid = task->euid;
1388 inode->i_gid = task->egid;
1389 } else {
1390 inode->i_uid = 0;
1391 inode->i_gid = 0;
1392 }
1393 security_task_to_inode(task, inode);
1394 put_task_struct(task);
1395 return 1;
1396 }
1427 rcu_read_unlock(); 1397 rcu_read_unlock();
1428 put_files_struct(files); 1398 put_files_struct(files);
1429 if (task_dumpable(task)) {
1430 inode->i_uid = task->euid;
1431 inode->i_gid = task->egid;
1432 } else {
1433 inode->i_uid = 0;
1434 inode->i_gid = 0;
1435 }
1436 security_task_to_inode(task, inode);
1437 return 1;
1438 } 1399 }
1439 rcu_read_unlock(); 1400 put_task_struct(task);
1440 put_files_struct(files);
1441 } 1401 }
1442 d_drop(dentry); 1402 d_drop(dentry);
1443 return 0; 1403 return 0;
1444} 1404}
1445 1405
1446static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1447{
1448 struct task_struct *task = proc_task(inode);
1449 spin_lock(&task->proc_lock);
1450 if (task->proc_dentry == dentry)
1451 task->proc_dentry = NULL;
1452 spin_unlock(&task->proc_lock);
1453 iput(inode);
1454}
1455
1456static int pid_delete_dentry(struct dentry * dentry) 1406static int pid_delete_dentry(struct dentry * dentry)
1457{ 1407{
1458 /* Is the task we represent dead? 1408 /* Is the task we represent dead?
1459 * If so, then don't put the dentry on the lru list, 1409 * If so, then don't put the dentry on the lru list,
1460 * kill it immediately. 1410 * kill it immediately.
1461 */ 1411 */
1462 return !pid_alive(proc_task(dentry->d_inode)); 1412 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1463} 1413}
1464 1414
1465static struct dentry_operations tid_fd_dentry_operations = 1415static struct dentry_operations tid_fd_dentry_operations =
@@ -1474,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations =
1474 .d_delete = pid_delete_dentry, 1424 .d_delete = pid_delete_dentry,
1475}; 1425};
1476 1426
1477static struct dentry_operations pid_base_dentry_operations =
1478{
1479 .d_revalidate = pid_revalidate,
1480 .d_iput = pid_base_iput,
1481 .d_delete = pid_delete_dentry,
1482};
1483
1484/* Lookups */ 1427/* Lookups */
1485 1428
1486static unsigned name_to_int(struct dentry *dentry) 1429static unsigned name_to_int(struct dentry *dentry)
@@ -1508,22 +1451,24 @@ out:
1508/* SMP-safe */ 1451/* SMP-safe */
1509static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1452static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1510{ 1453{
1511 struct task_struct *task = proc_task(dir); 1454 struct task_struct *task = get_proc_task(dir);
1512 unsigned fd = name_to_int(dentry); 1455 unsigned fd = name_to_int(dentry);
1456 struct dentry *result = ERR_PTR(-ENOENT);
1513 struct file * file; 1457 struct file * file;
1514 struct files_struct * files; 1458 struct files_struct * files;
1515 struct inode *inode; 1459 struct inode *inode;
1516 struct proc_inode *ei; 1460 struct proc_inode *ei;
1517 1461
1462 if (!task)
1463 goto out_no_task;
1518 if (fd == ~0U) 1464 if (fd == ~0U)
1519 goto out; 1465 goto out;
1520 if (!pid_alive(task))
1521 goto out;
1522 1466
1523 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1467 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1524 if (!inode) 1468 if (!inode)
1525 goto out; 1469 goto out;
1526 ei = PROC_I(inode); 1470 ei = PROC_I(inode);
1471 ei->fd = fd;
1527 files = get_files_struct(task); 1472 files = get_files_struct(task);
1528 if (!files) 1473 if (!files)
1529 goto out_unlock; 1474 goto out_unlock;
@@ -1548,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1548 ei->op.proc_get_link = proc_fd_link; 1493 ei->op.proc_get_link = proc_fd_link;
1549 dentry->d_op = &tid_fd_dentry_operations; 1494 dentry->d_op = &tid_fd_dentry_operations;
1550 d_add(dentry, inode); 1495 d_add(dentry, inode);
1551 return NULL; 1496 /* Close the race of the process dying before we return the dentry */
1497 if (tid_fd_revalidate(dentry, NULL))
1498 result = NULL;
1499out:
1500 put_task_struct(task);
1501out_no_task:
1502 return result;
1552 1503
1553out_unlock2: 1504out_unlock2:
1554 spin_unlock(&files->file_lock); 1505 spin_unlock(&files->file_lock);
1555 put_files_struct(files); 1506 put_files_struct(files);
1556out_unlock: 1507out_unlock:
1557 iput(inode); 1508 iput(inode);
1558out: 1509 goto out;
1559 return ERR_PTR(-ENOENT);
1560} 1510}
1561 1511
1562static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1512static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1563static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1513static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1514static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
1564 1515
1565static struct file_operations proc_fd_operations = { 1516static struct file_operations proc_fd_operations = {
1566 .read = generic_read_dir, 1517 .read = generic_read_dir,
@@ -1577,12 +1528,11 @@ static struct file_operations proc_task_operations = {
1577 */ 1528 */
1578static struct inode_operations proc_fd_inode_operations = { 1529static struct inode_operations proc_fd_inode_operations = {
1579 .lookup = proc_lookupfd, 1530 .lookup = proc_lookupfd,
1580 .permission = proc_permission,
1581}; 1531};
1582 1532
1583static struct inode_operations proc_task_inode_operations = { 1533static struct inode_operations proc_task_inode_operations = {
1584 .lookup = proc_task_lookup, 1534 .lookup = proc_task_lookup,
1585 .permission = proc_task_permission, 1535 .getattr = proc_task_getattr,
1586}; 1536};
1587 1537
1588#ifdef CONFIG_SECURITY 1538#ifdef CONFIG_SECURITY
@@ -1592,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1592 struct inode * inode = file->f_dentry->d_inode; 1542 struct inode * inode = file->f_dentry->d_inode;
1593 unsigned long page; 1543 unsigned long page;
1594 ssize_t length; 1544 ssize_t length;
1595 struct task_struct *task = proc_task(inode); 1545 struct task_struct *task = get_proc_task(inode);
1546
1547 length = -ESRCH;
1548 if (!task)
1549 goto out_no_task;
1596 1550
1597 if (count > PAGE_SIZE) 1551 if (count > PAGE_SIZE)
1598 count = PAGE_SIZE; 1552 count = PAGE_SIZE;
1553 length = -ENOMEM;
1599 if (!(page = __get_free_page(GFP_KERNEL))) 1554 if (!(page = __get_free_page(GFP_KERNEL)))
1600 return -ENOMEM; 1555 goto out;
1601 1556
1602 length = security_getprocattr(task, 1557 length = security_getprocattr(task,
1603 (char*)file->f_dentry->d_name.name, 1558 (char*)file->f_dentry->d_name.name,
@@ -1605,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1605 if (length >= 0) 1560 if (length >= 0)
1606 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1561 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1607 free_page(page); 1562 free_page(page);
1563out:
1564 put_task_struct(task);
1565out_no_task:
1608 return length; 1566 return length;
1609} 1567}
1610 1568
@@ -1614,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1614 struct inode * inode = file->f_dentry->d_inode; 1572 struct inode * inode = file->f_dentry->d_inode;
1615 char *page; 1573 char *page;
1616 ssize_t length; 1574 ssize_t length;
1617 struct task_struct *task = proc_task(inode); 1575 struct task_struct *task = get_proc_task(inode);
1618 1576
1577 length = -ESRCH;
1578 if (!task)
1579 goto out_no_task;
1619 if (count > PAGE_SIZE) 1580 if (count > PAGE_SIZE)
1620 count = PAGE_SIZE; 1581 count = PAGE_SIZE;
1621 if (*ppos != 0) { 1582
1622 /* No partial writes. */ 1583 /* No partial writes. */
1623 return -EINVAL; 1584 length = -EINVAL;
1624 } 1585 if (*ppos != 0)
1586 goto out;
1587
1588 length = -ENOMEM;
1625 page = (char*)__get_free_page(GFP_USER); 1589 page = (char*)__get_free_page(GFP_USER);
1626 if (!page) 1590 if (!page)
1627 return -ENOMEM; 1591 goto out;
1592
1628 length = -EFAULT; 1593 length = -EFAULT;
1629 if (copy_from_user(page, buf, count)) 1594 if (copy_from_user(page, buf, count))
1630 goto out; 1595 goto out_free;
1631 1596
1632 length = security_setprocattr(task, 1597 length = security_setprocattr(task,
1633 (char*)file->f_dentry->d_name.name, 1598 (char*)file->f_dentry->d_name.name,
1634 (void*)page, count); 1599 (void*)page, count);
1635out: 1600out_free:
1636 free_page((unsigned long) page); 1601 free_page((unsigned long) page);
1602out:
1603 put_task_struct(task);
1604out_no_task:
1637 return length; 1605 return length;
1638} 1606}
1639 1607
@@ -1648,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations;
1648static struct inode_operations proc_tgid_attr_inode_operations; 1616static struct inode_operations proc_tgid_attr_inode_operations;
1649#endif 1617#endif
1650 1618
1651static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1652
1653/* SMP-safe */ 1619/* SMP-safe */
1654static struct dentry *proc_pident_lookup(struct inode *dir, 1620static struct dentry *proc_pident_lookup(struct inode *dir,
1655 struct dentry *dentry, 1621 struct dentry *dentry,
1656 struct pid_entry *ents) 1622 struct pid_entry *ents)
1657{ 1623{
1658 struct inode *inode; 1624 struct inode *inode;
1659 int error; 1625 struct dentry *error;
1660 struct task_struct *task = proc_task(dir); 1626 struct task_struct *task = get_proc_task(dir);
1661 struct pid_entry *p; 1627 struct pid_entry *p;
1662 struct proc_inode *ei; 1628 struct proc_inode *ei;
1663 1629
1664 error = -ENOENT; 1630 error = ERR_PTR(-ENOENT);
1665 inode = NULL; 1631 inode = NULL;
1666 1632
1667 if (!pid_alive(task)) 1633 if (!task)
1668 goto out; 1634 goto out_no_task;
1669 1635
1670 for (p = ents; p->name; p++) { 1636 for (p = ents; p->name; p++) {
1671 if (p->len != dentry->d_name.len) 1637 if (p->len != dentry->d_name.len)
@@ -1676,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1676 if (!p->name) 1642 if (!p->name)
1677 goto out; 1643 goto out;
1678 1644
1679 error = -EINVAL; 1645 error = ERR_PTR(-EINVAL);
1680 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1646 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1681 if (!inode) 1647 if (!inode)
1682 goto out; 1648 goto out;
@@ -1689,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1689 */ 1655 */
1690 switch(p->type) { 1656 switch(p->type) {
1691 case PROC_TGID_TASK: 1657 case PROC_TGID_TASK:
1692 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1658 inode->i_nlink = 2;
1693 inode->i_op = &proc_task_inode_operations; 1659 inode->i_op = &proc_task_inode_operations;
1694 inode->i_fop = &proc_task_operations; 1660 inode->i_fop = &proc_task_operations;
1695 break; 1661 break;
@@ -1759,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1759#endif 1725#endif
1760 case PROC_TID_MEM: 1726 case PROC_TID_MEM:
1761 case PROC_TGID_MEM: 1727 case PROC_TGID_MEM:
1762 inode->i_op = &proc_mem_inode_operations;
1763 inode->i_fop = &proc_mem_operations; 1728 inode->i_fop = &proc_mem_operations;
1764 break; 1729 break;
1765#ifdef CONFIG_SECCOMP 1730#ifdef CONFIG_SECCOMP
@@ -1801,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1801 case PROC_TGID_ATTR_EXEC: 1766 case PROC_TGID_ATTR_EXEC:
1802 case PROC_TID_ATTR_FSCREATE: 1767 case PROC_TID_ATTR_FSCREATE:
1803 case PROC_TGID_ATTR_FSCREATE: 1768 case PROC_TGID_ATTR_FSCREATE:
1769 case PROC_TID_ATTR_KEYCREATE:
1770 case PROC_TGID_ATTR_KEYCREATE:
1771 case PROC_TID_ATTR_SOCKCREATE:
1772 case PROC_TGID_ATTR_SOCKCREATE:
1804 inode->i_fop = &proc_pid_attr_operations; 1773 inode->i_fop = &proc_pid_attr_operations;
1805 break; 1774 break;
1806#endif 1775#endif
@@ -1842,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1842 default: 1811 default:
1843 printk("procfs: impossible type (%d)",p->type); 1812 printk("procfs: impossible type (%d)",p->type);
1844 iput(inode); 1813 iput(inode);
1845 return ERR_PTR(-EINVAL); 1814 error = ERR_PTR(-EINVAL);
1815 goto out;
1846 } 1816 }
1847 dentry->d_op = &pid_dentry_operations; 1817 dentry->d_op = &pid_dentry_operations;
1848 d_add(dentry, inode); 1818 d_add(dentry, inode);
1849 return NULL; 1819 /* Close the race of the process dying before we return the dentry */
1850 1820 if (pid_revalidate(dentry, NULL))
1821 error = NULL;
1851out: 1822out:
1852 return ERR_PTR(error); 1823 put_task_struct(task);
1824out_no_task:
1825 return error;
1853} 1826}
1854 1827
1855static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1828static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
@@ -1872,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = {
1872 1845
1873static struct inode_operations proc_tgid_base_inode_operations = { 1846static struct inode_operations proc_tgid_base_inode_operations = {
1874 .lookup = proc_tgid_base_lookup, 1847 .lookup = proc_tgid_base_lookup,
1848 .getattr = pid_getattr,
1875}; 1849};
1876 1850
1877static struct inode_operations proc_tid_base_inode_operations = { 1851static struct inode_operations proc_tid_base_inode_operations = {
1878 .lookup = proc_tid_base_lookup, 1852 .lookup = proc_tid_base_lookup,
1853 .getattr = pid_getattr,
1879}; 1854};
1880 1855
1881#ifdef CONFIG_SECURITY 1856#ifdef CONFIG_SECURITY
@@ -1917,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1917 1892
1918static struct inode_operations proc_tgid_attr_inode_operations = { 1893static struct inode_operations proc_tgid_attr_inode_operations = {
1919 .lookup = proc_tgid_attr_lookup, 1894 .lookup = proc_tgid_attr_lookup,
1895 .getattr = pid_getattr,
1920}; 1896};
1921 1897
1922static struct inode_operations proc_tid_attr_inode_operations = { 1898static struct inode_operations proc_tid_attr_inode_operations = {
1923 .lookup = proc_tid_attr_lookup, 1899 .lookup = proc_tid_attr_lookup,
1900 .getattr = pid_getattr,
1924}; 1901};
1925#endif 1902#endif
1926 1903
@@ -1930,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = {
1930static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1907static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1931 int buflen) 1908 int buflen)
1932{ 1909{
1933 char tmp[30]; 1910 char tmp[PROC_NUMBUF];
1934 sprintf(tmp, "%d", current->tgid); 1911 sprintf(tmp, "%d", current->tgid);
1935 return vfs_readlink(dentry,buffer,buflen,tmp); 1912 return vfs_readlink(dentry,buffer,buflen,tmp);
1936} 1913}
1937 1914
1938static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1915static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1939{ 1916{
1940 char tmp[30]; 1917 char tmp[PROC_NUMBUF];
1941 sprintf(tmp, "%d", current->tgid); 1918 sprintf(tmp, "%d", current->tgid);
1942 return ERR_PTR(vfs_follow_link(nd,tmp)); 1919 return ERR_PTR(vfs_follow_link(nd,tmp));
1943} 1920}
@@ -1948,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = {
1948}; 1925};
1949 1926
1950/** 1927/**
1951 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1928 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
1952 * @p: task that should be flushed. 1929 *
1930 * @task: task that should be flushed.
1931 *
1932 * Looks in the dcache for
1933 * /proc/@pid
1934 * /proc/@tgid/task/@pid
1935 * if either directory is present flushes it and all of it'ts children
1936 * from the dcache.
1953 * 1937 *
1954 * Drops the /proc/@pid dcache entry from the hash chains. 1938 * It is safe and reasonable to cache /proc entries for a task until
1939 * that task exits. After that they just clog up the dcache with
1940 * useless entries, possibly causing useful dcache entries to be
1941 * flushed instead. This routine is proved to flush those useless
1942 * dcache entries at process exit time.
1955 * 1943 *
1956 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1944 * NOTE: This routine is just an optimization so it does not guarantee
1957 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1945 * that no dcache entries will exist at process exit time it
1958 * if the pid value is immediately reused. This is enforced by 1946 * just makes it very unlikely that any will persist.
1959 * - caller must acquire spin_lock(p->proc_lock)
1960 * - must be called before detach_pid()
1961 * - proc_pid_lookup acquires proc_lock, and checks that
1962 * the target is not dead by looking at the attach count
1963 * of PIDTYPE_PID.
1964 */ 1947 */
1965 1948void proc_flush_task(struct task_struct *task)
1966struct dentry *proc_pid_unhash(struct task_struct *p)
1967{ 1949{
1968 struct dentry *proc_dentry; 1950 struct dentry *dentry, *leader, *dir;
1951 char buf[PROC_NUMBUF];
1952 struct qstr name;
1953
1954 name.name = buf;
1955 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1956 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1957 if (dentry) {
1958 shrink_dcache_parent(dentry);
1959 d_drop(dentry);
1960 dput(dentry);
1961 }
1969 1962
1970 proc_dentry = p->proc_dentry; 1963 if (thread_group_leader(task))
1971 if (proc_dentry != NULL) { 1964 goto out;
1972 1965
1973 spin_lock(&dcache_lock); 1966 name.name = buf;
1974 spin_lock(&proc_dentry->d_lock); 1967 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
1975 if (!d_unhashed(proc_dentry)) { 1968 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
1976 dget_locked(proc_dentry); 1969 if (!leader)
1977 __d_drop(proc_dentry); 1970 goto out;
1978 spin_unlock(&proc_dentry->d_lock);
1979 } else {
1980 spin_unlock(&proc_dentry->d_lock);
1981 proc_dentry = NULL;
1982 }
1983 spin_unlock(&dcache_lock);
1984 }
1985 return proc_dentry;
1986}
1987 1971
1988/** 1972 name.name = "task";
1989 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1973 name.len = strlen(name.name);
1990 * @proc_dentry: directoy to prune. 1974 dir = d_hash_and_lookup(leader, &name);
1991 * 1975 if (!dir)
1992 * Shrink the /proc directory that was used by the just killed thread. 1976 goto out_put_leader;
1993 */ 1977
1994 1978 name.name = buf;
1995void proc_pid_flush(struct dentry *proc_dentry) 1979 name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
1996{ 1980 dentry = d_hash_and_lookup(dir, &name);
1997 might_sleep(); 1981 if (dentry) {
1998 if(proc_dentry != NULL) { 1982 shrink_dcache_parent(dentry);
1999 shrink_dcache_parent(proc_dentry); 1983 d_drop(dentry);
2000 dput(proc_dentry); 1984 dput(dentry);
2001 } 1985 }
1986
1987 dput(dir);
1988out_put_leader:
1989 dput(leader);
1990out:
1991 return;
2002} 1992}
2003 1993
2004/* SMP-safe */ 1994/* SMP-safe */
2005struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1995struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2006{ 1996{
1997 struct dentry *result = ERR_PTR(-ENOENT);
2007 struct task_struct *task; 1998 struct task_struct *task;
2008 struct inode *inode; 1999 struct inode *inode;
2009 struct proc_inode *ei; 2000 struct proc_inode *ei;
2010 unsigned tgid; 2001 unsigned tgid;
2011 int died;
2012 2002
2013 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 2003 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
2014 inode = new_inode(dir->i_sb); 2004 inode = new_inode(dir->i_sb);
@@ -2029,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2029 if (tgid == ~0U) 2019 if (tgid == ~0U)
2030 goto out; 2020 goto out;
2031 2021
2032 read_lock(&tasklist_lock); 2022 rcu_read_lock();
2033 task = find_task_by_pid(tgid); 2023 task = find_task_by_pid(tgid);
2034 if (task) 2024 if (task)
2035 get_task_struct(task); 2025 get_task_struct(task);
2036 read_unlock(&tasklist_lock); 2026 rcu_read_unlock();
2037 if (!task) 2027 if (!task)
2038 goto out; 2028 goto out;
2039 2029
2040 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 2030 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
2031 if (!inode)
2032 goto out_put_task;
2041 2033
2042
2043 if (!inode) {
2044 put_task_struct(task);
2045 goto out;
2046 }
2047 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2034 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2048 inode->i_op = &proc_tgid_base_inode_operations; 2035 inode->i_op = &proc_tgid_base_inode_operations;
2049 inode->i_fop = &proc_tgid_base_operations; 2036 inode->i_fop = &proc_tgid_base_operations;
@@ -2054,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2054 inode->i_nlink = 4; 2041 inode->i_nlink = 4;
2055#endif 2042#endif
2056 2043
2057 dentry->d_op = &pid_base_dentry_operations; 2044 dentry->d_op = &pid_dentry_operations;
2058 2045
2059 died = 0;
2060 d_add(dentry, inode); 2046 d_add(dentry, inode);
2061 spin_lock(&task->proc_lock); 2047 /* Close the race of the process dying before we return the dentry */
2062 task->proc_dentry = dentry; 2048 if (pid_revalidate(dentry, NULL))
2063 if (!pid_alive(task)) { 2049 result = NULL;
2064 dentry = proc_pid_unhash(task);
2065 died = 1;
2066 }
2067 spin_unlock(&task->proc_lock);
2068 2050
2051out_put_task:
2069 put_task_struct(task); 2052 put_task_struct(task);
2070 if (died) {
2071 proc_pid_flush(dentry);
2072 goto out;
2073 }
2074 return NULL;
2075out: 2053out:
2076 return ERR_PTR(-ENOENT); 2054 return result;
2077} 2055}
2078 2056
2079/* SMP-safe */ 2057/* SMP-safe */
2080static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2058static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
2081{ 2059{
2060 struct dentry *result = ERR_PTR(-ENOENT);
2082 struct task_struct *task; 2061 struct task_struct *task;
2083 struct task_struct *leader = proc_task(dir); 2062 struct task_struct *leader = get_proc_task(dir);
2084 struct inode *inode; 2063 struct inode *inode;
2085 unsigned tid; 2064 unsigned tid;
2086 2065
2066 if (!leader)
2067 goto out_no_task;
2068
2087 tid = name_to_int(dentry); 2069 tid = name_to_int(dentry);
2088 if (tid == ~0U) 2070 if (tid == ~0U)
2089 goto out; 2071 goto out;
2090 2072
2091 read_lock(&tasklist_lock); 2073 rcu_read_lock();
2092 task = find_task_by_pid(tid); 2074 task = find_task_by_pid(tid);
2093 if (task) 2075 if (task)
2094 get_task_struct(task); 2076 get_task_struct(task);
2095 read_unlock(&tasklist_lock); 2077 rcu_read_unlock();
2096 if (!task) 2078 if (!task)
2097 goto out; 2079 goto out;
2098 if (leader->tgid != task->tgid) 2080 if (leader->tgid != task->tgid)
@@ -2113,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2113 inode->i_nlink = 3; 2095 inode->i_nlink = 3;
2114#endif 2096#endif
2115 2097
2116 dentry->d_op = &pid_base_dentry_operations; 2098 dentry->d_op = &pid_dentry_operations;
2117 2099
2118 d_add(dentry, inode); 2100 d_add(dentry, inode);
2101 /* Close the race of the process dying before we return the dentry */
2102 if (pid_revalidate(dentry, NULL))
2103 result = NULL;
2119 2104
2120 put_task_struct(task);
2121 return NULL;
2122out_drop_task: 2105out_drop_task:
2123 put_task_struct(task); 2106 put_task_struct(task);
2124out: 2107out:
2125 return ERR_PTR(-ENOENT); 2108 put_task_struct(leader);
2109out_no_task:
2110 return result;
2126} 2111}
2127 2112
2128#define PROC_NUMBUF 10
2129#define PROC_MAXPIDS 20
2130
2131/* 2113/*
2132 * Get a few tgid's to return for filldir - we need to hold the 2114 * Find the first tgid to return to user space.
2133 * tasklist lock while doing this, and we must release it before 2115 *
2134 * we actually do the filldir itself, so we use a temp buffer.. 2116 * Usually this is just whatever follows &init_task, but if the users
2117 * buffer was too small to hold the full list or there was a seek into
2118 * the middle of the directory we have more work to do.
2119 *
2120 * In the case of a short read we start with find_task_by_pid.
2121 *
2122 * In the case of a seek we start with &init_task and walk nr
2123 * threads past it.
2135 */ 2124 */
2136static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2125static struct task_struct *first_tgid(int tgid, unsigned int nr)
2137{ 2126{
2138 struct task_struct *p; 2127 struct task_struct *pos;
2139 int nr_tgids = 0; 2128 rcu_read_lock();
2140 2129 if (tgid && nr) {
2141 index--; 2130 pos = find_task_by_pid(tgid);
2142 read_lock(&tasklist_lock); 2131 if (pos && thread_group_leader(pos))
2143 p = NULL; 2132 goto found;
2144 if (version) {
2145 p = find_task_by_pid(version);
2146 if (p && !thread_group_leader(p))
2147 p = NULL;
2148 } 2133 }
2134 /* If nr exceeds the number of processes get out quickly */
2135 pos = NULL;
2136 if (nr && nr >= nr_processes())
2137 goto done;
2149 2138
2150 if (p) 2139 /* If we haven't found our starting place yet start with
2151 index = 0; 2140 * the init_task and walk nr tasks forward.
2152 else 2141 */
2153 p = next_task(&init_task); 2142 for (pos = next_task(&init_task); nr > 0; --nr) {
2154 2143 pos = next_task(pos);
2155 for ( ; p != &init_task; p = next_task(p)) { 2144 if (pos == &init_task) {
2156 int tgid = p->pid; 2145 pos = NULL;
2157 if (!pid_alive(p)) 2146 goto done;
2158 continue; 2147 }
2159 if (--index >= 0)
2160 continue;
2161 tgids[nr_tgids] = tgid;
2162 nr_tgids++;
2163 if (nr_tgids >= PROC_MAXPIDS)
2164 break;
2165 } 2148 }
2166 read_unlock(&tasklist_lock); 2149found:
2167 return nr_tgids; 2150 get_task_struct(pos);
2151done:
2152 rcu_read_unlock();
2153 return pos;
2168} 2154}
2169 2155
2170/* 2156/*
2171 * Get a few tid's to return for filldir - we need to hold the 2157 * Find the next task in the task list.
2172 * tasklist lock while doing this, and we must release it before 2158 * Return NULL if we loop or there is any error.
2173 * we actually do the filldir itself, so we use a temp buffer.. 2159 *
2160 * The reference to the input task_struct is released.
2174 */ 2161 */
2175static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2162static struct task_struct *next_tgid(struct task_struct *start)
2176{ 2163{
2177 struct task_struct *leader_task = proc_task(dir); 2164 struct task_struct *pos;
2178 struct task_struct *task = leader_task; 2165 rcu_read_lock();
2179 int nr_tids = 0; 2166 pos = start;
2180 2167 if (pid_alive(start))
2181 index -= 2; 2168 pos = next_task(start);
2182 read_lock(&tasklist_lock); 2169 if (pid_alive(pos) && (pos != &init_task)) {
2183 /* 2170 get_task_struct(pos);
2184 * The starting point task (leader_task) might be an already 2171 goto done;
2185 * unlinked task, which cannot be used to access the task-list 2172 }
2186 * via next_thread(). 2173 pos = NULL;
2187 */ 2174done:
2188 if (pid_alive(task)) do { 2175 rcu_read_unlock();
2189 int tid = task->pid; 2176 put_task_struct(start);
2190 2177 return pos;
2191 if (--index >= 0)
2192 continue;
2193 if (tids != NULL)
2194 tids[nr_tids] = tid;
2195 nr_tids++;
2196 if (nr_tids >= PROC_MAXPIDS)
2197 break;
2198 } while ((task = next_thread(task)) != leader_task);
2199 read_unlock(&tasklist_lock);
2200 return nr_tids;
2201} 2178}
2202 2179
2203/* for the /proc/ directory itself, after non-process stuff has been done */ 2180/* for the /proc/ directory itself, after non-process stuff has been done */
2204int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2181int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2205{ 2182{
2206 unsigned int tgid_array[PROC_MAXPIDS];
2207 char buf[PROC_NUMBUF]; 2183 char buf[PROC_NUMBUF];
2208 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2184 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2209 unsigned int nr_tgids, i; 2185 struct task_struct *task;
2210 int next_tgid; 2186 int tgid;
2211 2187
2212 if (!nr) { 2188 if (!nr) {
2213 ino_t ino = fake_ino(0,PROC_TGID_INO); 2189 ino_t ino = fake_ino(0,PROC_TGID_INO);
@@ -2216,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2216 filp->f_pos++; 2192 filp->f_pos++;
2217 nr++; 2193 nr++;
2218 } 2194 }
2195 nr -= 1;
2219 2196
2220 /* f_version caches the tgid value that the last readdir call couldn't 2197 /* f_version caches the tgid value that the last readdir call couldn't
2221 * return. lseek aka telldir automagically resets f_version to 0. 2198 * return. lseek aka telldir automagically resets f_version to 0.
2222 */ 2199 */
2223 next_tgid = filp->f_version; 2200 tgid = filp->f_version;
2224 filp->f_version = 0; 2201 filp->f_version = 0;
2225 for (;;) { 2202 for (task = first_tgid(tgid, nr);
2226 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2203 task;
2227 if (!nr_tgids) { 2204 task = next_tgid(task), filp->f_pos++) {
2228 /* no more entries ! */ 2205 int len;
2206 ino_t ino;
2207 tgid = task->pid;
2208 len = snprintf(buf, sizeof(buf), "%d", tgid);
2209 ino = fake_ino(tgid, PROC_TGID_INO);
2210 if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
2211 /* returning this tgid failed, save it as the first
2212 * pid for the next readir call */
2213 filp->f_version = tgid;
2214 put_task_struct(task);
2229 break; 2215 break;
2230 } 2216 }
2231 next_tgid = 0; 2217 }
2218 return 0;
2219}
2232 2220
2233 /* do not use the last found pid, reserve it for next_tgid */ 2221/*
2234 if (nr_tgids == PROC_MAXPIDS) { 2222 * Find the first tid of a thread group to return to user space.
2235 nr_tgids--; 2223 *
2236 next_tgid = tgid_array[nr_tgids]; 2224 * Usually this is just the thread group leader, but if the users
2237 } 2225 * buffer was too small or there was a seek into the middle of the
2226 * directory we have more work todo.
2227 *
2228 * In the case of a short read we start with find_task_by_pid.
2229 *
2230 * In the case of a seek we start with the leader and walk nr
2231 * threads past it.
2232 */
2233static struct task_struct *first_tid(struct task_struct *leader,
2234 int tid, int nr)
2235{
2236 struct task_struct *pos;
2238 2237
2239 for (i=0;i<nr_tgids;i++) { 2238 rcu_read_lock();
2240 int tgid = tgid_array[i]; 2239 /* Attempt to start with the pid of a thread */
2241 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2240 if (tid && (nr > 0)) {
2242 unsigned long j = PROC_NUMBUF; 2241 pos = find_task_by_pid(tid);
2242 if (pos && (pos->group_leader == leader))
2243 goto found;
2244 }
2243 2245
2244 do 2246 /* If nr exceeds the number of threads there is nothing to do */
2245 buf[--j] = '0' + (tgid % 10); 2247 pos = NULL;
2246 while ((tgid /= 10) != 0); 2248 if (nr && nr >= get_nr_threads(leader))
2249 goto out;
2247 2250
2248 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2251 /* If we haven't found our starting place yet start
2249 /* returning this tgid failed, save it as the first 2252 * with the leader and walk nr threads forward.
2250 * pid for the next readir call */ 2253 */
2251 filp->f_version = tgid_array[i]; 2254 for (pos = leader; nr > 0; --nr) {
2252 goto out; 2255 pos = next_thread(pos);
2253 } 2256 if (pos == leader) {
2254 filp->f_pos++; 2257 pos = NULL;
2255 nr++; 2258 goto out;
2256 } 2259 }
2257 } 2260 }
2261found:
2262 get_task_struct(pos);
2258out: 2263out:
2259 return 0; 2264 rcu_read_unlock();
2265 return pos;
2266}
2267
2268/*
2269 * Find the next thread in the thread list.
2270 * Return NULL if there is an error or no next thread.
2271 *
2272 * The reference to the input task_struct is released.
2273 */
2274static struct task_struct *next_tid(struct task_struct *start)
2275{
2276 struct task_struct *pos = NULL;
2277 rcu_read_lock();
2278 if (pid_alive(start)) {
2279 pos = next_thread(start);
2280 if (thread_group_leader(pos))
2281 pos = NULL;
2282 else
2283 get_task_struct(pos);
2284 }
2285 rcu_read_unlock();
2286 put_task_struct(start);
2287 return pos;
2260} 2288}
2261 2289
2262/* for the /proc/TGID/task/ directories */ 2290/* for the /proc/TGID/task/ directories */
2263static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2291static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2264{ 2292{
2265 unsigned int tid_array[PROC_MAXPIDS];
2266 char buf[PROC_NUMBUF]; 2293 char buf[PROC_NUMBUF];
2267 unsigned int nr_tids, i;
2268 struct dentry *dentry = filp->f_dentry; 2294 struct dentry *dentry = filp->f_dentry;
2269 struct inode *inode = dentry->d_inode; 2295 struct inode *inode = dentry->d_inode;
2296 struct task_struct *leader = get_proc_task(inode);
2297 struct task_struct *task;
2270 int retval = -ENOENT; 2298 int retval = -ENOENT;
2271 ino_t ino; 2299 ino_t ino;
2300 int tid;
2272 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2301 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2273 2302
2274 if (!pid_alive(proc_task(inode))) 2303 if (!leader)
2275 goto out; 2304 goto out_no_task;
2276 retval = 0; 2305 retval = 0;
2277 2306
2278 switch (pos) { 2307 switch (pos) {
@@ -2290,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2290 /* fall through */ 2319 /* fall through */
2291 } 2320 }
2292 2321
2293 nr_tids = get_tid_list(pos, tid_array, inode); 2322 /* f_version caches the tgid value that the last readdir call couldn't
2294 inode->i_nlink = pos + nr_tids; 2323 * return. lseek aka telldir automagically resets f_version to 0.
2295 2324 */
2296 for (i = 0; i < nr_tids; i++) { 2325 tid = filp->f_version;
2297 unsigned long j = PROC_NUMBUF; 2326 filp->f_version = 0;
2298 int tid = tid_array[i]; 2327 for (task = first_tid(leader, tid, pos - 2);
2299 2328 task;
2300 ino = fake_ino(tid,PROC_TID_INO); 2329 task = next_tid(task), pos++) {
2301 2330 int len;
2302 do 2331 tid = task->pid;
2303 buf[--j] = '0' + (tid % 10); 2332 len = snprintf(buf, sizeof(buf), "%d", tid);
2304 while ((tid /= 10) != 0); 2333 ino = fake_ino(tid, PROC_TID_INO);
2305 2334 if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
2306 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2335 /* returning this tgid failed, save it as the first
2336 * pid for the next readir call */
2337 filp->f_version = tid;
2338 put_task_struct(task);
2307 break; 2339 break;
2308 pos++; 2340 }
2309 } 2341 }
2310out: 2342out:
2311 filp->f_pos = pos; 2343 filp->f_pos = pos;
2344 put_task_struct(leader);
2345out_no_task:
2312 return retval; 2346 return retval;
2313} 2347}
2348
2349static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2350{
2351 struct inode *inode = dentry->d_inode;
2352 struct task_struct *p = get_proc_task(inode);
2353 generic_fillattr(inode, stat);
2354
2355 if (p) {
2356 rcu_read_lock();
2357 stat->nlink += get_nr_threads(p);
2358 rcu_read_unlock();
2359 put_task_struct(p);
2360 }
2361
2362 return 0;
2363}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 722b9c463111..6dcef089e18e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de)
58static void proc_delete_inode(struct inode *inode) 58static void proc_delete_inode(struct inode *inode)
59{ 59{
60 struct proc_dir_entry *de; 60 struct proc_dir_entry *de;
61 struct task_struct *tsk;
62 61
63 truncate_inode_pages(&inode->i_data, 0); 62 truncate_inode_pages(&inode->i_data, 0);
64 63
65 /* Let go of any associated process */ 64 /* Stop tracking associated processes */
66 tsk = PROC_I(inode)->task; 65 put_pid(PROC_I(inode)->pid);
67 if (tsk)
68 put_task_struct(tsk);
69 66
70 /* Let go of any associated proc directory entry */ 67 /* Let go of any associated proc directory entry */
71 de = PROC_I(inode)->pde; 68 de = PROC_I(inode)->pde;
@@ -94,8 +91,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
94 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); 91 ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
95 if (!ei) 92 if (!ei)
96 return NULL; 93 return NULL;
97 ei->task = NULL; 94 ei->pid = NULL;
98 ei->type = 0; 95 ei->fd = 0;
99 ei->op.proc_get_link = NULL; 96 ei->op.proc_get_link = NULL;
100 ei->pde = NULL; 97 ei->pde = NULL;
101 inode = &ei->vfs_inode; 98 inode = &ei->vfs_inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0502f17b860d..146a434ba944 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,16 +37,30 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40extern struct file_operations proc_maps_operations;
41extern struct file_operations proc_numa_maps_operations;
42extern struct file_operations proc_smaps_operations;
43
44extern struct file_operations proc_maps_operations;
45extern struct file_operations proc_numa_maps_operations;
46extern struct file_operations proc_smaps_operations;
47
48
40void free_proc_entry(struct proc_dir_entry *de); 49void free_proc_entry(struct proc_dir_entry *de);
41 50
42int proc_init_inodecache(void); 51int proc_init_inodecache(void);
43 52
44static inline struct task_struct *proc_task(struct inode *inode) 53static inline struct pid *proc_pid(struct inode *inode)
54{
55 return PROC_I(inode)->pid;
56}
57
58static inline struct task_struct *get_proc_task(struct inode *inode)
45{ 59{
46 return PROC_I(inode)->task; 60 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
47} 61}
48 62
49static inline int proc_type(struct inode *inode) 63static inline int proc_fd(struct inode *inode)
50{ 64{
51 return PROC_I(inode)->type; 65 return PROC_I(inode)->fd;
52} 66}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 91b7c15ab373..0137ec4c1368 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
75{ 75{
76 struct vm_area_struct * vma; 76 struct vm_area_struct * vma;
77 int result = -ENOENT; 77 int result = -ENOENT;
78 struct task_struct *task = proc_task(inode); 78 struct task_struct *task = get_proc_task(inode);
79 struct mm_struct * mm = get_task_mm(task); 79 struct mm_struct * mm = NULL;
80 80
81 if (task) {
82 mm = get_task_mm(task);
83 put_task_struct(task);
84 }
81 if (!mm) 85 if (!mm)
82 goto out; 86 goto out;
83 down_read(&mm->mmap_sem); 87 down_read(&mm->mmap_sem);
@@ -120,7 +124,8 @@ struct mem_size_stats
120 124
121static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 125static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
122{ 126{
123 struct task_struct *task = m->private; 127 struct proc_maps_private *priv = m->private;
128 struct task_struct *task = priv->task;
124 struct vm_area_struct *vma = v; 129 struct vm_area_struct *vma = v;
125 struct mm_struct *mm = vma->vm_mm; 130 struct mm_struct *mm = vma->vm_mm;
126 struct file *file = vma->vm_file; 131 struct file *file = vma->vm_file;
@@ -295,12 +300,16 @@ static int show_smap(struct seq_file *m, void *v)
295 300
296static void *m_start(struct seq_file *m, loff_t *pos) 301static void *m_start(struct seq_file *m, loff_t *pos)
297{ 302{
298 struct task_struct *task = m->private; 303 struct proc_maps_private *priv = m->private;
299 unsigned long last_addr = m->version; 304 unsigned long last_addr = m->version;
300 struct mm_struct *mm; 305 struct mm_struct *mm;
301 struct vm_area_struct *vma, *tail_vma; 306 struct vm_area_struct *vma, *tail_vma = NULL;
302 loff_t l = *pos; 307 loff_t l = *pos;
303 308
309 /* Clear the per syscall fields in priv */
310 priv->task = NULL;
311 priv->tail_vma = NULL;
312
304 /* 313 /*
305 * We remember last_addr rather than next_addr to hit with 314 * We remember last_addr rather than next_addr to hit with
306 * mmap_cache most of the time. We have zero last_addr at 315 * mmap_cache most of the time. We have zero last_addr at
@@ -311,11 +320,15 @@ static void *m_start(struct seq_file *m, loff_t *pos)
311 if (last_addr == -1UL) 320 if (last_addr == -1UL)
312 return NULL; 321 return NULL;
313 322
314 mm = get_task_mm(task); 323 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
324 if (!priv->task)
325 return NULL;
326
327 mm = get_task_mm(priv->task);
315 if (!mm) 328 if (!mm)
316 return NULL; 329 return NULL;
317 330
318 tail_vma = get_gate_vma(task); 331 priv->tail_vma = tail_vma = get_gate_vma(priv->task);
319 down_read(&mm->mmap_sem); 332 down_read(&mm->mmap_sem);
320 333
321 /* Start with last addr hint */ 334 /* Start with last addr hint */
@@ -350,11 +363,9 @@ out:
350 return tail_vma; 363 return tail_vma;
351} 364}
352 365
353static void m_stop(struct seq_file *m, void *v) 366static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
354{ 367{
355 struct task_struct *task = m->private; 368 if (vma && vma != priv->tail_vma) {
356 struct vm_area_struct *vma = v;
357 if (vma && vma != get_gate_vma(task)) {
358 struct mm_struct *mm = vma->vm_mm; 369 struct mm_struct *mm = vma->vm_mm;
359 up_read(&mm->mmap_sem); 370 up_read(&mm->mmap_sem);
360 mmput(mm); 371 mmput(mm);
@@ -363,38 +374,103 @@ static void m_stop(struct seq_file *m, void *v)
363 374
364static void *m_next(struct seq_file *m, void *v, loff_t *pos) 375static void *m_next(struct seq_file *m, void *v, loff_t *pos)
365{ 376{
366 struct task_struct *task = m->private; 377 struct proc_maps_private *priv = m->private;
367 struct vm_area_struct *vma = v; 378 struct vm_area_struct *vma = v;
368 struct vm_area_struct *tail_vma = get_gate_vma(task); 379 struct vm_area_struct *tail_vma = priv->tail_vma;
369 380
370 (*pos)++; 381 (*pos)++;
371 if (vma && (vma != tail_vma) && vma->vm_next) 382 if (vma && (vma != tail_vma) && vma->vm_next)
372 return vma->vm_next; 383 return vma->vm_next;
373 m_stop(m, v); 384 vma_stop(priv, vma);
374 return (vma != tail_vma)? tail_vma: NULL; 385 return (vma != tail_vma)? tail_vma: NULL;
375} 386}
376 387
377struct seq_operations proc_pid_maps_op = { 388static void m_stop(struct seq_file *m, void *v)
389{
390 struct proc_maps_private *priv = m->private;
391 struct vm_area_struct *vma = v;
392
393 vma_stop(priv, vma);
394 if (priv->task)
395 put_task_struct(priv->task);
396}
397
398static struct seq_operations proc_pid_maps_op = {
378 .start = m_start, 399 .start = m_start,
379 .next = m_next, 400 .next = m_next,
380 .stop = m_stop, 401 .stop = m_stop,
381 .show = show_map 402 .show = show_map
382}; 403};
383 404
384struct seq_operations proc_pid_smaps_op = { 405static struct seq_operations proc_pid_smaps_op = {
385 .start = m_start, 406 .start = m_start,
386 .next = m_next, 407 .next = m_next,
387 .stop = m_stop, 408 .stop = m_stop,
388 .show = show_smap 409 .show = show_smap
389}; 410};
390 411
412static int do_maps_open(struct inode *inode, struct file *file,
413 struct seq_operations *ops)
414{
415 struct proc_maps_private *priv;
416 int ret = -ENOMEM;
417 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
418 if (priv) {
419 priv->pid = proc_pid(inode);
420 ret = seq_open(file, ops);
421 if (!ret) {
422 struct seq_file *m = file->private_data;
423 m->private = priv;
424 } else {
425 kfree(priv);
426 }
427 }
428 return ret;
429}
430
431static int maps_open(struct inode *inode, struct file *file)
432{
433 return do_maps_open(inode, file, &proc_pid_maps_op);
434}
435
436struct file_operations proc_maps_operations = {
437 .open = maps_open,
438 .read = seq_read,
439 .llseek = seq_lseek,
440 .release = seq_release_private,
441};
442
391#ifdef CONFIG_NUMA 443#ifdef CONFIG_NUMA
392extern int show_numa_map(struct seq_file *m, void *v); 444extern int show_numa_map(struct seq_file *m, void *v);
393 445
394struct seq_operations proc_pid_numa_maps_op = { 446static struct seq_operations proc_pid_numa_maps_op = {
395 .start = m_start, 447 .start = m_start,
396 .next = m_next, 448 .next = m_next,
397 .stop = m_stop, 449 .stop = m_stop,
398 .show = show_numa_map 450 .show = show_numa_map
399}; 451};
452
453static int numa_maps_open(struct inode *inode, struct file *file)
454{
455 return do_maps_open(inode, file, &proc_pid_numa_maps_op);
456}
457
458struct file_operations proc_numa_maps_operations = {
459 .open = numa_maps_open,
460 .read = seq_read,
461 .llseek = seq_lseek,
462 .release = seq_release_private,
463};
400#endif 464#endif
465
466static int smaps_open(struct inode *inode, struct file *file)
467{
468 return do_maps_open(inode, file, &proc_pid_smaps_op);
469}
470
471struct file_operations proc_smaps_operations = {
472 .open = smaps_open,
473 .read = seq_read,
474 .llseek = seq_lseek,
475 .release = seq_release_private,
476};
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f68827ed10e..af69f28277b6 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -156,9 +156,28 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
156{ 156{
157 return NULL; 157 return NULL;
158} 158}
159struct seq_operations proc_pid_maps_op = { 159static struct seq_operations proc_pid_maps_op = {
160 .start = m_start, 160 .start = m_start,
161 .next = m_next, 161 .next = m_next,
162 .stop = m_stop, 162 .stop = m_stop,
163 .show = show_map 163 .show = show_map
164}; 164};
165
166static int maps_open(struct inode *inode, struct file *file)
167{
168 int ret;
169 ret = seq_open(file, &proc_pid_maps_op);
170 if (!ret) {
171 struct seq_file *m = file->private_data;
172 m->private = NULL;
173 }
174 return ret;
175}
176
177struct file_operations proc_maps_operations = {
178 .open = maps_open,
179 .read = seq_read,
180 .llseek = seq_lseek,
181 .release = seq_release,
182};
183
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index cf6e1cf40351..752cea12e30f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1560,12 +1560,6 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1560 return res; 1560 return res;
1561} 1561}
1562 1562
1563static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1564 size_t count, loff_t pos)
1565{
1566 return generic_file_aio_write(iocb, buf, count, pos);
1567}
1568
1569const struct file_operations reiserfs_file_operations = { 1563const struct file_operations reiserfs_file_operations = {
1570 .read = generic_file_read, 1564 .read = generic_file_read,
1571 .write = reiserfs_file_write, 1565 .write = reiserfs_file_write,
@@ -1575,7 +1569,7 @@ const struct file_operations reiserfs_file_operations = {
1575 .fsync = reiserfs_sync_file, 1569 .fsync = reiserfs_sync_file,
1576 .sendfile = generic_file_sendfile, 1570 .sendfile = generic_file_sendfile,
1577 .aio_read = generic_file_aio_read, 1571 .aio_read = generic_file_aio_read,
1578 .aio_write = reiserfs_aio_write, 1572 .aio_write = generic_file_aio_write,
1579 .splice_read = generic_file_splice_read, 1573 .splice_read = generic_file_splice_read,
1580 .splice_write = generic_file_splice_write, 1574 .splice_write = generic_file_splice_write,
1581}; 1575};
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1b73529b8099..49d1a53dbef0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -834,8 +834,7 @@ static int write_ordered_buffers(spinlock_t * lock,
834 get_bh(bh); 834 get_bh(bh);
835 if (test_set_buffer_locked(bh)) { 835 if (test_set_buffer_locked(bh)) {
836 if (!buffer_dirty(bh)) { 836 if (!buffer_dirty(bh)) {
837 list_del_init(&jh->list); 837 list_move(&jh->list, &tmp);
838 list_add(&jh->list, &tmp);
839 goto loop_next; 838 goto loop_next;
840 } 839 }
841 spin_unlock(lock); 840 spin_unlock(lock);
@@ -855,8 +854,7 @@ static int write_ordered_buffers(spinlock_t * lock,
855 ret = -EIO; 854 ret = -EIO;
856 } 855 }
857 if (buffer_dirty(bh)) { 856 if (buffer_dirty(bh)) {
858 list_del_init(&jh->list); 857 list_move(&jh->list, &tmp);
859 list_add(&jh->list, &tmp);
860 add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 858 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
861 } else { 859 } else {
862 reiserfs_free_jh(bh); 860 reiserfs_free_jh(bh);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c71dd2760d32..c8e96195b96e 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -400,8 +400,7 @@ static int smb_request_send_req(struct smb_request *req)
400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED)) 400 if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
401 goto out; 401 goto out;
402 402
403 list_del_init(&req->rq_queue); 403 list_move_tail(&req->rq_queue, &server->recvq);
404 list_add_tail(&req->rq_queue, &server->recvq);
405 result = 1; 404 result = 1;
406out: 405out:
407 return result; 406 return result;
@@ -435,8 +434,7 @@ int smb_request_send_server(struct smb_sb_info *server)
435 result = smb_request_send_req(req); 434 result = smb_request_send_req(req);
436 if (result < 0) { 435 if (result < 0) {
437 server->conn_error = result; 436 server->conn_error = result;
438 list_del_init(&req->rq_queue); 437 list_move(&req->rq_queue, &server->xmitq);
439 list_add(&req->rq_queue, &server->xmitq);
440 result = -EIO; 438 result = -EIO;
441 goto out; 439 goto out;
442 } 440 }
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 3f71384020cb..24577e2c489b 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -193,8 +193,7 @@ int smbiod_retry(struct smb_sb_info *server)
193 if (req->rq_flags & SMB_REQ_RETRY) { 193 if (req->rq_flags & SMB_REQ_RETRY) {
194 /* must move the request to the xmitq */ 194 /* must move the request to the xmitq */
195 VERBOSE("retrying request %p on recvq\n", req); 195 VERBOSE("retrying request %p on recvq\n", req);
196 list_del(&req->rq_queue); 196 list_move(&req->rq_queue, &server->xmitq);
197 list_add(&req->rq_queue, &server->xmitq);
198 continue; 197 continue;
199 } 198 }
200#endif 199#endif
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 610b5bdbe75b..61c42430cba3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -430,10 +430,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
430 i++; 430 i++;
431 /* fallthrough */ 431 /* fallthrough */
432 default: 432 default:
433 if (filp->f_pos == 2) { 433 if (filp->f_pos == 2)
434 list_del(q); 434 list_move(q, &parent_sd->s_children);
435 list_add(q, &parent_sd->s_children); 435
436 }
437 for (p=q->next; p!= &parent_sd->s_children; p=p->next) { 436 for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
438 struct sysfs_dirent *next; 437 struct sysfs_dirent *next;
439 const char * name; 438 const char * name;
@@ -455,8 +454,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
455 dt_type(next)) < 0) 454 dt_type(next)) < 0)
456 return 0; 455 return 0;
457 456
458 list_del(q); 457 list_move(q, p);
459 list_add(q, p);
460 p = q; 458 p = q;
461 filp->f_pos++; 459 filp->f_pos++;
462 } 460 }