aboutsummaryrefslogtreecommitdiffstats
path: root/fs/sysfs
diff options
context:
space:
mode:
authorTejun Heo <htejun@gmail.com>2007-06-13 14:45:16 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2007-07-11 19:09:05 -0400
commit0ab66088c855eca68513bdd7442a426c4b374ced (patch)
tree7f931f7f984ac14701a33d123fa9e03d9048bf8f /fs/sysfs
parenteb36165353d0e5ac32b063f555acedcbaf6d3b75 (diff)
sysfs: implement sysfs_dirent active reference and immediate disconnect
sysfs: implement sysfs_dirent active reference and immediate disconnect Opening a sysfs node references its associated kobject, so userland can arbitrarily prolong the lifetime of a kobject, which complicates lifetime rules in drivers. This patch implements active reference and makes the association between kobject and sysfs immediately breakable. Now each sysfs_dirent has two reference counts - s_count and s_active. s_count is a regular reference count which guarantees that the containing sysfs_dirent is accessible. As long as an s_count reference is held, all sysfs internal fields in sysfs_dirent are accessible, including s_parent and s_name. The newly added s_active is the active reference count. This is acquired by invoking sysfs_get_active() and it's the caller's responsibility to ensure the sysfs_dirent itself is accessible (should be holding s_count one way or the other). Dereferencing sysfs_dirent to access objects out of sysfs proper requires an active reference. This includes access to the associated kobjects, attributes and ops. The active references can be drained and denied by calling sysfs_deactivate(). All active sysfs_dirents must be deactivated after deletion but before the default reference is dropped. This enables immediate disconnect of sysfs nodes. Once a sysfs_dirent is deleted, it won't access any entity external to sysfs proper. Because attr/bin_attr ops access both the node itself and its parent for kobject, they need to hold active references to both. sysfs_get/put_active_two() helpers are provided to help grab both references. The parent's is acquired first and released last. Unlike other operations, an mmapped area lingers on after mmap() is finished, and the module implementing it and the kobj need to stay referenced until all the mapped pages are gone. This is accomplished by holding one set of active references to the bin_attr and its parent if there has been any mmap during the lifetime of an open file. 
The references are dropped when the open file is released. This change makes sysfs lifetime rules independent from both kobject's and module's. It not only fixes several race conditions caused by sysfs not holding onto the proper module when referencing a kobject, but also helps fix and simplify lifetime management in the driver model and drivers by taking sysfs out of the equation. Please read the following message for more info. http://article.gmane.org/gmane.linux.kernel/510293 Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/sysfs')
-rw-r--r--fs/sysfs/bin.c95
-rw-r--r--fs/sysfs/dir.c28
-rw-r--r--fs/sysfs/file.c130
-rw-r--r--fs/sysfs/inode.c8
-rw-r--r--fs/sysfs/sysfs.h123
5 files changed, 271 insertions, 113 deletions
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 5dc47fe5de5e..618b8aea6a7b 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -23,6 +23,7 @@
23struct bin_buffer { 23struct bin_buffer {
24 struct mutex mutex; 24 struct mutex mutex;
25 void *buffer; 25 void *buffer;
26 int mmapped;
26}; 27};
27 28
28static int 29static int
@@ -30,12 +31,20 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
30{ 31{
31 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 32 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
32 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 33 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
33 struct kobject * kobj = to_kobj(dentry->d_parent); 34 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
35 int rc;
36
37 /* need attr_sd for attr, its parent for kobj */
38 if (!sysfs_get_active_two(attr_sd))
39 return -ENODEV;
34 40
35 if (!attr->read) 41 rc = -EIO;
36 return -EIO; 42 if (attr->read)
43 rc = attr->read(kobj, buffer, off, count);
37 44
38 return attr->read(kobj, buffer, off, count); 45 sysfs_put_active_two(attr_sd);
46
47 return rc;
39} 48}
40 49
41static ssize_t 50static ssize_t
@@ -79,12 +88,20 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
79{ 88{
80 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 89 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
81 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 90 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
82 struct kobject *kobj = to_kobj(dentry->d_parent); 91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 int rc;
93
94 /* need attr_sd for attr, its parent for kobj */
95 if (!sysfs_get_active_two(attr_sd))
96 return -ENODEV;
83 97
84 if (!attr->write) 98 rc = -EIO;
85 return -EIO; 99 if (attr->write)
100 rc = attr->write(kobj, buffer, offset, count);
86 101
87 return attr->write(kobj, buffer, offset, count); 102 sysfs_put_active_two(attr_sd);
103
104 return rc;
88} 105}
89 106
90static ssize_t write(struct file *file, const char __user *userbuf, 107static ssize_t write(struct file *file, const char __user *userbuf,
@@ -124,14 +141,24 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
124 struct bin_buffer *bb = file->private_data; 141 struct bin_buffer *bb = file->private_data;
125 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 142 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
126 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 143 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
127 struct kobject *kobj = to_kobj(file->f_path.dentry->d_parent); 144 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
128 int rc; 145 int rc;
129 146
130 if (!attr->mmap)
131 return -EINVAL;
132
133 mutex_lock(&bb->mutex); 147 mutex_lock(&bb->mutex);
134 rc = attr->mmap(kobj, attr, vma); 148
149 /* need attr_sd for attr, its parent for kobj */
150 if (!sysfs_get_active_two(attr_sd))
151 return -ENODEV;
152
153 rc = -EINVAL;
154 if (attr->mmap)
155 rc = attr->mmap(kobj, attr, vma);
156
157 if (rc == 0 && !bb->mmapped)
158 bb->mmapped = 1;
159 else
160 sysfs_put_active_two(attr_sd);
161
135 mutex_unlock(&bb->mutex); 162 mutex_unlock(&bb->mutex);
136 163
137 return rc; 164 return rc;
@@ -139,58 +166,60 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
139 166
140static int open(struct inode * inode, struct file * file) 167static int open(struct inode * inode, struct file * file)
141{ 168{
142 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
143 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 169 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
144 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 170 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
145 struct bin_buffer *bb = NULL; 171 struct bin_buffer *bb = NULL;
146 int error = -EINVAL; 172 int error;
147 173
148 if (!kobj || !attr) 174 /* need attr_sd for attr */
149 goto Done; 175 if (!sysfs_get_active(attr_sd))
176 return -ENODEV;
150 177
151 /* Grab the module reference for this attribute if we have one */ 178 /* Grab the module reference for this attribute */
152 error = -ENODEV; 179 error = -ENODEV;
153 if (!try_module_get(attr->attr.owner)) 180 if (!try_module_get(attr->attr.owner))
154 goto Done; 181 goto err_sput;
155 182
156 error = -EACCES; 183 error = -EACCES;
157 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) 184 if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
158 goto Error; 185 goto err_mput;
159 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) 186 if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
160 goto Error; 187 goto err_mput;
161 188
162 error = -ENOMEM; 189 error = -ENOMEM;
163 bb = kzalloc(sizeof(*bb), GFP_KERNEL); 190 bb = kzalloc(sizeof(*bb), GFP_KERNEL);
164 if (!bb) 191 if (!bb)
165 goto Error; 192 goto err_mput;
166 193
167 bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); 194 bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
168 if (!bb->buffer) 195 if (!bb->buffer)
169 goto Error; 196 goto err_mput;
170 197
171 mutex_init(&bb->mutex); 198 mutex_init(&bb->mutex);
172 file->private_data = bb; 199 file->private_data = bb;
173 200
174 error = 0; 201 /* open succeeded, put active reference and pin attr_sd */
175 goto Done; 202 sysfs_put_active(attr_sd);
203 sysfs_get(attr_sd);
204 return 0;
176 205
177 Error: 206 err_mput:
178 kfree(bb);
179 module_put(attr->attr.owner); 207 module_put(attr->attr.owner);
180 Done: 208 err_sput:
181 if (error) 209 sysfs_put_active(attr_sd);
182 kobject_put(kobj); 210 kfree(bb);
183 return error; 211 return error;
184} 212}
185 213
186static int release(struct inode * inode, struct file * file) 214static int release(struct inode * inode, struct file * file)
187{ 215{
188 struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
189 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 216 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
190 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 217 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
191 struct bin_buffer *bb = file->private_data; 218 struct bin_buffer *bb = file->private_data;
192 219
193 kobject_put(kobj); 220 if (bb->mmapped)
221 sysfs_put_active_two(attr_sd);
222 sysfs_put(attr_sd);
194 module_put(attr->attr.owner); 223 module_put(attr->attr.owner);
195 kfree(bb->buffer); 224 kfree(bb->buffer);
196 kfree(bb); 225 kfree(bb);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2a94dc36d166..e0d377aaf2cc 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -53,6 +53,19 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
53 repeat: 53 repeat:
54 parent_sd = sd->s_parent; 54 parent_sd = sd->s_parent;
55 55
56 /* If @sd is being released after deletion, s_active is write
57 * locked. If @sd is cursor for directory walk or being
58 * released prematurely, s_active has no reader or writer.
59 *
60 * sysfs_deactivate() lies to lockdep that s_active is
61 * unlocked immediately. Lie one more time to cover the
62 * previous lie.
63 */
64 if (!down_write_trylock(&sd->s_active))
65 rwsem_acquire(&sd->s_active.dep_map,
66 SYSFS_S_ACTIVE_DEACTIVATE, 0, _RET_IP_);
67 up_write(&sd->s_active);
68
56 if (sd->s_type & SYSFS_KOBJ_LINK) 69 if (sd->s_type & SYSFS_KOBJ_LINK)
57 sysfs_put(sd->s_elem.symlink.target_sd); 70 sysfs_put(sd->s_elem.symlink.target_sd);
58 if (sd->s_type & SYSFS_COPY_NAME) 71 if (sd->s_type & SYSFS_COPY_NAME)
@@ -113,6 +126,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
113 126
114 atomic_set(&sd->s_count, 1); 127 atomic_set(&sd->s_count, 1);
115 atomic_set(&sd->s_event, 1); 128 atomic_set(&sd->s_event, 1);
129 init_rwsem(&sd->s_active);
116 INIT_LIST_HEAD(&sd->s_children); 130 INIT_LIST_HEAD(&sd->s_children);
117 INIT_LIST_HEAD(&sd->s_sibling); 131 INIT_LIST_HEAD(&sd->s_sibling);
118 132
@@ -371,7 +385,6 @@ static void remove_dir(struct dentry * d)
371 d_delete(d); 385 d_delete(d);
372 sd = d->d_fsdata; 386 sd = d->d_fsdata;
373 list_del_init(&sd->s_sibling); 387 list_del_init(&sd->s_sibling);
374 sysfs_put(sd);
375 if (d->d_inode) 388 if (d->d_inode)
376 simple_rmdir(parent->d_inode,d); 389 simple_rmdir(parent->d_inode,d);
377 390
@@ -380,6 +393,9 @@ static void remove_dir(struct dentry * d)
380 393
381 mutex_unlock(&parent->d_inode->i_mutex); 394 mutex_unlock(&parent->d_inode->i_mutex);
382 dput(parent); 395 dput(parent);
396
397 sysfs_deactivate(sd);
398 sysfs_put(sd);
383} 399}
384 400
385void sysfs_remove_subdir(struct dentry * d) 401void sysfs_remove_subdir(struct dentry * d)
@@ -390,6 +406,7 @@ void sysfs_remove_subdir(struct dentry * d)
390 406
391static void __sysfs_remove_dir(struct dentry *dentry) 407static void __sysfs_remove_dir(struct dentry *dentry)
392{ 408{
409 LIST_HEAD(removed);
393 struct sysfs_dirent * parent_sd; 410 struct sysfs_dirent * parent_sd;
394 struct sysfs_dirent * sd, * tmp; 411 struct sysfs_dirent * sd, * tmp;
395 412
@@ -403,12 +420,17 @@ static void __sysfs_remove_dir(struct dentry *dentry)
403 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { 420 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
404 if (!sd->s_type || !(sd->s_type & SYSFS_NOT_PINNED)) 421 if (!sd->s_type || !(sd->s_type & SYSFS_NOT_PINNED))
405 continue; 422 continue;
406 list_del_init(&sd->s_sibling); 423 list_move(&sd->s_sibling, &removed);
407 sysfs_drop_dentry(sd, dentry); 424 sysfs_drop_dentry(sd, dentry);
408 sysfs_put(sd);
409 } 425 }
410 mutex_unlock(&dentry->d_inode->i_mutex); 426 mutex_unlock(&dentry->d_inode->i_mutex);
411 427
428 list_for_each_entry_safe(sd, tmp, &removed, s_sibling) {
429 list_del_init(&sd->s_sibling);
430 sysfs_deactivate(sd);
431 sysfs_put(sd);
432 }
433
412 remove_dir(dentry); 434 remove_dir(dentry);
413 /** 435 /**
414 * Drop reference from dget() on entrance. 436 * Drop reference from dget() on entrance.
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 04f6b0ebc889..310430baf572 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -87,8 +87,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
87 */ 87 */
88static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 88static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
89{ 89{
90 struct sysfs_dirent * sd = dentry->d_fsdata; 90 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
91 struct kobject * kobj = to_kobj(dentry->d_parent); 91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
92 struct sysfs_ops * ops = buffer->ops; 92 struct sysfs_ops * ops = buffer->ops;
93 int ret = 0; 93 int ret = 0;
94 ssize_t count; 94 ssize_t count;
@@ -98,8 +98,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
98 if (!buffer->page) 98 if (!buffer->page)
99 return -ENOMEM; 99 return -ENOMEM;
100 100
101 buffer->event = atomic_read(&sd->s_event); 101 /* need attr_sd for attr and ops, its parent for kobj */
102 count = ops->show(kobj, sd->s_elem.attr.attr, buffer->page); 102 if (!sysfs_get_active_two(attr_sd))
103 return -ENODEV;
104
105 buffer->event = atomic_read(&attr_sd->s_event);
106 count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
107
108 sysfs_put_active_two(attr_sd);
109
103 BUG_ON(count > (ssize_t)PAGE_SIZE); 110 BUG_ON(count > (ssize_t)PAGE_SIZE);
104 if (count >= 0) { 111 if (count >= 0) {
105 buffer->needs_read_fill = 0; 112 buffer->needs_read_fill = 0;
@@ -195,14 +202,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
195 * passing the buffer that we acquired in fill_write_buffer(). 202 * passing the buffer that we acquired in fill_write_buffer().
196 */ 203 */
197 204
198static int 205static int
199flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) 206flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
200{ 207{
201 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 208 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
202 struct kobject * kobj = to_kobj(dentry->d_parent); 209 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
203 struct sysfs_ops * ops = buffer->ops; 210 struct sysfs_ops * ops = buffer->ops;
211 int rc;
212
213 /* need attr_sd for attr and ops, its parent for kobj */
214 if (!sysfs_get_active_two(attr_sd))
215 return -ENODEV;
216
217 rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
218
219 sysfs_put_active_two(attr_sd);
204 220
205 return ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count); 221 return rc;
206} 222}
207 223
208 224
@@ -246,22 +262,22 @@ out:
246 262
247static int sysfs_open_file(struct inode *inode, struct file *file) 263static int sysfs_open_file(struct inode *inode, struct file *file)
248{ 264{
249 struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
250 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 265 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
251 struct attribute *attr = attr_sd->s_elem.attr.attr; 266 struct attribute *attr = attr_sd->s_elem.attr.attr;
267 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
252 struct sysfs_buffer_collection *set; 268 struct sysfs_buffer_collection *set;
253 struct sysfs_buffer * buffer; 269 struct sysfs_buffer * buffer;
254 struct sysfs_ops * ops = NULL; 270 struct sysfs_ops * ops = NULL;
255 int error = 0; 271 int error;
256 272
257 if (!kobj || !attr) 273 /* need attr_sd for attr and ops, its parent for kobj */
258 goto Einval; 274 if (!sysfs_get_active_two(attr_sd))
275 return -ENODEV;
259 276
260 /* Grab the module reference for this attribute if we have one */ 277 /* Grab the module reference for this attribute */
261 if (!try_module_get(attr->owner)) { 278 error = -ENODEV;
262 error = -ENODEV; 279 if (!try_module_get(attr->owner))
263 goto Done; 280 goto err_sput;
264 }
265 281
266 /* if the kobject has no ktype, then we assume that it is a subsystem 282 /* if the kobject has no ktype, then we assume that it is a subsystem
267 * itself, and use ops for it. 283 * itself, and use ops for it.
@@ -276,30 +292,30 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
276 /* No sysfs operations, either from having no subsystem, 292 /* No sysfs operations, either from having no subsystem,
277 * or the subsystem have no operations. 293 * or the subsystem have no operations.
278 */ 294 */
295 error = -EACCES;
279 if (!ops) 296 if (!ops)
280 goto Eaccess; 297 goto err_mput;
281 298
282 /* make sure we have a collection to add our buffers to */ 299 /* make sure we have a collection to add our buffers to */
283 mutex_lock(&inode->i_mutex); 300 mutex_lock(&inode->i_mutex);
284 if (!(set = inode->i_private)) { 301 if (!(set = inode->i_private)) {
285 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) { 302 error = -ENOMEM;
286 error = -ENOMEM; 303 if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL)))
287 goto Done; 304 goto err_mput;
288 } else { 305 else
289 INIT_LIST_HEAD(&set->associates); 306 INIT_LIST_HEAD(&set->associates);
290 }
291 } 307 }
292 mutex_unlock(&inode->i_mutex); 308 mutex_unlock(&inode->i_mutex);
293 309
310 error = -EACCES;
311
294 /* File needs write support. 312 /* File needs write support.
295 * The inode's perms must say it's ok, 313 * The inode's perms must say it's ok,
296 * and we must have a store method. 314 * and we must have a store method.
297 */ 315 */
298 if (file->f_mode & FMODE_WRITE) { 316 if (file->f_mode & FMODE_WRITE) {
299
300 if (!(inode->i_mode & S_IWUGO) || !ops->store) 317 if (!(inode->i_mode & S_IWUGO) || !ops->store)
301 goto Eaccess; 318 goto err_mput;
302
303 } 319 }
304 320
305 /* File needs read support. 321 /* File needs read support.
@@ -308,46 +324,45 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
308 */ 324 */
309 if (file->f_mode & FMODE_READ) { 325 if (file->f_mode & FMODE_READ) {
310 if (!(inode->i_mode & S_IRUGO) || !ops->show) 326 if (!(inode->i_mode & S_IRUGO) || !ops->show)
311 goto Eaccess; 327 goto err_mput;
312 } 328 }
313 329
314 /* No error? Great, allocate a buffer for the file, and store it 330 /* No error? Great, allocate a buffer for the file, and store it
315 * it in file->private_data for easy access. 331 * it in file->private_data for easy access.
316 */ 332 */
333 error = -ENOMEM;
317 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); 334 buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
318 if (buffer) { 335 if (!buffer)
319 INIT_LIST_HEAD(&buffer->associates); 336 goto err_mput;
320 init_MUTEX(&buffer->sem);
321 buffer->needs_read_fill = 1;
322 buffer->ops = ops;
323 add_to_collection(buffer, inode);
324 file->private_data = buffer;
325 } else
326 error = -ENOMEM;
327 goto Done;
328 337
329 Einval: 338 INIT_LIST_HEAD(&buffer->associates);
330 error = -EINVAL; 339 init_MUTEX(&buffer->sem);
331 goto Done; 340 buffer->needs_read_fill = 1;
332 Eaccess: 341 buffer->ops = ops;
333 error = -EACCES; 342 add_to_collection(buffer, inode);
343 file->private_data = buffer;
344
345 /* open succeeded, put active references and pin attr_sd */
346 sysfs_put_active_two(attr_sd);
347 sysfs_get(attr_sd);
348 return 0;
349
350 err_mput:
334 module_put(attr->owner); 351 module_put(attr->owner);
335 Done: 352 err_sput:
336 if (error) 353 sysfs_put_active_two(attr_sd);
337 kobject_put(kobj);
338 return error; 354 return error;
339} 355}
340 356
341static int sysfs_release(struct inode * inode, struct file * filp) 357static int sysfs_release(struct inode * inode, struct file * filp)
342{ 358{
343 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
344 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; 359 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
345 struct attribute *attr = attr_sd->s_elem.attr.attr; 360 struct attribute *attr = attr_sd->s_elem.attr.attr;
346 struct sysfs_buffer * buffer = filp->private_data; 361 struct sysfs_buffer * buffer = filp->private_data;
347 362
348 if (buffer) 363 if (buffer)
349 remove_from_collection(buffer, inode); 364 remove_from_collection(buffer, inode);
350 kobject_put(kobj); 365 sysfs_put(attr_sd);
351 /* After this point, attr should not be accessed. */ 366 /* After this point, attr should not be accessed. */
352 module_put(attr->owner); 367 module_put(attr->owner);
353 368
@@ -376,18 +391,25 @@ static int sysfs_release(struct inode * inode, struct file * filp)
376static unsigned int sysfs_poll(struct file *filp, poll_table *wait) 391static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
377{ 392{
378 struct sysfs_buffer * buffer = filp->private_data; 393 struct sysfs_buffer * buffer = filp->private_data;
379 struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); 394 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
380 struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; 395 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
381 int res = 0; 396
397 /* need parent for the kobj, grab both */
398 if (!sysfs_get_active_two(attr_sd))
399 goto trigger;
382 400
383 poll_wait(filp, &kobj->poll, wait); 401 poll_wait(filp, &kobj->poll, wait);
384 402
385 if (buffer->event != atomic_read(&sd->s_event)) { 403 sysfs_put_active_two(attr_sd);
386 res = POLLERR|POLLPRI;
387 buffer->needs_read_fill = 1;
388 }
389 404
390 return res; 405 if (buffer->event != atomic_read(&attr_sd->s_event))
406 goto trigger;
407
408 return 0;
409
410 trigger:
411 buffer->needs_read_fill = 1;
412 return POLLERR|POLLPRI;
391} 413}
392 414
393 415
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 617d10cea07d..7b9a8f132d5a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -277,12 +277,16 @@ int sysfs_hash_and_remove(struct dentry * dir, const char * name)
277 if (!strcmp(sd->s_name, name)) { 277 if (!strcmp(sd->s_name, name)) {
278 list_del_init(&sd->s_sibling); 278 list_del_init(&sd->s_sibling);
279 sysfs_drop_dentry(sd, dir); 279 sysfs_drop_dentry(sd, dir);
280 sysfs_put(sd);
281 found = 1; 280 found = 1;
282 break; 281 break;
283 } 282 }
284 } 283 }
285 mutex_unlock(&dir->d_inode->i_mutex); 284 mutex_unlock(&dir->d_inode->i_mutex);
286 285
287 return found ? 0 : -ENOENT; 286 if (!found)
287 return -ENOENT;
288
289 sysfs_deactivate(sd);
290 sysfs_put(sd);
291 return 0;
288} 292}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 60717660ac55..d998e8e27841 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -14,8 +14,14 @@ struct sysfs_elem_bin_attr {
14 struct bin_attribute * bin_attr; 14 struct bin_attribute * bin_attr;
15}; 15};
16 16
17/*
18 * As long as s_count reference is held, the sysfs_dirent itself is
19 * accessible. Dereferencing s_elem or any other outer entity
20 * requires s_active reference.
21 */
17struct sysfs_dirent { 22struct sysfs_dirent {
18 atomic_t s_count; 23 atomic_t s_count;
24 struct rw_semaphore s_active;
19 struct sysfs_dirent * s_parent; 25 struct sysfs_dirent * s_parent;
20 struct list_head s_sibling; 26 struct list_head s_sibling;
21 struct list_head s_children; 27 struct list_head s_children;
@@ -36,6 +42,17 @@ struct sysfs_dirent {
36 atomic_t s_event; 42 atomic_t s_event;
37}; 43};
38 44
45/*
46 * A sysfs file which deletes another file when written to need to
47 * write lock the s_active of the victim while its s_active is read
48 * locked for the write operation. Tell lockdep that this is okay.
49 */
50enum sysfs_s_active_class
51{
52 SYSFS_S_ACTIVE_NORMAL, /* file r/w access, etc - default */
53 SYSFS_S_ACTIVE_DEACTIVATE, /* file deactivation */
54};
55
39extern struct vfsmount * sysfs_mount; 56extern struct vfsmount * sysfs_mount;
40extern struct kmem_cache *sysfs_dir_cachep; 57extern struct kmem_cache *sysfs_dir_cachep;
41 58
@@ -87,43 +104,107 @@ struct sysfs_buffer_collection {
87 struct list_head associates; 104 struct list_head associates;
88}; 105};
89 106
90static inline struct kobject * to_kobj(struct dentry * dentry) 107static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
91{ 108{
92 struct sysfs_dirent * sd = dentry->d_fsdata; 109 if (sd) {
93 return sd->s_elem.dir.kobj; 110 WARN_ON(!atomic_read(&sd->s_count));
111 atomic_inc(&sd->s_count);
112 }
113 return sd;
94} 114}
95 115
96static inline struct kobject *sysfs_get_kobject(struct dentry *dentry) 116static inline void sysfs_put(struct sysfs_dirent * sd)
97{ 117{
98 struct kobject * kobj = NULL; 118 if (sd && atomic_dec_and_test(&sd->s_count))
99 119 release_sysfs_dirent(sd);
100 spin_lock(&dcache_lock); 120}
101 if (!d_unhashed(dentry)) {
102 struct sysfs_dirent * sd = dentry->d_fsdata;
103
104 if (sd->s_type & SYSFS_KOBJ_LINK)
105 sd = sd->s_elem.symlink.target_sd;
106 121
107 kobj = kobject_get(sd->s_elem.dir.kobj); 122/**
123 * sysfs_get_active - get an active reference to sysfs_dirent
124 * @sd: sysfs_dirent to get an active reference to
125 *
126 * Get an active reference of @sd. This function is noop if @sd
127 * is NULL.
128 *
129 * RETURNS:
130 * Pointer to @sd on success, NULL on failure.
131 */
132static inline struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
133{
134 if (sd) {
135 if (unlikely(!down_read_trylock(&sd->s_active)))
136 sd = NULL;
108 } 137 }
109 spin_unlock(&dcache_lock); 138 return sd;
139}
110 140
111 return kobj; 141/**
142 * sysfs_put_active - put an active reference to sysfs_dirent
143 * @sd: sysfs_dirent to put an active reference to
144 *
145 * Put an active reference to @sd. This function is noop if @sd
146 * is NULL.
147 */
148static inline void sysfs_put_active(struct sysfs_dirent *sd)
149{
150 if (sd)
151 up_read(&sd->s_active);
112} 152}
113 153
114static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) 154/**
155 * sysfs_get_active_two - get active references to sysfs_dirent and parent
156 * @sd: sysfs_dirent of interest
157 *
158 * Get active reference to @sd and its parent. Parent's active
159 * reference is grabbed first. This function is noop if @sd is
160 * NULL.
161 *
162 * RETURNS:
163 * Pointer to @sd on success, NULL on failure.
164 */
165static inline struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
115{ 166{
116 if (sd) { 167 if (sd) {
117 WARN_ON(!atomic_read(&sd->s_count)); 168 if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
118 atomic_inc(&sd->s_count); 169 return NULL;
170 if (unlikely(!sysfs_get_active(sd))) {
171 sysfs_put_active(sd->s_parent);
172 return NULL;
173 }
119 } 174 }
120 return sd; 175 return sd;
121} 176}
122 177
123static inline void sysfs_put(struct sysfs_dirent * sd) 178/**
179 * sysfs_put_active_two - put active references to sysfs_dirent and parent
180 * @sd: sysfs_dirent of interest
181 *
182 * Put active references to @sd and its parent. This function is
183 * noop if @sd is NULL.
184 */
185static inline void sysfs_put_active_two(struct sysfs_dirent *sd)
124{ 186{
125 if (sd && atomic_dec_and_test(&sd->s_count)) 187 if (sd) {
126 release_sysfs_dirent(sd); 188 sysfs_put_active(sd);
189 sysfs_put_active(sd->s_parent);
190 }
191}
192
193/**
194 * sysfs_deactivate - deactivate sysfs_dirent
195 * @sd: sysfs_dirent to deactivate
196 *
197 * Deny new active references and drain existing ones. s_active
198 * will be unlocked when the sysfs_dirent is released.
199 */
200static inline void sysfs_deactivate(struct sysfs_dirent *sd)
201{
202 down_write_nested(&sd->s_active, SYSFS_S_ACTIVE_DEACTIVATE);
203
204 /* s_active will be unlocked by the thread doing the final put
205 * on @sd. Lie to lockdep.
206 */
207 rwsem_release(&sd->s_active.dep_map, 1, _RET_IP_);
127} 208}
128 209
129static inline int sysfs_is_shadowed_inode(struct inode *inode) 210static inline int sysfs_is_shadowed_inode(struct inode *inode)