diff options
Diffstat (limited to 'fs/overlayfs/readdir.c')
-rw-r--r-- | fs/overlayfs/readdir.c | 586 |
1 files changed, 586 insertions, 0 deletions
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..ab1e3dcbed95 --- /dev/null +++ b/fs/overlayfs/readdir.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/namei.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/xattr.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/security.h> | ||
17 | #include <linux/cred.h> | ||
18 | #include "overlayfs.h" | ||
19 | |||
20 | struct ovl_cache_entry { | ||
21 | unsigned int len; | ||
22 | unsigned int type; | ||
23 | u64 ino; | ||
24 | struct list_head l_node; | ||
25 | struct rb_node node; | ||
26 | bool is_whiteout; | ||
27 | bool is_cursor; | ||
28 | char name[]; | ||
29 | }; | ||
30 | |||
31 | struct ovl_dir_cache { | ||
32 | long refcount; | ||
33 | u64 version; | ||
34 | struct list_head entries; | ||
35 | }; | ||
36 | |||
37 | struct ovl_readdir_data { | ||
38 | struct dir_context ctx; | ||
39 | bool is_merge; | ||
40 | struct rb_root root; | ||
41 | struct list_head *list; | ||
42 | struct list_head middle; | ||
43 | int count; | ||
44 | int err; | ||
45 | }; | ||
46 | |||
47 | struct ovl_dir_file { | ||
48 | bool is_real; | ||
49 | bool is_upper; | ||
50 | struct ovl_dir_cache *cache; | ||
51 | struct ovl_cache_entry cursor; | ||
52 | struct file *realfile; | ||
53 | struct file *upperfile; | ||
54 | }; | ||
55 | |||
56 | static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) | ||
57 | { | ||
58 | return container_of(n, struct ovl_cache_entry, node); | ||
59 | } | ||
60 | |||
61 | static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, | ||
62 | const char *name, int len) | ||
63 | { | ||
64 | struct rb_node *node = root->rb_node; | ||
65 | int cmp; | ||
66 | |||
67 | while (node) { | ||
68 | struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); | ||
69 | |||
70 | cmp = strncmp(name, p->name, len); | ||
71 | if (cmp > 0) | ||
72 | node = p->node.rb_right; | ||
73 | else if (cmp < 0 || len < p->len) | ||
74 | node = p->node.rb_left; | ||
75 | else | ||
76 | return p; | ||
77 | } | ||
78 | |||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, | ||
83 | u64 ino, unsigned int d_type) | ||
84 | { | ||
85 | struct ovl_cache_entry *p; | ||
86 | size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); | ||
87 | |||
88 | p = kmalloc(size, GFP_KERNEL); | ||
89 | if (p) { | ||
90 | memcpy(p->name, name, len); | ||
91 | p->name[len] = '\0'; | ||
92 | p->len = len; | ||
93 | p->type = d_type; | ||
94 | p->ino = ino; | ||
95 | p->is_whiteout = false; | ||
96 | p->is_cursor = false; | ||
97 | } | ||
98 | |||
99 | return p; | ||
100 | } | ||
101 | |||
102 | static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, | ||
103 | const char *name, int len, u64 ino, | ||
104 | unsigned int d_type) | ||
105 | { | ||
106 | struct rb_node **newp = &rdd->root.rb_node; | ||
107 | struct rb_node *parent = NULL; | ||
108 | struct ovl_cache_entry *p; | ||
109 | |||
110 | while (*newp) { | ||
111 | int cmp; | ||
112 | struct ovl_cache_entry *tmp; | ||
113 | |||
114 | parent = *newp; | ||
115 | tmp = ovl_cache_entry_from_node(*newp); | ||
116 | cmp = strncmp(name, tmp->name, len); | ||
117 | if (cmp > 0) | ||
118 | newp = &tmp->node.rb_right; | ||
119 | else if (cmp < 0 || len < tmp->len) | ||
120 | newp = &tmp->node.rb_left; | ||
121 | else | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | p = ovl_cache_entry_new(name, len, ino, d_type); | ||
126 | if (p == NULL) | ||
127 | return -ENOMEM; | ||
128 | |||
129 | list_add_tail(&p->l_node, rdd->list); | ||
130 | rb_link_node(&p->node, parent, newp); | ||
131 | rb_insert_color(&p->node, &rdd->root); | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static int ovl_fill_lower(struct ovl_readdir_data *rdd, | ||
137 | const char *name, int namelen, | ||
138 | loff_t offset, u64 ino, unsigned int d_type) | ||
139 | { | ||
140 | struct ovl_cache_entry *p; | ||
141 | |||
142 | p = ovl_cache_entry_find(&rdd->root, name, namelen); | ||
143 | if (p) { | ||
144 | list_move_tail(&p->l_node, &rdd->middle); | ||
145 | } else { | ||
146 | p = ovl_cache_entry_new(name, namelen, ino, d_type); | ||
147 | if (p == NULL) | ||
148 | rdd->err = -ENOMEM; | ||
149 | else | ||
150 | list_add_tail(&p->l_node, &rdd->middle); | ||
151 | } | ||
152 | |||
153 | return rdd->err; | ||
154 | } | ||
155 | |||
156 | void ovl_cache_free(struct list_head *list) | ||
157 | { | ||
158 | struct ovl_cache_entry *p; | ||
159 | struct ovl_cache_entry *n; | ||
160 | |||
161 | list_for_each_entry_safe(p, n, list, l_node) | ||
162 | kfree(p); | ||
163 | |||
164 | INIT_LIST_HEAD(list); | ||
165 | } | ||
166 | |||
167 | static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) | ||
168 | { | ||
169 | struct ovl_dir_cache *cache = od->cache; | ||
170 | |||
171 | list_del_init(&od->cursor.l_node); | ||
172 | WARN_ON(cache->refcount <= 0); | ||
173 | cache->refcount--; | ||
174 | if (!cache->refcount) { | ||
175 | if (ovl_dir_cache(dentry) == cache) | ||
176 | ovl_set_dir_cache(dentry, NULL); | ||
177 | |||
178 | ovl_cache_free(&cache->entries); | ||
179 | kfree(cache); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static int ovl_fill_merge(void *buf, const char *name, int namelen, | ||
184 | loff_t offset, u64 ino, unsigned int d_type) | ||
185 | { | ||
186 | struct ovl_readdir_data *rdd = buf; | ||
187 | |||
188 | rdd->count++; | ||
189 | if (!rdd->is_merge) | ||
190 | return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); | ||
191 | else | ||
192 | return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); | ||
193 | } | ||
194 | |||
195 | static inline int ovl_dir_read(struct path *realpath, | ||
196 | struct ovl_readdir_data *rdd) | ||
197 | { | ||
198 | struct file *realfile; | ||
199 | int err; | ||
200 | |||
201 | realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); | ||
202 | if (IS_ERR(realfile)) | ||
203 | return PTR_ERR(realfile); | ||
204 | |||
205 | rdd->ctx.pos = 0; | ||
206 | do { | ||
207 | rdd->count = 0; | ||
208 | rdd->err = 0; | ||
209 | err = iterate_dir(realfile, &rdd->ctx); | ||
210 | if (err >= 0) | ||
211 | err = rdd->err; | ||
212 | } while (!err && rdd->count); | ||
213 | fput(realfile); | ||
214 | |||
215 | return err; | ||
216 | } | ||
217 | |||
218 | static void ovl_dir_reset(struct file *file) | ||
219 | { | ||
220 | struct ovl_dir_file *od = file->private_data; | ||
221 | struct ovl_dir_cache *cache = od->cache; | ||
222 | struct dentry *dentry = file->f_path.dentry; | ||
223 | enum ovl_path_type type = ovl_path_type(dentry); | ||
224 | |||
225 | if (cache && ovl_dentry_version_get(dentry) != cache->version) { | ||
226 | ovl_cache_put(od, dentry); | ||
227 | od->cache = NULL; | ||
228 | } | ||
229 | WARN_ON(!od->is_real && type != OVL_PATH_MERGE); | ||
230 | if (od->is_real && type == OVL_PATH_MERGE) | ||
231 | od->is_real = false; | ||
232 | } | ||
233 | |||
234 | static int ovl_dir_mark_whiteouts(struct dentry *dir, | ||
235 | struct ovl_readdir_data *rdd) | ||
236 | { | ||
237 | struct ovl_cache_entry *p; | ||
238 | struct dentry *dentry; | ||
239 | const struct cred *old_cred; | ||
240 | struct cred *override_cred; | ||
241 | |||
242 | override_cred = prepare_creds(); | ||
243 | if (!override_cred) { | ||
244 | ovl_cache_free(rdd->list); | ||
245 | return -ENOMEM; | ||
246 | } | ||
247 | |||
248 | /* | ||
249 | * CAP_DAC_OVERRIDE for lookup | ||
250 | */ | ||
251 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
252 | old_cred = override_creds(override_cred); | ||
253 | |||
254 | mutex_lock(&dir->d_inode->i_mutex); | ||
255 | list_for_each_entry(p, rdd->list, l_node) { | ||
256 | if (p->is_cursor) | ||
257 | continue; | ||
258 | |||
259 | if (p->type != DT_CHR) | ||
260 | continue; | ||
261 | |||
262 | dentry = lookup_one_len(p->name, dir, p->len); | ||
263 | if (IS_ERR(dentry)) | ||
264 | continue; | ||
265 | |||
266 | p->is_whiteout = ovl_is_whiteout(dentry); | ||
267 | dput(dentry); | ||
268 | } | ||
269 | mutex_unlock(&dir->d_inode->i_mutex); | ||
270 | |||
271 | revert_creds(old_cred); | ||
272 | put_cred(override_cred); | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) | ||
278 | { | ||
279 | int err; | ||
280 | struct path lowerpath; | ||
281 | struct path upperpath; | ||
282 | struct ovl_readdir_data rdd = { | ||
283 | .ctx.actor = ovl_fill_merge, | ||
284 | .list = list, | ||
285 | .root = RB_ROOT, | ||
286 | .is_merge = false, | ||
287 | }; | ||
288 | |||
289 | ovl_path_lower(dentry, &lowerpath); | ||
290 | ovl_path_upper(dentry, &upperpath); | ||
291 | |||
292 | if (upperpath.dentry) { | ||
293 | err = ovl_dir_read(&upperpath, &rdd); | ||
294 | if (err) | ||
295 | goto out; | ||
296 | |||
297 | if (lowerpath.dentry) { | ||
298 | err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); | ||
299 | if (err) | ||
300 | goto out; | ||
301 | } | ||
302 | } | ||
303 | if (lowerpath.dentry) { | ||
304 | /* | ||
305 | * Insert lowerpath entries before upperpath ones, this allows | ||
306 | * offsets to be reasonably constant | ||
307 | */ | ||
308 | list_add(&rdd.middle, rdd.list); | ||
309 | rdd.is_merge = true; | ||
310 | err = ovl_dir_read(&lowerpath, &rdd); | ||
311 | list_del(&rdd.middle); | ||
312 | } | ||
313 | out: | ||
314 | return err; | ||
315 | } | ||
316 | |||
317 | static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) | ||
318 | { | ||
319 | struct ovl_cache_entry *p; | ||
320 | loff_t off = 0; | ||
321 | |||
322 | list_for_each_entry(p, &od->cache->entries, l_node) { | ||
323 | if (p->is_cursor) | ||
324 | continue; | ||
325 | if (off >= pos) | ||
326 | break; | ||
327 | off++; | ||
328 | } | ||
329 | list_move_tail(&od->cursor.l_node, &p->l_node); | ||
330 | } | ||
331 | |||
332 | static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) | ||
333 | { | ||
334 | int res; | ||
335 | struct ovl_dir_cache *cache; | ||
336 | |||
337 | cache = ovl_dir_cache(dentry); | ||
338 | if (cache && ovl_dentry_version_get(dentry) == cache->version) { | ||
339 | cache->refcount++; | ||
340 | return cache; | ||
341 | } | ||
342 | ovl_set_dir_cache(dentry, NULL); | ||
343 | |||
344 | cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); | ||
345 | if (!cache) | ||
346 | return ERR_PTR(-ENOMEM); | ||
347 | |||
348 | cache->refcount = 1; | ||
349 | INIT_LIST_HEAD(&cache->entries); | ||
350 | |||
351 | res = ovl_dir_read_merged(dentry, &cache->entries); | ||
352 | if (res) { | ||
353 | ovl_cache_free(&cache->entries); | ||
354 | kfree(cache); | ||
355 | return ERR_PTR(res); | ||
356 | } | ||
357 | |||
358 | cache->version = ovl_dentry_version_get(dentry); | ||
359 | ovl_set_dir_cache(dentry, cache); | ||
360 | |||
361 | return cache; | ||
362 | } | ||
363 | |||
364 | static int ovl_iterate(struct file *file, struct dir_context *ctx) | ||
365 | { | ||
366 | struct ovl_dir_file *od = file->private_data; | ||
367 | struct dentry *dentry = file->f_path.dentry; | ||
368 | |||
369 | if (!ctx->pos) | ||
370 | ovl_dir_reset(file); | ||
371 | |||
372 | if (od->is_real) | ||
373 | return iterate_dir(od->realfile, ctx); | ||
374 | |||
375 | if (!od->cache) { | ||
376 | struct ovl_dir_cache *cache; | ||
377 | |||
378 | cache = ovl_cache_get(dentry); | ||
379 | if (IS_ERR(cache)) | ||
380 | return PTR_ERR(cache); | ||
381 | |||
382 | od->cache = cache; | ||
383 | ovl_seek_cursor(od, ctx->pos); | ||
384 | } | ||
385 | |||
386 | while (od->cursor.l_node.next != &od->cache->entries) { | ||
387 | struct ovl_cache_entry *p; | ||
388 | |||
389 | p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); | ||
390 | /* Skip cursors */ | ||
391 | if (!p->is_cursor) { | ||
392 | if (!p->is_whiteout) { | ||
393 | if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) | ||
394 | break; | ||
395 | } | ||
396 | ctx->pos++; | ||
397 | } | ||
398 | list_move(&od->cursor.l_node, &p->l_node); | ||
399 | } | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) | ||
404 | { | ||
405 | loff_t res; | ||
406 | struct ovl_dir_file *od = file->private_data; | ||
407 | |||
408 | mutex_lock(&file_inode(file)->i_mutex); | ||
409 | if (!file->f_pos) | ||
410 | ovl_dir_reset(file); | ||
411 | |||
412 | if (od->is_real) { | ||
413 | res = vfs_llseek(od->realfile, offset, origin); | ||
414 | file->f_pos = od->realfile->f_pos; | ||
415 | } else { | ||
416 | res = -EINVAL; | ||
417 | |||
418 | switch (origin) { | ||
419 | case SEEK_CUR: | ||
420 | offset += file->f_pos; | ||
421 | break; | ||
422 | case SEEK_SET: | ||
423 | break; | ||
424 | default: | ||
425 | goto out_unlock; | ||
426 | } | ||
427 | if (offset < 0) | ||
428 | goto out_unlock; | ||
429 | |||
430 | if (offset != file->f_pos) { | ||
431 | file->f_pos = offset; | ||
432 | if (od->cache) | ||
433 | ovl_seek_cursor(od, offset); | ||
434 | } | ||
435 | res = offset; | ||
436 | } | ||
437 | out_unlock: | ||
438 | mutex_unlock(&file_inode(file)->i_mutex); | ||
439 | |||
440 | return res; | ||
441 | } | ||
442 | |||
443 | static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | ||
444 | int datasync) | ||
445 | { | ||
446 | struct ovl_dir_file *od = file->private_data; | ||
447 | struct dentry *dentry = file->f_path.dentry; | ||
448 | struct file *realfile = od->realfile; | ||
449 | |||
450 | /* | ||
451 | * Need to check if we started out being a lower dir, but got copied up | ||
452 | */ | ||
453 | if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { | ||
454 | struct inode *inode = file_inode(file); | ||
455 | |||
456 | realfile = lockless_dereference(od->upperfile); | ||
457 | if (!realfile) { | ||
458 | struct path upperpath; | ||
459 | |||
460 | ovl_path_upper(dentry, &upperpath); | ||
461 | realfile = ovl_path_open(&upperpath, O_RDONLY); | ||
462 | smp_mb__before_spinlock(); | ||
463 | mutex_lock(&inode->i_mutex); | ||
464 | if (!od->upperfile) { | ||
465 | if (IS_ERR(realfile)) { | ||
466 | mutex_unlock(&inode->i_mutex); | ||
467 | return PTR_ERR(realfile); | ||
468 | } | ||
469 | od->upperfile = realfile; | ||
470 | } else { | ||
471 | /* somebody has beaten us to it */ | ||
472 | if (!IS_ERR(realfile)) | ||
473 | fput(realfile); | ||
474 | realfile = od->upperfile; | ||
475 | } | ||
476 | mutex_unlock(&inode->i_mutex); | ||
477 | } | ||
478 | } | ||
479 | |||
480 | return vfs_fsync_range(realfile, start, end, datasync); | ||
481 | } | ||
482 | |||
483 | static int ovl_dir_release(struct inode *inode, struct file *file) | ||
484 | { | ||
485 | struct ovl_dir_file *od = file->private_data; | ||
486 | |||
487 | if (od->cache) { | ||
488 | mutex_lock(&inode->i_mutex); | ||
489 | ovl_cache_put(od, file->f_path.dentry); | ||
490 | mutex_unlock(&inode->i_mutex); | ||
491 | } | ||
492 | fput(od->realfile); | ||
493 | if (od->upperfile) | ||
494 | fput(od->upperfile); | ||
495 | kfree(od); | ||
496 | |||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | static int ovl_dir_open(struct inode *inode, struct file *file) | ||
501 | { | ||
502 | struct path realpath; | ||
503 | struct file *realfile; | ||
504 | struct ovl_dir_file *od; | ||
505 | enum ovl_path_type type; | ||
506 | |||
507 | od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); | ||
508 | if (!od) | ||
509 | return -ENOMEM; | ||
510 | |||
511 | type = ovl_path_real(file->f_path.dentry, &realpath); | ||
512 | realfile = ovl_path_open(&realpath, file->f_flags); | ||
513 | if (IS_ERR(realfile)) { | ||
514 | kfree(od); | ||
515 | return PTR_ERR(realfile); | ||
516 | } | ||
517 | INIT_LIST_HEAD(&od->cursor.l_node); | ||
518 | od->realfile = realfile; | ||
519 | od->is_real = (type != OVL_PATH_MERGE); | ||
520 | od->is_upper = (type != OVL_PATH_LOWER); | ||
521 | od->cursor.is_cursor = true; | ||
522 | file->private_data = od; | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | const struct file_operations ovl_dir_operations = { | ||
528 | .read = generic_read_dir, | ||
529 | .open = ovl_dir_open, | ||
530 | .iterate = ovl_iterate, | ||
531 | .llseek = ovl_dir_llseek, | ||
532 | .fsync = ovl_dir_fsync, | ||
533 | .release = ovl_dir_release, | ||
534 | }; | ||
535 | |||
536 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) | ||
537 | { | ||
538 | int err; | ||
539 | struct ovl_cache_entry *p; | ||
540 | |||
541 | err = ovl_dir_read_merged(dentry, list); | ||
542 | if (err) | ||
543 | return err; | ||
544 | |||
545 | err = 0; | ||
546 | |||
547 | list_for_each_entry(p, list, l_node) { | ||
548 | if (p->is_whiteout) | ||
549 | continue; | ||
550 | |||
551 | if (p->name[0] == '.') { | ||
552 | if (p->len == 1) | ||
553 | continue; | ||
554 | if (p->len == 2 && p->name[1] == '.') | ||
555 | continue; | ||
556 | } | ||
557 | err = -ENOTEMPTY; | ||
558 | break; | ||
559 | } | ||
560 | |||
561 | return err; | ||
562 | } | ||
563 | |||
564 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) | ||
565 | { | ||
566 | struct ovl_cache_entry *p; | ||
567 | |||
568 | mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); | ||
569 | list_for_each_entry(p, list, l_node) { | ||
570 | struct dentry *dentry; | ||
571 | |||
572 | if (!p->is_whiteout) | ||
573 | continue; | ||
574 | |||
575 | dentry = lookup_one_len(p->name, upper, p->len); | ||
576 | if (IS_ERR(dentry)) { | ||
577 | pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", | ||
578 | upper->d_name.name, p->len, p->name, | ||
579 | (int) PTR_ERR(dentry)); | ||
580 | continue; | ||
581 | } | ||
582 | ovl_cleanup(upper->d_inode, dentry); | ||
583 | dput(dentry); | ||
584 | } | ||
585 | mutex_unlock(&upper->d_inode->i_mutex); | ||
586 | } | ||