author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-01 20:51:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-01 20:51:54 -0400
commit     20b4fb485227404329e41ad15588afad3df23050 (patch)
tree       f3e099f0ab3da8a93b447203e294d2bb22f6dc05 /fs
parent     b9394d8a657cd3c064fa432aa0905c1b58b38fe9 (diff)
parent     ac3e3c5b1164397656df81b9e9ab4991184d3236 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS updates from Al Viro:
Misc cleanups all over the place, mainly with respect to the /proc
interfaces (switch create_proc_entry() to proc_create(), get rid of the
deprecated create_proc_read_entry() in favor of proc_create_data() and
seq_file, etc).

Roughly 7,000 lines of code removed.
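A minimal sketch of the pattern these conversions move callers to (the foo names and the "foo_stats" entry are illustrative, not taken from any driver in the series): register the entry with proc_create_data() backed by seq_file, and recover the private pointer in ->open() with PDE_DATA(inode) instead of dereferencing PDE(inode)->data.

/*
 * Illustrative only: a seq_file-backed /proc entry using the post-series API.
 */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct foo {
	int counter;
};

static struct foo foo_instance;

static int foo_show(struct seq_file *m, void *v)
{
	struct foo *foo = m->private;	/* handed over by single_open() */

	seq_printf(m, "counter: %d\n", foo->counter);
	return 0;
}

static int foo_open(struct inode *inode, struct file *file)
{
	/* PDE_DATA() replaces the old PDE(inode)->data dereference */
	return single_open(file, foo_show, PDE_DATA(inode));
}

static const struct file_operations foo_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init foo_init(void)
{
	/* proc_create_data() attaches the cookie that PDE_DATA() later returns */
	if (!proc_create_data("foo_stats", 0444, NULL, &foo_proc_fops,
			      &foo_instance))
		return -ENOMEM;
	return 0;
}

static void __exit foo_exit(void)
{
	remove_proc_entry("foo_stats", NULL);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

Keeping accessors like PDE_DATA() as the only public interface is what lets the series move PDE() and the rest of struct proc_dir_entry into fs/proc/internal.h.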
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (204 commits)
don't bother with deferred freeing of fdtables
proc: Move non-public stuff from linux/proc_fs.h to fs/proc/internal.h
proc: Make the PROC_I() and PDE() macros internal to procfs
proc: Supply a function to remove a proc entry by PDE
take cgroup_open() and cpuset_open() to fs/proc/base.c
ppc: Clean up scanlog
ppc: Clean up rtas_flash driver somewhat
hostap: proc: Use remove_proc_subtree()
drm: proc: Use remove_proc_subtree()
drm: proc: Use minor->index to label things, not PDE->name
drm: Constify drm_proc_list[]
zoran: Don't print proc_dir_entry data in debug
reiserfs: Don't access the proc_dir_entry in r_open(), r_start() r_show()
proc: Supply an accessor for getting the data from a PDE's parent
airo: Use remove_proc_subtree()
rtl8192u: Don't need to save device proc dir PDE
rtl8187se: Use a dir under /proc/net/r8180/
proc: Add proc_mkdir_data()
proc: Move some bits from linux/proc_fs.h to linux/{of.h,signal.h,tty.h}
proc: Move PDE_NET() to fs/proc/proc_net.c
...
Diffstat (limited to 'fs')
58 files changed, 1307 insertions, 1613 deletions
diff --git a/fs/Makefile b/fs/Makefile index f0db9c941a5f..4fe6df3ec28f 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | obj-y := open.o read_write.o file_table.o super.o \ | 8 | obj-y := open.o read_write.o file_table.o super.o \ |
9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ |
10 | ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o splice.o sync.o utimes.o \ | 13 | pnode.o splice.o sync.o utimes.o \ |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 096b23f821a1..526e4bbbde59 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -190,7 +190,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) | |||
190 | return ret; | 190 | return ret; |
191 | 191 | ||
192 | m = file->private_data; | 192 | m = file->private_data; |
193 | m->private = PDE(inode)->data; | 193 | m->private = PDE_DATA(inode); |
194 | 194 | ||
195 | return 0; | 195 | return 0; |
196 | } | 196 | } |
@@ -448,7 +448,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) | |||
448 | struct seq_file *m; | 448 | struct seq_file *m; |
449 | int ret; | 449 | int ret; |
450 | 450 | ||
451 | cell = PDE(inode)->data; | 451 | cell = PDE_DATA(inode); |
452 | if (!cell) | 452 | if (!cell) |
453 | return -ENOENT; | 453 | return -ENOENT; |
454 | 454 | ||
@@ -554,7 +554,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) | |||
554 | struct seq_file *m; | 554 | struct seq_file *m; |
555 | int ret; | 555 | int ret; |
556 | 556 | ||
557 | cell = PDE(inode)->data; | 557 | cell = PDE_DATA(inode); |
558 | if (!cell) | 558 | if (!cell) |
559 | return -ENOENT; | 559 | return -ENOENT; |
560 | 560 | ||
@@ -659,7 +659,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) | |||
659 | struct seq_file *m; | 659 | struct seq_file *m; |
660 | int ret; | 660 | int ret; |
661 | 661 | ||
662 | cell = PDE(inode)->data; | 662 | cell = PDE_DATA(inode); |
663 | if (!cell) | 663 | if (!cell) |
664 | return -ENOENT; | 664 | return -ENOENT; |
665 | 665 | ||
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) | |||
1324 | if (iocb->ki_pos < 0) | 1324 | if (iocb->ki_pos < 0) |
1325 | return -EINVAL; | 1325 | return -EINVAL; |
1326 | 1326 | ||
1327 | if (opcode == IOCB_CMD_PWRITEV) | ||
1328 | file_start_write(file); | ||
1327 | do { | 1329 | do { |
1328 | ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], | 1330 | ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], |
1329 | iocb->ki_nr_segs - iocb->ki_cur_seg, | 1331 | iocb->ki_nr_segs - iocb->ki_cur_seg, |
@@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) | |||
1336 | } while (ret > 0 && iocb->ki_left > 0 && | 1338 | } while (ret > 0 && iocb->ki_left > 0 && |
1337 | (opcode == IOCB_CMD_PWRITEV || | 1339 | (opcode == IOCB_CMD_PWRITEV || |
1338 | (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); | 1340 | (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); |
1341 | if (opcode == IOCB_CMD_PWRITEV) | ||
1342 | file_end_write(file); | ||
1339 | 1343 | ||
1340 | /* This means we must have transferred all that we could */ | 1344 | /* This means we must have transferred all that we could */ |
1341 | /* No need to retry anymore */ | 1345 | /* No need to retry anymore */ |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 02fe378fc506..bce87694f7b0 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -286,15 +286,12 @@ static int load_aout_binary(struct linux_binprm * bprm) | |||
286 | return error; | 286 | return error; |
287 | } | 287 | } |
288 | 288 | ||
289 | error = bprm->file->f_op->read(bprm->file, | 289 | error = read_code(bprm->file, text_addr, pos, |
290 | (char __user *)text_addr, | 290 | ex.a_text+ex.a_data); |
291 | ex.a_text+ex.a_data, &pos); | ||
292 | if ((signed long)error < 0) { | 291 | if ((signed long)error < 0) { |
293 | send_sig(SIGKILL, current, 0); | 292 | send_sig(SIGKILL, current, 0); |
294 | return error; | 293 | return error; |
295 | } | 294 | } |
296 | |||
297 | flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); | ||
298 | } else { | 295 | } else { |
299 | if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && | 296 | if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && |
300 | (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) | 297 | (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) |
@@ -310,14 +307,9 @@ static int load_aout_binary(struct linux_binprm * bprm) | |||
310 | } | 307 | } |
311 | 308 | ||
312 | if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { | 309 | if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { |
313 | loff_t pos = fd_offset; | ||
314 | vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); | 310 | vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); |
315 | bprm->file->f_op->read(bprm->file, | 311 | read_code(bprm->file, N_TXTADDR(ex), fd_offset, |
316 | (char __user *)N_TXTADDR(ex), | 312 | ex.a_text + ex.a_data); |
317 | ex.a_text+ex.a_data, &pos); | ||
318 | flush_icache_range((unsigned long) N_TXTADDR(ex), | ||
319 | (unsigned long) N_TXTADDR(ex) + | ||
320 | ex.a_text+ex.a_data); | ||
321 | goto beyond_if; | 313 | goto beyond_if; |
322 | } | 314 | } |
323 | 315 | ||
@@ -396,8 +388,6 @@ static int load_aout_library(struct file *file) | |||
396 | start_addr = ex.a_entry & 0xfffff000; | 388 | start_addr = ex.a_entry & 0xfffff000; |
397 | 389 | ||
398 | if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { | 390 | if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { |
399 | loff_t pos = N_TXTOFF(ex); | ||
400 | |||
401 | if (printk_ratelimit()) | 391 | if (printk_ratelimit()) |
402 | { | 392 | { |
403 | printk(KERN_WARNING | 393 | printk(KERN_WARNING |
@@ -406,11 +396,8 @@ static int load_aout_library(struct file *file) | |||
406 | } | 396 | } |
407 | vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); | 397 | vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); |
408 | 398 | ||
409 | file->f_op->read(file, (char __user *)start_addr, | 399 | read_code(file, start_addr, N_TXTOFF(ex), |
410 | ex.a_text + ex.a_data, &pos); | 400 | ex.a_text + ex.a_data); |
411 | flush_icache_range((unsigned long) start_addr, | ||
412 | (unsigned long) start_addr + ex.a_text + ex.a_data); | ||
413 | |||
414 | retval = 0; | 401 | retval = 0; |
415 | goto out; | 402 | goto out; |
416 | } | 403 | } |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c1cc06aed601..9dac212fc6f9 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -926,7 +926,6 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
926 | struct elf32_fdpic_loadseg *seg; | 926 | struct elf32_fdpic_loadseg *seg; |
927 | struct elf32_phdr *phdr; | 927 | struct elf32_phdr *phdr; |
928 | unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags; | 928 | unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags; |
929 | loff_t fpos; | ||
930 | int loop, ret; | 929 | int loop, ret; |
931 | 930 | ||
932 | load_addr = params->load_addr; | 931 | load_addr = params->load_addr; |
@@ -964,14 +963,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
964 | if (params->phdrs[loop].p_type != PT_LOAD) | 963 | if (params->phdrs[loop].p_type != PT_LOAD) |
965 | continue; | 964 | continue; |
966 | 965 | ||
967 | fpos = phdr->p_offset; | ||
968 | |||
969 | seg->addr = maddr + (phdr->p_vaddr - base); | 966 | seg->addr = maddr + (phdr->p_vaddr - base); |
970 | seg->p_vaddr = phdr->p_vaddr; | 967 | seg->p_vaddr = phdr->p_vaddr; |
971 | seg->p_memsz = phdr->p_memsz; | 968 | seg->p_memsz = phdr->p_memsz; |
972 | 969 | ||
973 | ret = file->f_op->read(file, (void *) seg->addr, | 970 | ret = read_code(file, seg->addr, phdr->p_offset, |
974 | phdr->p_filesz, &fpos); | 971 | phdr->p_filesz); |
975 | if (ret < 0) | 972 | if (ret < 0) |
976 | return ret; | 973 | return ret; |
977 | 974 | ||
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 2036d21baaef..d50bbe59da1e 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -207,11 +207,12 @@ static int decompress_exec( | |||
207 | 207 | ||
208 | /* Read in first chunk of data and parse gzip header. */ | 208 | /* Read in first chunk of data and parse gzip header. */ |
209 | fpos = offset; | 209 | fpos = offset; |
210 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 210 | ret = kernel_read(bprm->file, offset, buf, LBUFSIZE); |
211 | 211 | ||
212 | strm.next_in = buf; | 212 | strm.next_in = buf; |
213 | strm.avail_in = ret; | 213 | strm.avail_in = ret; |
214 | strm.total_in = 0; | 214 | strm.total_in = 0; |
215 | fpos += ret; | ||
215 | 216 | ||
216 | retval = -ENOEXEC; | 217 | retval = -ENOEXEC; |
217 | 218 | ||
@@ -277,7 +278,7 @@ static int decompress_exec( | |||
277 | } | 278 | } |
278 | 279 | ||
279 | while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { | 280 | while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { |
280 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 281 | ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE); |
281 | if (ret <= 0) | 282 | if (ret <= 0) |
282 | break; | 283 | break; |
283 | len -= ret; | 284 | len -= ret; |
@@ -285,6 +286,7 @@ static int decompress_exec( | |||
285 | strm.next_in = buf; | 286 | strm.next_in = buf; |
286 | strm.avail_in = ret; | 287 | strm.avail_in = ret; |
287 | strm.total_in = 0; | 288 | strm.total_in = 0; |
289 | fpos += ret; | ||
288 | } | 290 | } |
289 | 291 | ||
290 | if (ret < 0) { | 292 | if (ret < 0) { |
@@ -428,6 +430,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
428 | unsigned long textpos = 0, datapos = 0, result; | 430 | unsigned long textpos = 0, datapos = 0, result; |
429 | unsigned long realdatastart = 0; | 431 | unsigned long realdatastart = 0; |
430 | unsigned long text_len, data_len, bss_len, stack_len, flags; | 432 | unsigned long text_len, data_len, bss_len, stack_len, flags; |
433 | unsigned long full_data; | ||
431 | unsigned long len, memp = 0; | 434 | unsigned long len, memp = 0; |
432 | unsigned long memp_size, extra, rlim; | 435 | unsigned long memp_size, extra, rlim; |
433 | unsigned long *reloc = 0, *rp; | 436 | unsigned long *reloc = 0, *rp; |
@@ -451,6 +454,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
451 | relocs = ntohl(hdr->reloc_count); | 454 | relocs = ntohl(hdr->reloc_count); |
452 | flags = ntohl(hdr->flags); | 455 | flags = ntohl(hdr->flags); |
453 | rev = ntohl(hdr->rev); | 456 | rev = ntohl(hdr->rev); |
457 | full_data = data_len + relocs * sizeof(unsigned long); | ||
454 | 458 | ||
455 | if (strncmp(hdr->magic, "bFLT", 4)) { | 459 | if (strncmp(hdr->magic, "bFLT", 4)) { |
456 | /* | 460 | /* |
@@ -577,12 +581,12 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
577 | #ifdef CONFIG_BINFMT_ZFLAT | 581 | #ifdef CONFIG_BINFMT_ZFLAT |
578 | if (flags & FLAT_FLAG_GZDATA) { | 582 | if (flags & FLAT_FLAG_GZDATA) { |
579 | result = decompress_exec(bprm, fpos, (char *) datapos, | 583 | result = decompress_exec(bprm, fpos, (char *) datapos, |
580 | data_len + (relocs * sizeof(unsigned long)), 0); | 584 | full_data, 0); |
581 | } else | 585 | } else |
582 | #endif | 586 | #endif |
583 | { | 587 | { |
584 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 588 | result = read_code(bprm->file, datapos, fpos, |
585 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 589 | full_data); |
586 | } | 590 | } |
587 | if (IS_ERR_VALUE(result)) { | 591 | if (IS_ERR_VALUE(result)) { |
588 | printk("Unable to read data+bss, errno %d\n", (int)-result); | 592 | printk("Unable to read data+bss, errno %d\n", (int)-result); |
@@ -627,30 +631,25 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
627 | if (flags & FLAT_FLAG_GZIP) { | 631 | if (flags & FLAT_FLAG_GZIP) { |
628 | result = decompress_exec(bprm, sizeof (struct flat_hdr), | 632 | result = decompress_exec(bprm, sizeof (struct flat_hdr), |
629 | (((char *) textpos) + sizeof (struct flat_hdr)), | 633 | (((char *) textpos) + sizeof (struct flat_hdr)), |
630 | (text_len + data_len + (relocs * sizeof(unsigned long)) | 634 | (text_len + full_data |
631 | - sizeof (struct flat_hdr)), | 635 | - sizeof (struct flat_hdr)), |
632 | 0); | 636 | 0); |
633 | memmove((void *) datapos, (void *) realdatastart, | 637 | memmove((void *) datapos, (void *) realdatastart, |
634 | data_len + (relocs * sizeof(unsigned long))); | 638 | full_data); |
635 | } else if (flags & FLAT_FLAG_GZDATA) { | 639 | } else if (flags & FLAT_FLAG_GZDATA) { |
636 | fpos = 0; | 640 | result = read_code(bprm->file, textpos, 0, text_len); |
637 | result = bprm->file->f_op->read(bprm->file, | ||
638 | (char *) textpos, text_len, &fpos); | ||
639 | if (!IS_ERR_VALUE(result)) | 641 | if (!IS_ERR_VALUE(result)) |
640 | result = decompress_exec(bprm, text_len, (char *) datapos, | 642 | result = decompress_exec(bprm, text_len, (char *) datapos, |
641 | data_len + (relocs * sizeof(unsigned long)), 0); | 643 | full_data, 0); |
642 | } | 644 | } |
643 | else | 645 | else |
644 | #endif | 646 | #endif |
645 | { | 647 | { |
646 | fpos = 0; | 648 | result = read_code(bprm->file, textpos, 0, text_len); |
647 | result = bprm->file->f_op->read(bprm->file, | 649 | if (!IS_ERR_VALUE(result)) |
648 | (char *) textpos, text_len, &fpos); | 650 | result = read_code(bprm->file, datapos, |
649 | if (!IS_ERR_VALUE(result)) { | 651 | ntohl(hdr->data_start), |
650 | fpos = ntohl(hdr->data_start); | 652 | full_data); |
651 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | ||
652 | data_len + (relocs * sizeof(unsigned long)), &fpos); | ||
653 | } | ||
654 | } | 653 | } |
655 | if (IS_ERR_VALUE(result)) { | 654 | if (IS_ERR_VALUE(result)) { |
656 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); | 655 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ade03e6f7bd2..bb8b7a0e28a6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1514 | size_t count, ocount; | 1514 | size_t count, ocount; |
1515 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | 1515 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); |
1516 | 1516 | ||
1517 | sb_start_write(inode->i_sb); | ||
1518 | |||
1519 | mutex_lock(&inode->i_mutex); | 1517 | mutex_lock(&inode->i_mutex); |
1520 | 1518 | ||
1521 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | 1519 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); |
@@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1617 | if (sync) | 1615 | if (sync) |
1618 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1616 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1619 | out: | 1617 | out: |
1620 | sb_end_write(inode->i_sb); | ||
1621 | current->backing_dev_info = NULL; | 1618 | current->backing_dev_info = NULL; |
1622 | return num_written ? num_written : err; | 1619 | return num_written ? num_written : err; |
1623 | } | 1620 | } |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 480992259707..317f9ee9c991 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) | |||
962 | } | 962 | } |
963 | 963 | ||
964 | data = kmap(page); | 964 | data = kmap(page); |
965 | file_start_write(file); | ||
965 | old_fs = get_fs(); | 966 | old_fs = get_fs(); |
966 | set_fs(KERNEL_DS); | 967 | set_fs(KERNEL_DS); |
967 | ret = file->f_op->write( | 968 | ret = file->f_op->write( |
968 | file, (const void __user *) data, len, &pos); | 969 | file, (const void __user *) data, len, &pos); |
969 | set_fs(old_fs); | 970 | set_fs(old_fs); |
970 | kunmap(page); | 971 | kunmap(page); |
972 | file_end_write(file); | ||
971 | if (ret != len) | 973 | if (ret != len) |
972 | ret = -EIO; | 974 | ret = -EIO; |
973 | } | 975 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7a0dd99e4507..2d4a231dd70b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2520 | 2520 | ||
2521 | BUG_ON(iocb->ki_pos != pos); | 2521 | BUG_ON(iocb->ki_pos != pos); |
2522 | 2522 | ||
2523 | sb_start_write(inode->i_sb); | ||
2524 | |||
2525 | /* | 2523 | /* |
2526 | * We need to hold the sem to be sure nobody modifies lock list | 2524 | * We need to hold the sem to be sure nobody modifies lock list |
2527 | * with a brlock that prevents writing. | 2525 | * with a brlock that prevents writing. |
@@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2545 | } | 2543 | } |
2546 | 2544 | ||
2547 | up_read(&cinode->lock_sem); | 2545 | up_read(&cinode->lock_sem); |
2548 | sb_end_write(inode->i_sb); | ||
2549 | return rc; | 2546 | return rc; |
2550 | } | 2547 | } |
2551 | 2548 | ||
diff --git a/fs/coda/file.c b/fs/coda/file.c index fa4c100bdc7d..380b798f8443 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo | |||
79 | return -EINVAL; | 79 | return -EINVAL; |
80 | 80 | ||
81 | host_inode = file_inode(host_file); | 81 | host_inode = file_inode(host_file); |
82 | file_start_write(host_file); | ||
82 | mutex_lock(&coda_inode->i_mutex); | 83 | mutex_lock(&coda_inode->i_mutex); |
83 | 84 | ||
84 | ret = host_file->f_op->write(host_file, buf, count, ppos); | 85 | ret = host_file->f_op->write(host_file, buf, count, ppos); |
@@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo | |||
87 | coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; | 88 | coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; |
88 | coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; | 89 | coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; |
89 | mutex_unlock(&coda_inode->i_mutex); | 90 | mutex_unlock(&coda_inode->i_mutex); |
91 | file_end_write(host_file); | ||
90 | 92 | ||
91 | return ret; | 93 | return ret; |
92 | } | 94 | } |
diff --git a/fs/compat.c b/fs/compat.c index 5f83ffa42115..d0560c93973d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1068,190 +1068,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1068 | } | 1068 | } |
1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ |
1070 | 1070 | ||
1071 | static ssize_t compat_do_readv_writev(int type, struct file *file, | ||
1072 | const struct compat_iovec __user *uvector, | ||
1073 | unsigned long nr_segs, loff_t *pos) | ||
1074 | { | ||
1075 | compat_ssize_t tot_len; | ||
1076 | struct iovec iovstack[UIO_FASTIOV]; | ||
1077 | struct iovec *iov = iovstack; | ||
1078 | ssize_t ret; | ||
1079 | io_fn_t fn; | ||
1080 | iov_fn_t fnv; | ||
1081 | |||
1082 | ret = -EINVAL; | ||
1083 | if (!file->f_op) | ||
1084 | goto out; | ||
1085 | |||
1086 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | ||
1087 | UIO_FASTIOV, iovstack, &iov); | ||
1088 | if (ret <= 0) | ||
1089 | goto out; | ||
1090 | |||
1091 | tot_len = ret; | ||
1092 | ret = rw_verify_area(type, file, pos, tot_len); | ||
1093 | if (ret < 0) | ||
1094 | goto out; | ||
1095 | |||
1096 | fnv = NULL; | ||
1097 | if (type == READ) { | ||
1098 | fn = file->f_op->read; | ||
1099 | fnv = file->f_op->aio_read; | ||
1100 | } else { | ||
1101 | fn = (io_fn_t)file->f_op->write; | ||
1102 | fnv = file->f_op->aio_write; | ||
1103 | } | ||
1104 | |||
1105 | if (fnv) | ||
1106 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | ||
1107 | pos, fnv); | ||
1108 | else | ||
1109 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | ||
1110 | |||
1111 | out: | ||
1112 | if (iov != iovstack) | ||
1113 | kfree(iov); | ||
1114 | if ((ret + (type == READ)) > 0) { | ||
1115 | if (type == READ) | ||
1116 | fsnotify_access(file); | ||
1117 | else | ||
1118 | fsnotify_modify(file); | ||
1119 | } | ||
1120 | return ret; | ||
1121 | } | ||
1122 | |||
1123 | static size_t compat_readv(struct file *file, | ||
1124 | const struct compat_iovec __user *vec, | ||
1125 | unsigned long vlen, loff_t *pos) | ||
1126 | { | ||
1127 | ssize_t ret = -EBADF; | ||
1128 | |||
1129 | if (!(file->f_mode & FMODE_READ)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ret = -EINVAL; | ||
1133 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) | ||
1134 | goto out; | ||
1135 | |||
1136 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | ||
1137 | |||
1138 | out: | ||
1139 | if (ret > 0) | ||
1140 | add_rchar(current, ret); | ||
1141 | inc_syscr(current); | ||
1142 | return ret; | ||
1143 | } | ||
1144 | |||
1145 | asmlinkage ssize_t | ||
1146 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, | ||
1147 | unsigned long vlen) | ||
1148 | { | ||
1149 | struct fd f = fdget(fd); | ||
1150 | ssize_t ret; | ||
1151 | loff_t pos; | ||
1152 | |||
1153 | if (!f.file) | ||
1154 | return -EBADF; | ||
1155 | pos = f.file->f_pos; | ||
1156 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1157 | f.file->f_pos = pos; | ||
1158 | fdput(f); | ||
1159 | return ret; | ||
1160 | } | ||
1161 | |||
1162 | asmlinkage ssize_t | ||
1163 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, | ||
1164 | unsigned long vlen, loff_t pos) | ||
1165 | { | ||
1166 | struct fd f; | ||
1167 | ssize_t ret; | ||
1168 | |||
1169 | if (pos < 0) | ||
1170 | return -EINVAL; | ||
1171 | f = fdget(fd); | ||
1172 | if (!f.file) | ||
1173 | return -EBADF; | ||
1174 | ret = -ESPIPE; | ||
1175 | if (f.file->f_mode & FMODE_PREAD) | ||
1176 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1177 | fdput(f); | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1181 | asmlinkage ssize_t | ||
1182 | compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, | ||
1183 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1184 | { | ||
1185 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1186 | return compat_sys_preadv64(fd, vec, vlen, pos); | ||
1187 | } | ||
1188 | |||
1189 | static size_t compat_writev(struct file *file, | ||
1190 | const struct compat_iovec __user *vec, | ||
1191 | unsigned long vlen, loff_t *pos) | ||
1192 | { | ||
1193 | ssize_t ret = -EBADF; | ||
1194 | |||
1195 | if (!(file->f_mode & FMODE_WRITE)) | ||
1196 | goto out; | ||
1197 | |||
1198 | ret = -EINVAL; | ||
1199 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) | ||
1200 | goto out; | ||
1201 | |||
1202 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | ||
1203 | |||
1204 | out: | ||
1205 | if (ret > 0) | ||
1206 | add_wchar(current, ret); | ||
1207 | inc_syscw(current); | ||
1208 | return ret; | ||
1209 | } | ||
1210 | |||
1211 | asmlinkage ssize_t | ||
1212 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1213 | unsigned long vlen) | ||
1214 | { | ||
1215 | struct fd f = fdget(fd); | ||
1216 | ssize_t ret; | ||
1217 | loff_t pos; | ||
1218 | |||
1219 | if (!f.file) | ||
1220 | return -EBADF; | ||
1221 | pos = f.file->f_pos; | ||
1222 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1223 | f.file->f_pos = pos; | ||
1224 | fdput(f); | ||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | asmlinkage ssize_t | ||
1229 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, | ||
1230 | unsigned long vlen, loff_t pos) | ||
1231 | { | ||
1232 | struct fd f; | ||
1233 | ssize_t ret; | ||
1234 | |||
1235 | if (pos < 0) | ||
1236 | return -EINVAL; | ||
1237 | f = fdget(fd); | ||
1238 | if (!f.file) | ||
1239 | return -EBADF; | ||
1240 | ret = -ESPIPE; | ||
1241 | if (f.file->f_mode & FMODE_PWRITE) | ||
1242 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1243 | fdput(f); | ||
1244 | return ret; | ||
1245 | } | ||
1246 | |||
1247 | asmlinkage ssize_t | ||
1248 | compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1249 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1250 | { | ||
1251 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1252 | return compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1253 | } | ||
1254 | |||
1255 | /* | 1071 | /* |
1256 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | 1072 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the |
1257 | * O_LARGEFILE flag. | 1073 | * O_LARGEFILE flag. |
diff --git a/fs/coredump.c b/fs/coredump.c index ec306cc9a28a..a9abe313e8d5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -432,9 +432,7 @@ static bool dump_interrupted(void) | |||
432 | 432 | ||
433 | static void wait_for_dump_helpers(struct file *file) | 433 | static void wait_for_dump_helpers(struct file *file) |
434 | { | 434 | { |
435 | struct pipe_inode_info *pipe; | 435 | struct pipe_inode_info *pipe = file->private_data; |
436 | |||
437 | pipe = file_inode(file)->i_pipe; | ||
438 | 436 | ||
439 | pipe_lock(pipe); | 437 | pipe_lock(pipe); |
440 | pipe->readers++; | 438 | pipe->readers++; |
@@ -656,7 +654,9 @@ void do_coredump(siginfo_t *siginfo) | |||
656 | goto close_fail; | 654 | goto close_fail; |
657 | if (displaced) | 655 | if (displaced) |
658 | put_files_struct(displaced); | 656 | put_files_struct(displaced); |
657 | file_start_write(cprm.file); | ||
659 | core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); | 658 | core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); |
659 | file_end_write(cprm.file); | ||
660 | 660 | ||
661 | if (ispipe && core_pipe_limit) | 661 | if (ispipe && core_pipe_limit) |
662 | wait_for_dump_helpers(cprm.file); | 662 | wait_for_dump_helpers(cprm.file); |
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index ede07fc7309f..bfb531564319 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/efi.h> | 10 | #include <linux/efi.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/slab.h> | ||
12 | 13 | ||
13 | #include "internal.h" | 14 | #include "internal.h" |
14 | 15 | ||
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 640e289d522e..7e787fb90293 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/efi.h> | 10 | #include <linux/efi.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
13 | #include <linux/slab.h> | ||
13 | 14 | ||
14 | #include "internal.h" | 15 | #include "internal.h" |
15 | 16 | ||
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 525a2a1ac16c..141aee31884f 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/ucs2_string.h> | 15 | #include <linux/ucs2_string.h> |
16 | #include <linux/slab.h> | ||
17 | #include <linux/magic.h> | ||
16 | 18 | ||
17 | #include "internal.h" | 19 | #include "internal.h" |
18 | 20 | ||
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -802,6 +802,15 @@ int kernel_read(struct file *file, loff_t offset, | |||
802 | 802 | ||
803 | EXPORT_SYMBOL(kernel_read); | 803 | EXPORT_SYMBOL(kernel_read); |
804 | 804 | ||
805 | ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) | ||
806 | { | ||
807 | ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos); | ||
808 | if (res > 0) | ||
809 | flush_icache_range(addr, addr + len); | ||
810 | return res; | ||
811 | } | ||
812 | EXPORT_SYMBOL(read_code); | ||
813 | |||
805 | static int exec_mmap(struct mm_struct *mm) | 814 | static int exec_mmap(struct mm_struct *mm) |
806 | { | 815 | { |
807 | struct task_struct *tsk; | 816 | struct task_struct *tsk; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a11ea4d6164c..b1ed9e07434b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2260,7 +2260,7 @@ static const struct seq_operations ext4_mb_seq_groups_ops = { | |||
2260 | 2260 | ||
2261 | static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | 2261 | static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) |
2262 | { | 2262 | { |
2263 | struct super_block *sb = PDE(inode)->data; | 2263 | struct super_block *sb = PDE_DATA(inode); |
2264 | int rc; | 2264 | int rc; |
2265 | 2265 | ||
2266 | rc = seq_open(file, &ext4_mb_seq_groups_ops); | 2266 | rc = seq_open(file, &ext4_mb_seq_groups_ops); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dbc7c090c13a..24a146bde742 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1806,7 +1806,7 @@ static int options_seq_show(struct seq_file *seq, void *offset) | |||
1806 | 1806 | ||
1807 | static int options_open_fs(struct inode *inode, struct file *file) | 1807 | static int options_open_fs(struct inode *inode, struct file *file) |
1808 | { | 1808 | { |
1809 | return single_open(file, options_seq_show, PDE(inode)->data); | 1809 | return single_open(file, options_seq_show, PDE_DATA(inode)); |
1810 | } | 1810 | } |
1811 | 1811 | ||
1812 | static const struct file_operations ext4_seq_options_fops = { | 1812 | static const struct file_operations ext4_seq_options_fops = { |
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 137af4255da6..44abc2f286e0 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -299,7 +299,7 @@ int f2fs_acl_chmod(struct inode *inode) | |||
299 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 299 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
300 | struct posix_acl *acl; | 300 | struct posix_acl *acl; |
301 | int error; | 301 | int error; |
302 | mode_t mode = get_inode_mode(inode); | 302 | umode_t mode = get_inode_mode(inode); |
303 | 303 | ||
304 | if (!test_opt(sbi, POSIX_ACL)) | 304 | if (!test_opt(sbi, POSIX_ACL)) |
305 | return 0; | 305 | return 0; |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a1f38443ecee..1be948768e2f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -60,7 +60,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
60 | 60 | ||
61 | static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) | 61 | static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) |
62 | { | 62 | { |
63 | mode_t mode = inode->i_mode; | 63 | umode_t mode = inode->i_mode; |
64 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | 64 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 958a46da19ae..db626282d424 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -590,7 +590,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
590 | { | 590 | { |
591 | unsigned int oldflags; | 591 | unsigned int oldflags; |
592 | 592 | ||
593 | ret = mnt_want_write(filp->f_path.mnt); | 593 | ret = mnt_want_write_file(filp); |
594 | if (ret) | 594 | if (ret) |
595 | return ret; | 595 | return ret; |
596 | 596 | ||
@@ -627,7 +627,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
627 | inode->i_ctime = CURRENT_TIME; | 627 | inode->i_ctime = CURRENT_TIME; |
628 | mark_inode_dirty(inode); | 628 | mark_inode_dirty(inode); |
629 | out: | 629 | out: |
630 | mnt_drop_write(filp->f_path.mnt); | 630 | mnt_drop_write_file(filp); |
631 | return ret; | 631 | return ret; |
632 | } | 632 | } |
633 | default: | 633 | default: |
diff --git a/fs/fifo.c b/fs/fifo.c deleted file mode 100644 index cf6f4345ceb0..000000000000 --- a/fs/fifo.c +++ /dev/null | |||
@@ -1,153 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/fifo.c | ||
3 | * | ||
4 | * written by Paul H. Hargrove | ||
5 | * | ||
6 | * Fixes: | ||
7 | * 10-06-1999, AV: fixed OOM handling in fifo_open(), moved | ||
8 | * initialization there, switched to external | ||
9 | * allocation of pipe_inode_info. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mm.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/pipe_fs_i.h> | ||
16 | |||
17 | static int wait_for_partner(struct inode* inode, unsigned int *cnt) | ||
18 | { | ||
19 | int cur = *cnt; | ||
20 | |||
21 | while (cur == *cnt) { | ||
22 | pipe_wait(inode->i_pipe); | ||
23 | if (signal_pending(current)) | ||
24 | break; | ||
25 | } | ||
26 | return cur == *cnt ? -ERESTARTSYS : 0; | ||
27 | } | ||
28 | |||
29 | static void wake_up_partner(struct inode* inode) | ||
30 | { | ||
31 | wake_up_interruptible(&inode->i_pipe->wait); | ||
32 | } | ||
33 | |||
34 | static int fifo_open(struct inode *inode, struct file *filp) | ||
35 | { | ||
36 | struct pipe_inode_info *pipe; | ||
37 | int ret; | ||
38 | |||
39 | mutex_lock(&inode->i_mutex); | ||
40 | pipe = inode->i_pipe; | ||
41 | if (!pipe) { | ||
42 | ret = -ENOMEM; | ||
43 | pipe = alloc_pipe_info(inode); | ||
44 | if (!pipe) | ||
45 | goto err_nocleanup; | ||
46 | inode->i_pipe = pipe; | ||
47 | } | ||
48 | filp->f_version = 0; | ||
49 | |||
50 | /* We can only do regular read/write on fifos */ | ||
51 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); | ||
52 | |||
53 | switch (filp->f_mode) { | ||
54 | case FMODE_READ: | ||
55 | /* | ||
56 | * O_RDONLY | ||
57 | * POSIX.1 says that O_NONBLOCK means return with the FIFO | ||
58 | * opened, even when there is no process writing the FIFO. | ||
59 | */ | ||
60 | filp->f_op = &read_pipefifo_fops; | ||
61 | pipe->r_counter++; | ||
62 | if (pipe->readers++ == 0) | ||
63 | wake_up_partner(inode); | ||
64 | |||
65 | if (!pipe->writers) { | ||
66 | if ((filp->f_flags & O_NONBLOCK)) { | ||
67 | /* suppress POLLHUP until we have | ||
68 | * seen a writer */ | ||
69 | filp->f_version = pipe->w_counter; | ||
70 | } else { | ||
71 | if (wait_for_partner(inode, &pipe->w_counter)) | ||
72 | goto err_rd; | ||
73 | } | ||
74 | } | ||
75 | break; | ||
76 | |||
77 | case FMODE_WRITE: | ||
78 | /* | ||
79 | * O_WRONLY | ||
80 | * POSIX.1 says that O_NONBLOCK means return -1 with | ||
81 | * errno=ENXIO when there is no process reading the FIFO. | ||
82 | */ | ||
83 | ret = -ENXIO; | ||
84 | if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) | ||
85 | goto err; | ||
86 | |||
87 | filp->f_op = &write_pipefifo_fops; | ||
88 | pipe->w_counter++; | ||
89 | if (!pipe->writers++) | ||
90 | wake_up_partner(inode); | ||
91 | |||
92 | if (!pipe->readers) { | ||
93 | if (wait_for_partner(inode, &pipe->r_counter)) | ||
94 | goto err_wr; | ||
95 | } | ||
96 | break; | ||
97 | |||
98 | case FMODE_READ | FMODE_WRITE: | ||
99 | /* | ||
100 | * O_RDWR | ||
101 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. | ||
102 | * This implementation will NEVER block on a O_RDWR open, since | ||
103 | * the process can at least talk to itself. | ||
104 | */ | ||
105 | filp->f_op = &rdwr_pipefifo_fops; | ||
106 | |||
107 | pipe->readers++; | ||
108 | pipe->writers++; | ||
109 | pipe->r_counter++; | ||
110 | pipe->w_counter++; | ||
111 | if (pipe->readers == 1 || pipe->writers == 1) | ||
112 | wake_up_partner(inode); | ||
113 | break; | ||
114 | |||
115 | default: | ||
116 | ret = -EINVAL; | ||
117 | goto err; | ||
118 | } | ||
119 | |||
120 | /* Ok! */ | ||
121 | mutex_unlock(&inode->i_mutex); | ||
122 | return 0; | ||
123 | |||
124 | err_rd: | ||
125 | if (!--pipe->readers) | ||
126 | wake_up_interruptible(&pipe->wait); | ||
127 | ret = -ERESTARTSYS; | ||
128 | goto err; | ||
129 | |||
130 | err_wr: | ||
131 | if (!--pipe->writers) | ||
132 | wake_up_interruptible(&pipe->wait); | ||
133 | ret = -ERESTARTSYS; | ||
134 | goto err; | ||
135 | |||
136 | err: | ||
137 | if (!pipe->readers && !pipe->writers) | ||
138 | free_pipe_info(inode); | ||
139 | |||
140 | err_nocleanup: | ||
141 | mutex_unlock(&inode->i_mutex); | ||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Dummy default file-operations: the only thing this does | ||
147 | * is contain the open that then fills in the correct operations | ||
148 | * depending on the access mode of the file... | ||
149 | */ | ||
150 | const struct file_operations def_fifo_fops = { | ||
151 | .open = fifo_open, /* will set read_ or write_pipefifo_fops */ | ||
152 | .llseek = noop_llseek, | ||
153 | }; | ||
diff --git a/fs/file.c b/fs/file.c --- a/fs/file.c +++ b/fs/file.c | |||
@@ -23,24 +23,10 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | 25 | ||
26 | struct fdtable_defer { | ||
27 | spinlock_t lock; | ||
28 | struct work_struct wq; | ||
29 | struct fdtable *next; | ||
30 | }; | ||
31 | |||
32 | int sysctl_nr_open __read_mostly = 1024*1024; | 26 | int sysctl_nr_open __read_mostly = 1024*1024; |
33 | int sysctl_nr_open_min = BITS_PER_LONG; | 27 | int sysctl_nr_open_min = BITS_PER_LONG; |
34 | int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | 28 | int sysctl_nr_open_max = 1024 * 1024; /* raised later */ |
35 | 29 | ||
36 | /* | ||
37 | * We use this list to defer free fdtables that have vmalloced | ||
38 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed | ||
39 | * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in | ||
40 | * this per-task structure. | ||
41 | */ | ||
42 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | ||
43 | |||
44 | static void *alloc_fdmem(size_t size) | 30 | static void *alloc_fdmem(size_t size) |
45 | { | 31 | { |
46 | /* | 32 | /* |
@@ -67,46 +53,9 @@ static void __free_fdtable(struct fdtable *fdt) | |||
67 | kfree(fdt); | 53 | kfree(fdt); |
68 | } | 54 | } |
69 | 55 | ||
70 | static void free_fdtable_work(struct work_struct *work) | ||
71 | { | ||
72 | struct fdtable_defer *f = | ||
73 | container_of(work, struct fdtable_defer, wq); | ||
74 | struct fdtable *fdt; | ||
75 | |||
76 | spin_lock_bh(&f->lock); | ||
77 | fdt = f->next; | ||
78 | f->next = NULL; | ||
79 | spin_unlock_bh(&f->lock); | ||
80 | while(fdt) { | ||
81 | struct fdtable *next = fdt->next; | ||
82 | |||
83 | __free_fdtable(fdt); | ||
84 | fdt = next; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void free_fdtable_rcu(struct rcu_head *rcu) | 56 | static void free_fdtable_rcu(struct rcu_head *rcu) |
89 | { | 57 | { |
90 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | 58 | __free_fdtable(container_of(rcu, struct fdtable, rcu)); |
91 | struct fdtable_defer *fddef; | ||
92 | |||
93 | BUG_ON(!fdt); | ||
94 | BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); | ||
95 | |||
96 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { | ||
97 | kfree(fdt->fd); | ||
98 | kfree(fdt->open_fds); | ||
99 | kfree(fdt); | ||
100 | } else { | ||
101 | fddef = &get_cpu_var(fdtable_defer_list); | ||
102 | spin_lock(&fddef->lock); | ||
103 | fdt->next = fddef->next; | ||
104 | fddef->next = fdt; | ||
105 | /* vmallocs are handled from the workqueue context */ | ||
106 | schedule_work(&fddef->wq); | ||
107 | spin_unlock(&fddef->lock); | ||
108 | put_cpu_var(fdtable_defer_list); | ||
109 | } | ||
110 | } | 59 | } |
111 | 60 | ||
112 | /* | 61 | /* |
@@ -174,7 +123,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
174 | fdt->open_fds = data; | 123 | fdt->open_fds = data; |
175 | data += nr / BITS_PER_BYTE; | 124 | data += nr / BITS_PER_BYTE; |
176 | fdt->close_on_exec = data; | 125 | fdt->close_on_exec = data; |
177 | fdt->next = NULL; | ||
178 | 126 | ||
179 | return fdt; | 127 | return fdt; |
180 | 128 | ||
@@ -221,7 +169,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
221 | /* Continue as planned */ | 169 | /* Continue as planned */ |
222 | copy_fdtable(new_fdt, cur_fdt); | 170 | copy_fdtable(new_fdt, cur_fdt); |
223 | rcu_assign_pointer(files->fdt, new_fdt); | 171 | rcu_assign_pointer(files->fdt, new_fdt); |
224 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) | 172 | if (cur_fdt != &files->fdtab) |
225 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); | 173 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
226 | } else { | 174 | } else { |
227 | /* Somebody else expanded, so undo our attempt */ | 175 | /* Somebody else expanded, so undo our attempt */ |
@@ -316,7 +264,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
316 | new_fdt->close_on_exec = newf->close_on_exec_init; | 264 | new_fdt->close_on_exec = newf->close_on_exec_init; |
317 | new_fdt->open_fds = newf->open_fds_init; | 265 | new_fdt->open_fds = newf->open_fds_init; |
318 | new_fdt->fd = &newf->fd_array[0]; | 266 | new_fdt->fd = &newf->fd_array[0]; |
319 | new_fdt->next = NULL; | ||
320 | 267 | ||
321 | spin_lock(&oldf->file_lock); | 268 | spin_lock(&oldf->file_lock); |
322 | old_fdt = files_fdtable(oldf); | 269 | old_fdt = files_fdtable(oldf); |
@@ -490,19 +437,8 @@ void exit_files(struct task_struct *tsk) | |||
490 | } | 437 | } |
491 | } | 438 | } |
492 | 439 | ||
493 | static void fdtable_defer_list_init(int cpu) | ||
494 | { | ||
495 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | ||
496 | spin_lock_init(&fddef->lock); | ||
497 | INIT_WORK(&fddef->wq, free_fdtable_work); | ||
498 | fddef->next = NULL; | ||
499 | } | ||
500 | |||
501 | void __init files_defer_init(void) | 440 | void __init files_defer_init(void) |
502 | { | 441 | { |
503 | int i; | ||
504 | for_each_possible_cpu(i) | ||
505 | fdtable_defer_list_init(i); | ||
506 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & | 442 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & |
507 | -BITS_PER_LONG; | 443 | -BITS_PER_LONG; |
508 | } | 444 | } |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 11dfa0c3fb46..9bfd1a3214e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -1319,7 +1319,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | |||
1319 | page_nr++; | 1319 | page_nr++; |
1320 | ret += buf->len; | 1320 | ret += buf->len; |
1321 | 1321 | ||
1322 | if (pipe->inode) | 1322 | if (pipe->files) |
1323 | do_wakeup = 1; | 1323 | do_wakeup = 1; |
1324 | } | 1324 | } |
1325 | 1325 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b80ba95bad..d15c6f21c17f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
971 | return err; | 971 | return err; |
972 | 972 | ||
973 | count = ocount; | 973 | count = ocount; |
974 | sb_start_write(inode->i_sb); | ||
975 | mutex_lock(&inode->i_mutex); | 974 | mutex_lock(&inode->i_mutex); |
976 | 975 | ||
977 | /* We can write back this queue in page reclaim */ | 976 | /* We can write back this queue in page reclaim */ |
@@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1030 | out: | 1029 | out: |
1031 | current->backing_dev_info = NULL; | 1030 | current->backing_dev_info = NULL; |
1032 | mutex_unlock(&inode->i_mutex); | 1031 | mutex_unlock(&inode->i_mutex); |
1033 | sb_end_write(inode->i_sb); | ||
1034 | 1032 | ||
1035 | return written ? written : err; | 1033 | return written ? written : err; |
1036 | } | 1034 | } |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 9f9dbeceeee7..3027f4dbbab5 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -131,6 +131,24 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, | |||
131 | return ret; | 131 | return ret; |
132 | } | 132 | } |
133 | 133 | ||
134 | static int hpfs_write_end(struct file *file, struct address_space *mapping, | ||
135 | loff_t pos, unsigned len, unsigned copied, | ||
136 | struct page *pagep, void *fsdata) | ||
137 | { | ||
138 | struct inode *inode = mapping->host; | ||
139 | int err; | ||
140 | err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); | ||
141 | if (err < len) | ||
142 | hpfs_write_failed(mapping, pos + len); | ||
143 | if (!(err < 0)) { | ||
144 | /* make sure we write it on close, if not earlier */ | ||
145 | hpfs_lock(inode->i_sb); | ||
146 | hpfs_i(inode)->i_dirty = 1; | ||
147 | hpfs_unlock(inode->i_sb); | ||
148 | } | ||
149 | return err; | ||
150 | } | ||
151 | |||
134 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | 152 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) |
135 | { | 153 | { |
136 | return generic_block_bmap(mapping,block,hpfs_get_block); | 154 | return generic_block_bmap(mapping,block,hpfs_get_block); |
@@ -140,30 +158,16 @@ const struct address_space_operations hpfs_aops = { | |||
140 | .readpage = hpfs_readpage, | 158 | .readpage = hpfs_readpage, |
141 | .writepage = hpfs_writepage, | 159 | .writepage = hpfs_writepage, |
142 | .write_begin = hpfs_write_begin, | 160 | .write_begin = hpfs_write_begin, |
143 | .write_end = generic_write_end, | 161 | .write_end = hpfs_write_end, |
144 | .bmap = _hpfs_bmap | 162 | .bmap = _hpfs_bmap |
145 | }; | 163 | }; |
146 | 164 | ||
147 | static ssize_t hpfs_file_write(struct file *file, const char __user *buf, | ||
148 | size_t count, loff_t *ppos) | ||
149 | { | ||
150 | ssize_t retval; | ||
151 | |||
152 | retval = do_sync_write(file, buf, count, ppos); | ||
153 | if (retval > 0) { | ||
154 | hpfs_lock(file->f_path.dentry->d_sb); | ||
155 | hpfs_i(file_inode(file))->i_dirty = 1; | ||
156 | hpfs_unlock(file->f_path.dentry->d_sb); | ||
157 | } | ||
158 | return retval; | ||
159 | } | ||
160 | |||
161 | const struct file_operations hpfs_file_ops = | 165 | const struct file_operations hpfs_file_ops = |
162 | { | 166 | { |
163 | .llseek = generic_file_llseek, | 167 | .llseek = generic_file_llseek, |
164 | .read = do_sync_read, | 168 | .read = do_sync_read, |
165 | .aio_read = generic_file_aio_read, | 169 | .aio_read = generic_file_aio_read, |
166 | .write = hpfs_file_write, | 170 | .write = do_sync_write, |
167 | .aio_write = generic_file_aio_write, | 171 | .aio_write = generic_file_aio_write, |
168 | .mmap = generic_file_mmap, | 172 | .mmap = generic_file_mmap, |
169 | .release = hpfs_file_release, | 173 | .release = hpfs_file_release, |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 126d3c2e2dee..cd3e38972c86 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -436,7 +436,6 @@ static int hppfs_open(struct inode *inode, struct file *file) | |||
436 | path.mnt = inode->i_sb->s_fs_info; | 436 | path.mnt = inode->i_sb->s_fs_info; |
437 | path.dentry = HPPFS_I(inode)->proc_dentry; | 437 | path.dentry = HPPFS_I(inode)->proc_dentry; |
438 | 438 | ||
439 | /* XXX This isn't closed anywhere */ | ||
440 | data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); | 439 | data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); |
441 | err = PTR_ERR(data->proc_file); | 440 | err = PTR_ERR(data->proc_file); |
442 | if (IS_ERR(data->proc_file)) | 441 | if (IS_ERR(data->proc_file)) |
@@ -523,12 +522,23 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where) | |||
523 | return default_llseek(file, off, where); | 522 | return default_llseek(file, off, where); |
524 | } | 523 | } |
525 | 524 | ||
525 | static int hppfs_release(struct inode *inode, struct file *file) | ||
526 | { | ||
527 | struct hppfs_private *data = file->private_data; | ||
528 | struct file *proc_file = data->proc_file; | ||
529 | if (proc_file) | ||
530 | fput(proc_file); | ||
531 | kfree(data); | ||
532 | return 0; | ||
533 | } | ||
534 | |||
526 | static const struct file_operations hppfs_file_fops = { | 535 | static const struct file_operations hppfs_file_fops = { |
527 | .owner = NULL, | 536 | .owner = NULL, |
528 | .llseek = hppfs_llseek, | 537 | .llseek = hppfs_llseek, |
529 | .read = hppfs_read, | 538 | .read = hppfs_read, |
530 | .write = hppfs_write, | 539 | .write = hppfs_write, |
531 | .open = hppfs_open, | 540 | .open = hppfs_open, |
541 | .release = hppfs_release, | ||
532 | }; | 542 | }; |
533 | 543 | ||
534 | struct hppfs_dirent { | 544 | struct hppfs_dirent { |
@@ -570,18 +580,12 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
570 | return err; | 580 | return err; |
571 | } | 581 | } |
572 | 582 | ||
573 | static int hppfs_fsync(struct file *file, loff_t start, loff_t end, | ||
574 | int datasync) | ||
575 | { | ||
576 | return filemap_write_and_wait_range(file->f_mapping, start, end); | ||
577 | } | ||
578 | |||
579 | static const struct file_operations hppfs_dir_fops = { | 583 | static const struct file_operations hppfs_dir_fops = { |
580 | .owner = NULL, | 584 | .owner = NULL, |
581 | .readdir = hppfs_readdir, | 585 | .readdir = hppfs_readdir, |
582 | .open = hppfs_dir_open, | 586 | .open = hppfs_dir_open, |
583 | .fsync = hppfs_fsync, | ||
584 | .llseek = default_llseek, | 587 | .llseek = default_llseek, |
588 | .release = hppfs_release, | ||
585 | }; | 589 | }; |
586 | 590 | ||
587 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) | 591 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) |
diff --git a/fs/inode.c b/fs/inode.c index a898b3d43ccf..00d5fc3b86e1 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1803,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1803 | inode->i_fop = &def_blk_fops; | 1803 | inode->i_fop = &def_blk_fops; |
1804 | inode->i_rdev = rdev; | 1804 | inode->i_rdev = rdev; |
1805 | } else if (S_ISFIFO(mode)) | 1805 | } else if (S_ISFIFO(mode)) |
1806 | inode->i_fop = &def_fifo_fops; | 1806 | inode->i_fop = &pipefifo_fops; |
1807 | else if (S_ISSOCK(mode)) | 1807 | else if (S_ISSOCK(mode)) |
1808 | inode->i_fop = &bad_sock_fops; | 1808 | inode->i_fop = &bad_sock_fops; |
1809 | else | 1809 | else |
diff --git a/fs/internal.h b/fs/internal.h index 4be78237d896..eaa75f75b625 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -130,3 +130,8 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | |||
130 | * read_write.c | 130 | * read_write.c |
131 | */ | 131 | */ |
132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); |
133 | |||
134 | /* | ||
135 | * pipe.c | ||
136 | */ | ||
137 | extern const struct file_operations pipefifo_fops; | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f6c5ba027f4f..95457576e434 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -979,7 +979,7 @@ static const struct seq_operations jbd2_seq_info_ops = { | |||
979 | 979 | ||
980 | static int jbd2_seq_info_open(struct inode *inode, struct file *file) | 980 | static int jbd2_seq_info_open(struct inode *inode, struct file *file) |
981 | { | 981 | { |
982 | journal_t *journal = PDE(inode)->data; | 982 | journal_t *journal = PDE_DATA(inode); |
983 | struct jbd2_stats_proc_session *s; | 983 | struct jbd2_stats_proc_session *s; |
984 | int rc, size; | 984 | int rc, size; |
985 | 985 | ||
diff --git a/fs/mount.h b/fs/mount.h index cd5007980400..64a858143ff9 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -18,6 +18,12 @@ struct mnt_pcp { | |||
18 | int mnt_writers; | 18 | int mnt_writers; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | struct mountpoint { | ||
22 | struct list_head m_hash; | ||
23 | struct dentry *m_dentry; | ||
24 | int m_count; | ||
25 | }; | ||
26 | |||
21 | struct mount { | 27 | struct mount { |
22 | struct list_head mnt_hash; | 28 | struct list_head mnt_hash; |
23 | struct mount *mnt_parent; | 29 | struct mount *mnt_parent; |
@@ -40,6 +46,7 @@ struct mount { | |||
40 | struct list_head mnt_slave; /* slave list entry */ | 46 | struct list_head mnt_slave; /* slave list entry */ |
41 | struct mount *mnt_master; /* slave is on master->mnt_slave_list */ | 47 | struct mount *mnt_master; /* slave is on master->mnt_slave_list */ |
42 | struct mnt_namespace *mnt_ns; /* containing namespace */ | 48 | struct mnt_namespace *mnt_ns; /* containing namespace */ |
49 | struct mountpoint *mnt_mp; /* where is it mounted */ | ||
43 | #ifdef CONFIG_FSNOTIFY | 50 | #ifdef CONFIG_FSNOTIFY |
44 | struct hlist_head mnt_fsnotify_marks; | 51 | struct hlist_head mnt_fsnotify_marks; |
45 | __u32 mnt_fsnotify_mask; | 52 | __u32 mnt_fsnotify_mask; |
diff --git a/fs/namespace.c b/fs/namespace.c index 341d3f564082..b4f96a5230a3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -21,7 +21,8 @@ | |||
21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_ns.h> |
25 | #include <linux/magic.h> | ||
25 | #include "pnode.h" | 26 | #include "pnode.h" |
26 | #include "internal.h" | 27 | #include "internal.h" |
27 | 28 | ||
@@ -36,6 +37,7 @@ static int mnt_id_start = 0; | |||
36 | static int mnt_group_start = 1; | 37 | static int mnt_group_start = 1; |
37 | 38 | ||
38 | static struct list_head *mount_hashtable __read_mostly; | 39 | static struct list_head *mount_hashtable __read_mostly; |
40 | static struct list_head *mountpoint_hashtable __read_mostly; | ||
39 | static struct kmem_cache *mnt_cache __read_mostly; | 41 | static struct kmem_cache *mnt_cache __read_mostly; |
40 | static struct rw_semaphore namespace_sem; | 42 | static struct rw_semaphore namespace_sem; |
41 | 43 | ||
@@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path) | |||
605 | } | 607 | } |
606 | } | 608 | } |
607 | 609 | ||
610 | static struct mountpoint *new_mountpoint(struct dentry *dentry) | ||
611 | { | ||
612 | struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); | ||
613 | struct mountpoint *mp; | ||
614 | |||
615 | list_for_each_entry(mp, chain, m_hash) { | ||
616 | if (mp->m_dentry == dentry) { | ||
617 | /* might be worth a WARN_ON() */ | ||
618 | if (d_unlinked(dentry)) | ||
619 | return ERR_PTR(-ENOENT); | ||
620 | mp->m_count++; | ||
621 | return mp; | ||
622 | } | ||
623 | } | ||
624 | |||
625 | mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); | ||
626 | if (!mp) | ||
627 | return ERR_PTR(-ENOMEM); | ||
628 | |||
629 | spin_lock(&dentry->d_lock); | ||
630 | if (d_unlinked(dentry)) { | ||
631 | spin_unlock(&dentry->d_lock); | ||
632 | kfree(mp); | ||
633 | return ERR_PTR(-ENOENT); | ||
634 | } | ||
635 | dentry->d_flags |= DCACHE_MOUNTED; | ||
636 | spin_unlock(&dentry->d_lock); | ||
637 | mp->m_dentry = dentry; | ||
638 | mp->m_count = 1; | ||
639 | list_add(&mp->m_hash, chain); | ||
640 | return mp; | ||
641 | } | ||
642 | |||
643 | static void put_mountpoint(struct mountpoint *mp) | ||
644 | { | ||
645 | if (!--mp->m_count) { | ||
646 | struct dentry *dentry = mp->m_dentry; | ||
647 | spin_lock(&dentry->d_lock); | ||
648 | dentry->d_flags &= ~DCACHE_MOUNTED; | ||
649 | spin_unlock(&dentry->d_lock); | ||
650 | list_del(&mp->m_hash); | ||
651 | kfree(mp); | ||
652 | } | ||
653 | } | ||
654 | |||
608 | static inline int check_mnt(struct mount *mnt) | 655 | static inline int check_mnt(struct mount *mnt) |
609 | { | 656 | { |
610 | return mnt->mnt_ns == current->nsproxy->mnt_ns; | 657 | return mnt->mnt_ns == current->nsproxy->mnt_ns; |
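Note (editor): the two helpers above replace the old per-umount hash-table scan (dentry_reset_mounted, removed below) with plain reference counting: one struct mountpoint per mounted-on dentry, one m_count reference per user. A minimal sketch of how they are meant to pair up, not part of the patch; locking (namespace_sem, vfsmount_lock) is elided and the function name is made up for illustration.

	static int example_attach_at(struct dentry *dentry, struct mount *parent, struct mount *child)
	{
		/* existing entry: m_count++; otherwise allocate one with m_count = 1 */
		struct mountpoint *mp = new_mountpoint(dentry);

		if (IS_ERR(mp))
			return PTR_ERR(mp);	/* -ENOENT if the dentry is unlinked, -ENOMEM on allocation failure */
		mnt_set_mountpoint(parent, mp, child);	/* child->mnt_mp = mp, mp->m_count++ */
		put_mountpoint(mp);			/* drop the lookup reference; the child keeps its own */
		return 0;
	}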
@@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
633 | } | 680 | } |
634 | 681 | ||
635 | /* | 682 | /* |
636 | * Clear dentry's mounted state if it has no remaining mounts. | ||
637 | * vfsmount_lock must be held for write. | ||
638 | */ | ||
639 | static void dentry_reset_mounted(struct dentry *dentry) | ||
640 | { | ||
641 | unsigned u; | ||
642 | |||
643 | for (u = 0; u < HASH_SIZE; u++) { | ||
644 | struct mount *p; | ||
645 | |||
646 | list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { | ||
647 | if (p->mnt_mountpoint == dentry) | ||
648 | return; | ||
649 | } | ||
650 | } | ||
651 | spin_lock(&dentry->d_lock); | ||
652 | dentry->d_flags &= ~DCACHE_MOUNTED; | ||
653 | spin_unlock(&dentry->d_lock); | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * vfsmount lock must be held for write | 683 | * vfsmount lock must be held for write |
658 | */ | 684 | */ |
659 | static void detach_mnt(struct mount *mnt, struct path *old_path) | 685 | static void detach_mnt(struct mount *mnt, struct path *old_path) |
@@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) | |||
664 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 690 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
665 | list_del_init(&mnt->mnt_child); | 691 | list_del_init(&mnt->mnt_child); |
666 | list_del_init(&mnt->mnt_hash); | 692 | list_del_init(&mnt->mnt_hash); |
667 | dentry_reset_mounted(old_path->dentry); | 693 | put_mountpoint(mnt->mnt_mp); |
694 | mnt->mnt_mp = NULL; | ||
668 | } | 695 | } |
669 | 696 | ||
670 | /* | 697 | /* |
671 | * vfsmount lock must be held for write | 698 | * vfsmount lock must be held for write |
672 | */ | 699 | */ |
673 | void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, | 700 | void mnt_set_mountpoint(struct mount *mnt, |
701 | struct mountpoint *mp, | ||
674 | struct mount *child_mnt) | 702 | struct mount *child_mnt) |
675 | { | 703 | { |
704 | mp->m_count++; | ||
676 | mnt_add_count(mnt, 1); /* essentially, that's mntget */ | 705 | mnt_add_count(mnt, 1); /* essentially, that's mntget */ |
677 | child_mnt->mnt_mountpoint = dget(dentry); | 706 | child_mnt->mnt_mountpoint = dget(mp->m_dentry); |
678 | child_mnt->mnt_parent = mnt; | 707 | child_mnt->mnt_parent = mnt; |
679 | spin_lock(&dentry->d_lock); | 708 | child_mnt->mnt_mp = mp; |
680 | dentry->d_flags |= DCACHE_MOUNTED; | ||
681 | spin_unlock(&dentry->d_lock); | ||
682 | } | 709 | } |
683 | 710 | ||
684 | /* | 711 | /* |
685 | * vfsmount lock must be held for write | 712 | * vfsmount lock must be held for write |
686 | */ | 713 | */ |
687 | static void attach_mnt(struct mount *mnt, struct path *path) | 714 | static void attach_mnt(struct mount *mnt, |
715 | struct mount *parent, | ||
716 | struct mountpoint *mp) | ||
688 | { | 717 | { |
689 | mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); | 718 | mnt_set_mountpoint(parent, mp, mnt); |
690 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 719 | list_add_tail(&mnt->mnt_hash, mount_hashtable + |
691 | hash(path->mnt, path->dentry)); | 720 | hash(&parent->mnt, mp->m_dentry)); |
692 | list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); | 721 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
693 | } | 722 | } |
694 | 723 | ||
695 | /* | 724 | /* |
@@ -1095,11 +1124,23 @@ int may_umount(struct vfsmount *mnt) | |||
1095 | 1124 | ||
1096 | EXPORT_SYMBOL(may_umount); | 1125 | EXPORT_SYMBOL(may_umount); |
1097 | 1126 | ||
1098 | void release_mounts(struct list_head *head) | 1127 | static LIST_HEAD(unmounted); /* protected by namespace_sem */ |
1128 | |||
1129 | static void namespace_unlock(void) | ||
1099 | { | 1130 | { |
1100 | struct mount *mnt; | 1131 | struct mount *mnt; |
1101 | while (!list_empty(head)) { | 1132 | LIST_HEAD(head); |
1102 | mnt = list_first_entry(head, struct mount, mnt_hash); | 1133 | |
1134 | if (likely(list_empty(&unmounted))) { | ||
1135 | up_write(&namespace_sem); | ||
1136 | return; | ||
1137 | } | ||
1138 | |||
1139 | list_splice_init(&unmounted, &head); | ||
1140 | up_write(&namespace_sem); | ||
1141 | |||
1142 | while (!list_empty(&head)) { | ||
1143 | mnt = list_first_entry(&head, struct mount, mnt_hash); | ||
1103 | list_del_init(&mnt->mnt_hash); | 1144 | list_del_init(&mnt->mnt_hash); |
1104 | if (mnt_has_parent(mnt)) { | 1145 | if (mnt_has_parent(mnt)) { |
1105 | struct dentry *dentry; | 1146 | struct dentry *dentry; |
@@ -1119,11 +1160,16 @@ void release_mounts(struct list_head *head) | |||
1119 | } | 1160 | } |
1120 | } | 1161 | } |
1121 | 1162 | ||
1163 | static inline void namespace_lock(void) | ||
1164 | { | ||
1165 | down_write(&namespace_sem); | ||
1166 | } | ||
1167 | |||
1122 | /* | 1168 | /* |
1123 | * vfsmount lock must be held for write | 1169 | * vfsmount lock must be held for write |
1124 | * namespace_sem must be held for write | 1170 | * namespace_sem must be held for write |
1125 | */ | 1171 | */ |
1126 | void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) | 1172 | void umount_tree(struct mount *mnt, int propagate) |
1127 | { | 1173 | { |
1128 | LIST_HEAD(tmp_list); | 1174 | LIST_HEAD(tmp_list); |
1129 | struct mount *p; | 1175 | struct mount *p; |
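Note (editor): release_mounts() disappears as a separate caller-visible step. umount_tree() now splices its victims onto the file-local 'unmounted' list (protected by namespace_sem), and namespace_unlock() both drops the semaphore and disposes of that list. The callers converted in the hunks below all reduce to the same shape; an illustrative sketch, not part of the patch, with a made-up function name:

	static void example_kill_tree(struct mount *mnt)
	{
		namespace_lock();			/* down_write(&namespace_sem) */
		br_write_lock(&vfsmount_lock);
		umount_tree(mnt, 0);			/* victims are spliced onto the global 'unmounted' list */
		br_write_unlock(&vfsmount_lock);
		namespace_unlock();			/* up_write(), then dput()/mntput() of everything collected */
	}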
@@ -1142,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) | |||
1142 | list_del_init(&p->mnt_child); | 1188 | list_del_init(&p->mnt_child); |
1143 | if (mnt_has_parent(p)) { | 1189 | if (mnt_has_parent(p)) { |
1144 | p->mnt_parent->mnt_ghosts++; | 1190 | p->mnt_parent->mnt_ghosts++; |
1145 | dentry_reset_mounted(p->mnt_mountpoint); | 1191 | put_mountpoint(p->mnt_mp); |
1192 | p->mnt_mp = NULL; | ||
1146 | } | 1193 | } |
1147 | change_mnt_propagation(p, MS_PRIVATE); | 1194 | change_mnt_propagation(p, MS_PRIVATE); |
1148 | } | 1195 | } |
1149 | list_splice(&tmp_list, kill); | 1196 | list_splice(&tmp_list, &unmounted); |
1150 | } | 1197 | } |
1151 | 1198 | ||
1152 | static void shrink_submounts(struct mount *mnt, struct list_head *umounts); | 1199 | static void shrink_submounts(struct mount *mnt); |
1153 | 1200 | ||
1154 | static int do_umount(struct mount *mnt, int flags) | 1201 | static int do_umount(struct mount *mnt, int flags) |
1155 | { | 1202 | { |
1156 | struct super_block *sb = mnt->mnt.mnt_sb; | 1203 | struct super_block *sb = mnt->mnt.mnt_sb; |
1157 | int retval; | 1204 | int retval; |
1158 | LIST_HEAD(umount_list); | ||
1159 | 1205 | ||
1160 | retval = security_sb_umount(&mnt->mnt, flags); | 1206 | retval = security_sb_umount(&mnt->mnt, flags); |
1161 | if (retval) | 1207 | if (retval) |
@@ -1222,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags) | |||
1222 | return retval; | 1268 | return retval; |
1223 | } | 1269 | } |
1224 | 1270 | ||
1225 | down_write(&namespace_sem); | 1271 | namespace_lock(); |
1226 | br_write_lock(&vfsmount_lock); | 1272 | br_write_lock(&vfsmount_lock); |
1227 | event++; | 1273 | event++; |
1228 | 1274 | ||
1229 | if (!(flags & MNT_DETACH)) | 1275 | if (!(flags & MNT_DETACH)) |
1230 | shrink_submounts(mnt, &umount_list); | 1276 | shrink_submounts(mnt); |
1231 | 1277 | ||
1232 | retval = -EBUSY; | 1278 | retval = -EBUSY; |
1233 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { | 1279 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { |
1234 | if (!list_empty(&mnt->mnt_list)) | 1280 | if (!list_empty(&mnt->mnt_list)) |
1235 | umount_tree(mnt, 1, &umount_list); | 1281 | umount_tree(mnt, 1); |
1236 | retval = 0; | 1282 | retval = 0; |
1237 | } | 1283 | } |
1238 | br_write_unlock(&vfsmount_lock); | 1284 | br_write_unlock(&vfsmount_lock); |
1239 | up_write(&namespace_sem); | 1285 | namespace_unlock(); |
1240 | release_mounts(&umount_list); | ||
1241 | return retval; | 1286 | return retval; |
1242 | } | 1287 | } |
1243 | 1288 | ||
@@ -1310,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path) | |||
1310 | * mount namespace loop? | 1355 | * mount namespace loop? |
1311 | */ | 1356 | */ |
1312 | struct inode *inode = path->dentry->d_inode; | 1357 | struct inode *inode = path->dentry->d_inode; |
1313 | struct proc_inode *ei; | 1358 | struct proc_ns *ei; |
1314 | struct mnt_namespace *mnt_ns; | 1359 | struct mnt_namespace *mnt_ns; |
1315 | 1360 | ||
1316 | if (!proc_ns_inode(inode)) | 1361 | if (!proc_ns_inode(inode)) |
1317 | return false; | 1362 | return false; |
1318 | 1363 | ||
1319 | ei = PROC_I(inode); | 1364 | ei = get_proc_ns(inode); |
1320 | if (ei->ns_ops != &mntns_operations) | 1365 | if (ei->ns_ops != &mntns_operations) |
1321 | return false; | 1366 | return false; |
1322 | 1367 | ||
@@ -1327,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path) | |||
1327 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | 1372 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, |
1328 | int flag) | 1373 | int flag) |
1329 | { | 1374 | { |
1330 | struct mount *res, *p, *q, *r; | 1375 | struct mount *res, *p, *q, *r, *parent; |
1331 | struct path path; | ||
1332 | 1376 | ||
1333 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) | 1377 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) |
1334 | return ERR_PTR(-EINVAL); | 1378 | return ERR_PTR(-EINVAL); |
@@ -1355,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1355 | q = q->mnt_parent; | 1399 | q = q->mnt_parent; |
1356 | } | 1400 | } |
1357 | p = s; | 1401 | p = s; |
1358 | path.mnt = &q->mnt; | 1402 | parent = q; |
1359 | path.dentry = p->mnt_mountpoint; | ||
1360 | q = clone_mnt(p, p->mnt.mnt_root, flag); | 1403 | q = clone_mnt(p, p->mnt.mnt_root, flag); |
1361 | if (IS_ERR(q)) | 1404 | if (IS_ERR(q)) |
1362 | goto out; | 1405 | goto out; |
1363 | br_write_lock(&vfsmount_lock); | 1406 | br_write_lock(&vfsmount_lock); |
1364 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1407 | list_add_tail(&q->mnt_list, &res->mnt_list); |
1365 | attach_mnt(q, &path); | 1408 | attach_mnt(q, parent, p->mnt_mp); |
1366 | br_write_unlock(&vfsmount_lock); | 1409 | br_write_unlock(&vfsmount_lock); |
1367 | } | 1410 | } |
1368 | } | 1411 | } |
1369 | return res; | 1412 | return res; |
1370 | out: | 1413 | out: |
1371 | if (res) { | 1414 | if (res) { |
1372 | LIST_HEAD(umount_list); | ||
1373 | br_write_lock(&vfsmount_lock); | 1415 | br_write_lock(&vfsmount_lock); |
1374 | umount_tree(res, 0, &umount_list); | 1416 | umount_tree(res, 0); |
1375 | br_write_unlock(&vfsmount_lock); | 1417 | br_write_unlock(&vfsmount_lock); |
1376 | release_mounts(&umount_list); | ||
1377 | } | 1418 | } |
1378 | return q; | 1419 | return q; |
1379 | } | 1420 | } |
@@ -1383,10 +1424,10 @@ out: | |||
1383 | struct vfsmount *collect_mounts(struct path *path) | 1424 | struct vfsmount *collect_mounts(struct path *path) |
1384 | { | 1425 | { |
1385 | struct mount *tree; | 1426 | struct mount *tree; |
1386 | down_write(&namespace_sem); | 1427 | namespace_lock(); |
1387 | tree = copy_tree(real_mount(path->mnt), path->dentry, | 1428 | tree = copy_tree(real_mount(path->mnt), path->dentry, |
1388 | CL_COPY_ALL | CL_PRIVATE); | 1429 | CL_COPY_ALL | CL_PRIVATE); |
1389 | up_write(&namespace_sem); | 1430 | namespace_unlock(); |
1390 | if (IS_ERR(tree)) | 1431 | if (IS_ERR(tree)) |
1391 | return NULL; | 1432 | return NULL; |
1392 | return &tree->mnt; | 1433 | return &tree->mnt; |
@@ -1394,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path) | |||
1394 | 1435 | ||
1395 | void drop_collected_mounts(struct vfsmount *mnt) | 1436 | void drop_collected_mounts(struct vfsmount *mnt) |
1396 | { | 1437 | { |
1397 | LIST_HEAD(umount_list); | 1438 | namespace_lock(); |
1398 | down_write(&namespace_sem); | ||
1399 | br_write_lock(&vfsmount_lock); | 1439 | br_write_lock(&vfsmount_lock); |
1400 | umount_tree(real_mount(mnt), 0, &umount_list); | 1440 | umount_tree(real_mount(mnt), 0); |
1401 | br_write_unlock(&vfsmount_lock); | 1441 | br_write_unlock(&vfsmount_lock); |
1402 | up_write(&namespace_sem); | 1442 | namespace_unlock(); |
1403 | release_mounts(&umount_list); | ||
1404 | } | 1443 | } |
1405 | 1444 | ||
1406 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1445 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
@@ -1509,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse) | |||
1509 | * in allocations. | 1548 | * in allocations. |
1510 | */ | 1549 | */ |
1511 | static int attach_recursive_mnt(struct mount *source_mnt, | 1550 | static int attach_recursive_mnt(struct mount *source_mnt, |
1512 | struct path *path, struct path *parent_path) | 1551 | struct mount *dest_mnt, |
1552 | struct mountpoint *dest_mp, | ||
1553 | struct path *parent_path) | ||
1513 | { | 1554 | { |
1514 | LIST_HEAD(tree_list); | 1555 | LIST_HEAD(tree_list); |
1515 | struct mount *dest_mnt = real_mount(path->mnt); | ||
1516 | struct dentry *dest_dentry = path->dentry; | ||
1517 | struct mount *child, *p; | 1556 | struct mount *child, *p; |
1518 | int err; | 1557 | int err; |
1519 | 1558 | ||
@@ -1522,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1522 | if (err) | 1561 | if (err) |
1523 | goto out; | 1562 | goto out; |
1524 | } | 1563 | } |
1525 | err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); | 1564 | err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); |
1526 | if (err) | 1565 | if (err) |
1527 | goto out_cleanup_ids; | 1566 | goto out_cleanup_ids; |
1528 | 1567 | ||
@@ -1534,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1534 | } | 1573 | } |
1535 | if (parent_path) { | 1574 | if (parent_path) { |
1536 | detach_mnt(source_mnt, parent_path); | 1575 | detach_mnt(source_mnt, parent_path); |
1537 | attach_mnt(source_mnt, path); | 1576 | attach_mnt(source_mnt, dest_mnt, dest_mp); |
1538 | touch_mnt_namespace(source_mnt->mnt_ns); | 1577 | touch_mnt_namespace(source_mnt->mnt_ns); |
1539 | } else { | 1578 | } else { |
1540 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1579 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
1541 | commit_tree(source_mnt); | 1580 | commit_tree(source_mnt); |
1542 | } | 1581 | } |
1543 | 1582 | ||
@@ -1556,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1556 | return err; | 1595 | return err; |
1557 | } | 1596 | } |
1558 | 1597 | ||
1559 | static int lock_mount(struct path *path) | 1598 | static struct mountpoint *lock_mount(struct path *path) |
1560 | { | 1599 | { |
1561 | struct vfsmount *mnt; | 1600 | struct vfsmount *mnt; |
1601 | struct dentry *dentry = path->dentry; | ||
1562 | retry: | 1602 | retry: |
1563 | mutex_lock(&path->dentry->d_inode->i_mutex); | 1603 | mutex_lock(&dentry->d_inode->i_mutex); |
1564 | if (unlikely(cant_mount(path->dentry))) { | 1604 | if (unlikely(cant_mount(dentry))) { |
1565 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1605 | mutex_unlock(&dentry->d_inode->i_mutex); |
1566 | return -ENOENT; | 1606 | return ERR_PTR(-ENOENT); |
1567 | } | 1607 | } |
1568 | down_write(&namespace_sem); | 1608 | namespace_lock(); |
1569 | mnt = lookup_mnt(path); | 1609 | mnt = lookup_mnt(path); |
1570 | if (likely(!mnt)) | 1610 | if (likely(!mnt)) { |
1571 | return 0; | 1611 | struct mountpoint *mp = new_mountpoint(dentry); |
1572 | up_write(&namespace_sem); | 1612 | if (IS_ERR(mp)) { |
1613 | namespace_unlock(); | ||
1614 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
1615 | return mp; | ||
1616 | } | ||
1617 | return mp; | ||
1618 | } | ||
1619 | namespace_unlock(); | ||
1573 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1620 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
1574 | path_put(path); | 1621 | path_put(path); |
1575 | path->mnt = mnt; | 1622 | path->mnt = mnt; |
1576 | path->dentry = dget(mnt->mnt_root); | 1623 | dentry = path->dentry = dget(mnt->mnt_root); |
1577 | goto retry; | 1624 | goto retry; |
1578 | } | 1625 | } |
1579 | 1626 | ||
1580 | static void unlock_mount(struct path *path) | 1627 | static void unlock_mount(struct mountpoint *where) |
1581 | { | 1628 | { |
1582 | up_write(&namespace_sem); | 1629 | struct dentry *dentry = where->m_dentry; |
1583 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1630 | put_mountpoint(where); |
1631 | namespace_unlock(); | ||
1632 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
1584 | } | 1633 | } |
1585 | 1634 | ||
1586 | static int graft_tree(struct mount *mnt, struct path *path) | 1635 | static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) |
1587 | { | 1636 | { |
1588 | if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) | 1637 | if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) |
1589 | return -EINVAL; | 1638 | return -EINVAL; |
1590 | 1639 | ||
1591 | if (S_ISDIR(path->dentry->d_inode->i_mode) != | 1640 | if (S_ISDIR(mp->m_dentry->d_inode->i_mode) != |
1592 | S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) | 1641 | S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) |
1593 | return -ENOTDIR; | 1642 | return -ENOTDIR; |
1594 | 1643 | ||
1595 | if (d_unlinked(path->dentry)) | 1644 | return attach_recursive_mnt(mnt, p, mp, NULL); |
1596 | return -ENOENT; | ||
1597 | |||
1598 | return attach_recursive_mnt(mnt, path, NULL); | ||
1599 | } | 1645 | } |
1600 | 1646 | ||
1601 | /* | 1647 | /* |
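Note (editor): lock_mount() now returns the pinned struct mountpoint (or an ERR_PTR) instead of 0/-errno, and graft_tree() takes the parent mount plus that mountpoint rather than a struct path. The calling convention, as do_loopback() and do_add_mount() use it below; editor's sketch only, with a made-up function name:

	static int example_mount_here(struct mount *mnt, struct path *path)
	{
		struct mountpoint *mp = lock_mount(path);	/* i_mutex + namespace_sem held, mountpoint pinned */
		int err;

		if (IS_ERR(mp))
			return PTR_ERR(mp);
		err = graft_tree(mnt, real_mount(path->mnt), mp);
		unlock_mount(mp);				/* put_mountpoint(), namespace_unlock(), i_mutex released */
		return err;
	}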
@@ -1633,7 +1679,7 @@ static int do_change_type(struct path *path, int flag) | |||
1633 | if (!type) | 1679 | if (!type) |
1634 | return -EINVAL; | 1680 | return -EINVAL; |
1635 | 1681 | ||
1636 | down_write(&namespace_sem); | 1682 | namespace_lock(); |
1637 | if (type == MS_SHARED) { | 1683 | if (type == MS_SHARED) { |
1638 | err = invent_group_ids(mnt, recurse); | 1684 | err = invent_group_ids(mnt, recurse); |
1639 | if (err) | 1685 | if (err) |
@@ -1646,7 +1692,7 @@ static int do_change_type(struct path *path, int flag) | |||
1646 | br_write_unlock(&vfsmount_lock); | 1692 | br_write_unlock(&vfsmount_lock); |
1647 | 1693 | ||
1648 | out_unlock: | 1694 | out_unlock: |
1649 | up_write(&namespace_sem); | 1695 | namespace_unlock(); |
1650 | return err; | 1696 | return err; |
1651 | } | 1697 | } |
1652 | 1698 | ||
@@ -1656,9 +1702,9 @@ static int do_change_type(struct path *path, int flag) | |||
1656 | static int do_loopback(struct path *path, const char *old_name, | 1702 | static int do_loopback(struct path *path, const char *old_name, |
1657 | int recurse) | 1703 | int recurse) |
1658 | { | 1704 | { |
1659 | LIST_HEAD(umount_list); | ||
1660 | struct path old_path; | 1705 | struct path old_path; |
1661 | struct mount *mnt = NULL, *old; | 1706 | struct mount *mnt = NULL, *old, *parent; |
1707 | struct mountpoint *mp; | ||
1662 | int err; | 1708 | int err; |
1663 | if (!old_name || !*old_name) | 1709 | if (!old_name || !*old_name) |
1664 | return -EINVAL; | 1710 | return -EINVAL; |
@@ -1670,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name, | |||
1670 | if (mnt_ns_loop(&old_path)) | 1716 | if (mnt_ns_loop(&old_path)) |
1671 | goto out; | 1717 | goto out; |
1672 | 1718 | ||
1673 | err = lock_mount(path); | 1719 | mp = lock_mount(path); |
1674 | if (err) | 1720 | err = PTR_ERR(mp); |
1721 | if (IS_ERR(mp)) | ||
1675 | goto out; | 1722 | goto out; |
1676 | 1723 | ||
1677 | old = real_mount(old_path.mnt); | 1724 | old = real_mount(old_path.mnt); |
1725 | parent = real_mount(path->mnt); | ||
1678 | 1726 | ||
1679 | err = -EINVAL; | 1727 | err = -EINVAL; |
1680 | if (IS_MNT_UNBINDABLE(old)) | 1728 | if (IS_MNT_UNBINDABLE(old)) |
1681 | goto out2; | 1729 | goto out2; |
1682 | 1730 | ||
1683 | if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) | 1731 | if (!check_mnt(parent) || !check_mnt(old)) |
1684 | goto out2; | 1732 | goto out2; |
1685 | 1733 | ||
1686 | if (recurse) | 1734 | if (recurse) |
@@ -1693,15 +1741,14 @@ static int do_loopback(struct path *path, const char *old_name, | |||
1693 | goto out2; | 1741 | goto out2; |
1694 | } | 1742 | } |
1695 | 1743 | ||
1696 | err = graft_tree(mnt, path); | 1744 | err = graft_tree(mnt, parent, mp); |
1697 | if (err) { | 1745 | if (err) { |
1698 | br_write_lock(&vfsmount_lock); | 1746 | br_write_lock(&vfsmount_lock); |
1699 | umount_tree(mnt, 0, &umount_list); | 1747 | umount_tree(mnt, 0); |
1700 | br_write_unlock(&vfsmount_lock); | 1748 | br_write_unlock(&vfsmount_lock); |
1701 | } | 1749 | } |
1702 | out2: | 1750 | out2: |
1703 | unlock_mount(path); | 1751 | unlock_mount(mp); |
1704 | release_mounts(&umount_list); | ||
1705 | out: | 1752 | out: |
1706 | path_put(&old_path); | 1753 | path_put(&old_path); |
1707 | return err; | 1754 | return err; |
@@ -1786,6 +1833,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1786 | struct path old_path, parent_path; | 1833 | struct path old_path, parent_path; |
1787 | struct mount *p; | 1834 | struct mount *p; |
1788 | struct mount *old; | 1835 | struct mount *old; |
1836 | struct mountpoint *mp; | ||
1789 | int err; | 1837 | int err; |
1790 | if (!old_name || !*old_name) | 1838 | if (!old_name || !*old_name) |
1791 | return -EINVAL; | 1839 | return -EINVAL; |
@@ -1793,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1793 | if (err) | 1841 | if (err) |
1794 | return err; | 1842 | return err; |
1795 | 1843 | ||
1796 | err = lock_mount(path); | 1844 | mp = lock_mount(path); |
1797 | if (err < 0) | 1845 | err = PTR_ERR(mp); |
1846 | if (IS_ERR(mp)) | ||
1798 | goto out; | 1847 | goto out; |
1799 | 1848 | ||
1800 | old = real_mount(old_path.mnt); | 1849 | old = real_mount(old_path.mnt); |
@@ -1804,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1804 | if (!check_mnt(p) || !check_mnt(old)) | 1853 | if (!check_mnt(p) || !check_mnt(old)) |
1805 | goto out1; | 1854 | goto out1; |
1806 | 1855 | ||
1807 | if (d_unlinked(path->dentry)) | ||
1808 | goto out1; | ||
1809 | |||
1810 | err = -EINVAL; | 1856 | err = -EINVAL; |
1811 | if (old_path.dentry != old_path.mnt->mnt_root) | 1857 | if (old_path.dentry != old_path.mnt->mnt_root) |
1812 | goto out1; | 1858 | goto out1; |
@@ -1833,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1833 | if (p == old) | 1879 | if (p == old) |
1834 | goto out1; | 1880 | goto out1; |
1835 | 1881 | ||
1836 | err = attach_recursive_mnt(old, path, &parent_path); | 1882 | err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); |
1837 | if (err) | 1883 | if (err) |
1838 | goto out1; | 1884 | goto out1; |
1839 | 1885 | ||
@@ -1841,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1841 | * automatically */ | 1887 | * automatically */ |
1842 | list_del_init(&old->mnt_expire); | 1888 | list_del_init(&old->mnt_expire); |
1843 | out1: | 1889 | out1: |
1844 | unlock_mount(path); | 1890 | unlock_mount(mp); |
1845 | out: | 1891 | out: |
1846 | if (!err) | 1892 | if (!err) |
1847 | path_put(&parent_path); | 1893 | path_put(&parent_path); |
@@ -1877,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | |||
1877 | */ | 1923 | */ |
1878 | static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | 1924 | static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) |
1879 | { | 1925 | { |
1926 | struct mountpoint *mp; | ||
1927 | struct mount *parent; | ||
1880 | int err; | 1928 | int err; |
1881 | 1929 | ||
1882 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); | 1930 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); |
1883 | 1931 | ||
1884 | err = lock_mount(path); | 1932 | mp = lock_mount(path); |
1885 | if (err) | 1933 | if (IS_ERR(mp)) |
1886 | return err; | 1934 | return PTR_ERR(mp); |
1887 | 1935 | ||
1936 | parent = real_mount(path->mnt); | ||
1888 | err = -EINVAL; | 1937 | err = -EINVAL; |
1889 | if (unlikely(!check_mnt(real_mount(path->mnt)))) { | 1938 | if (unlikely(!check_mnt(parent))) { |
1890 | /* that's acceptable only for automounts done in private ns */ | 1939 | /* that's acceptable only for automounts done in private ns */ |
1891 | if (!(mnt_flags & MNT_SHRINKABLE)) | 1940 | if (!(mnt_flags & MNT_SHRINKABLE)) |
1892 | goto unlock; | 1941 | goto unlock; |
1893 | /* ... and for those we'd better have mountpoint still alive */ | 1942 | /* ... and for those we'd better have mountpoint still alive */ |
1894 | if (!real_mount(path->mnt)->mnt_ns) | 1943 | if (!parent->mnt_ns) |
1895 | goto unlock; | 1944 | goto unlock; |
1896 | } | 1945 | } |
1897 | 1946 | ||
@@ -1906,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | |||
1906 | goto unlock; | 1955 | goto unlock; |
1907 | 1956 | ||
1908 | newmnt->mnt.mnt_flags = mnt_flags; | 1957 | newmnt->mnt.mnt_flags = mnt_flags; |
1909 | err = graft_tree(newmnt, path); | 1958 | err = graft_tree(newmnt, parent, mp); |
1910 | 1959 | ||
1911 | unlock: | 1960 | unlock: |
1912 | unlock_mount(path); | 1961 | unlock_mount(mp); |
1913 | return err; | 1962 | return err; |
1914 | } | 1963 | } |
1915 | 1964 | ||
@@ -1982,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path) | |||
1982 | fail: | 2031 | fail: |
1983 | /* remove m from any expiration list it may be on */ | 2032 | /* remove m from any expiration list it may be on */ |
1984 | if (!list_empty(&mnt->mnt_expire)) { | 2033 | if (!list_empty(&mnt->mnt_expire)) { |
1985 | down_write(&namespace_sem); | 2034 | namespace_lock(); |
1986 | br_write_lock(&vfsmount_lock); | 2035 | br_write_lock(&vfsmount_lock); |
1987 | list_del_init(&mnt->mnt_expire); | 2036 | list_del_init(&mnt->mnt_expire); |
1988 | br_write_unlock(&vfsmount_lock); | 2037 | br_write_unlock(&vfsmount_lock); |
1989 | up_write(&namespace_sem); | 2038 | namespace_unlock(); |
1990 | } | 2039 | } |
1991 | mntput(m); | 2040 | mntput(m); |
1992 | mntput(m); | 2041 | mntput(m); |
@@ -2000,13 +2049,13 @@ fail: | |||
2000 | */ | 2049 | */ |
2001 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) | 2050 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) |
2002 | { | 2051 | { |
2003 | down_write(&namespace_sem); | 2052 | namespace_lock(); |
2004 | br_write_lock(&vfsmount_lock); | 2053 | br_write_lock(&vfsmount_lock); |
2005 | 2054 | ||
2006 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); | 2055 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); |
2007 | 2056 | ||
2008 | br_write_unlock(&vfsmount_lock); | 2057 | br_write_unlock(&vfsmount_lock); |
2009 | up_write(&namespace_sem); | 2058 | namespace_unlock(); |
2010 | } | 2059 | } |
2011 | EXPORT_SYMBOL(mnt_set_expiry); | 2060 | EXPORT_SYMBOL(mnt_set_expiry); |
2012 | 2061 | ||
@@ -2019,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
2019 | { | 2068 | { |
2020 | struct mount *mnt, *next; | 2069 | struct mount *mnt, *next; |
2021 | LIST_HEAD(graveyard); | 2070 | LIST_HEAD(graveyard); |
2022 | LIST_HEAD(umounts); | ||
2023 | 2071 | ||
2024 | if (list_empty(mounts)) | 2072 | if (list_empty(mounts)) |
2025 | return; | 2073 | return; |
2026 | 2074 | ||
2027 | down_write(&namespace_sem); | 2075 | namespace_lock(); |
2028 | br_write_lock(&vfsmount_lock); | 2076 | br_write_lock(&vfsmount_lock); |
2029 | 2077 | ||
2030 | /* extract from the expiration list every vfsmount that matches the | 2078 | /* extract from the expiration list every vfsmount that matches the |
@@ -2042,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
2042 | while (!list_empty(&graveyard)) { | 2090 | while (!list_empty(&graveyard)) { |
2043 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); | 2091 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); |
2044 | touch_mnt_namespace(mnt->mnt_ns); | 2092 | touch_mnt_namespace(mnt->mnt_ns); |
2045 | umount_tree(mnt, 1, &umounts); | 2093 | umount_tree(mnt, 1); |
2046 | } | 2094 | } |
2047 | br_write_unlock(&vfsmount_lock); | 2095 | br_write_unlock(&vfsmount_lock); |
2048 | up_write(&namespace_sem); | 2096 | namespace_unlock(); |
2049 | |||
2050 | release_mounts(&umounts); | ||
2051 | } | 2097 | } |
2052 | 2098 | ||
2053 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); | 2099 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); |
@@ -2104,7 +2150,7 @@ resume: | |||
2104 | * | 2150 | * |
2105 | * vfsmount_lock must be held for write | 2151 | * vfsmount_lock must be held for write |
2106 | */ | 2152 | */ |
2107 | static void shrink_submounts(struct mount *mnt, struct list_head *umounts) | 2153 | static void shrink_submounts(struct mount *mnt) |
2108 | { | 2154 | { |
2109 | LIST_HEAD(graveyard); | 2155 | LIST_HEAD(graveyard); |
2110 | struct mount *m; | 2156 | struct mount *m; |
@@ -2115,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts) | |||
2115 | m = list_first_entry(&graveyard, struct mount, | 2161 | m = list_first_entry(&graveyard, struct mount, |
2116 | mnt_expire); | 2162 | mnt_expire); |
2117 | touch_mnt_namespace(m->mnt_ns); | 2163 | touch_mnt_namespace(m->mnt_ns); |
2118 | umount_tree(m, 1, umounts); | 2164 | umount_tree(m, 1); |
2119 | } | 2165 | } |
2120 | } | 2166 | } |
2121 | } | 2167 | } |
@@ -2342,14 +2388,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2342 | if (IS_ERR(new_ns)) | 2388 | if (IS_ERR(new_ns)) |
2343 | return new_ns; | 2389 | return new_ns; |
2344 | 2390 | ||
2345 | down_write(&namespace_sem); | 2391 | namespace_lock(); |
2346 | /* First pass: copy the tree topology */ | 2392 | /* First pass: copy the tree topology */ |
2347 | copy_flags = CL_COPY_ALL | CL_EXPIRE; | 2393 | copy_flags = CL_COPY_ALL | CL_EXPIRE; |
2348 | if (user_ns != mnt_ns->user_ns) | 2394 | if (user_ns != mnt_ns->user_ns) |
2349 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; | 2395 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; |
2350 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); | 2396 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); |
2351 | if (IS_ERR(new)) { | 2397 | if (IS_ERR(new)) { |
2352 | up_write(&namespace_sem); | 2398 | namespace_unlock(); |
2353 | free_mnt_ns(new_ns); | 2399 | free_mnt_ns(new_ns); |
2354 | return ERR_CAST(new); | 2400 | return ERR_CAST(new); |
2355 | } | 2401 | } |
@@ -2380,7 +2426,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2380 | p = next_mnt(p, old); | 2426 | p = next_mnt(p, old); |
2381 | q = next_mnt(q, new); | 2427 | q = next_mnt(q, new); |
2382 | } | 2428 | } |
2383 | up_write(&namespace_sem); | 2429 | namespace_unlock(); |
2384 | 2430 | ||
2385 | if (rootmnt) | 2431 | if (rootmnt) |
2386 | mntput(rootmnt); | 2432 | mntput(rootmnt); |
@@ -2550,7 +2596,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2550 | const char __user *, put_old) | 2596 | const char __user *, put_old) |
2551 | { | 2597 | { |
2552 | struct path new, old, parent_path, root_parent, root; | 2598 | struct path new, old, parent_path, root_parent, root; |
2553 | struct mount *new_mnt, *root_mnt; | 2599 | struct mount *new_mnt, *root_mnt, *old_mnt; |
2600 | struct mountpoint *old_mp, *root_mp; | ||
2554 | int error; | 2601 | int error; |
2555 | 2602 | ||
2556 | if (!may_mount()) | 2603 | if (!may_mount()) |
@@ -2569,14 +2616,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2569 | goto out2; | 2616 | goto out2; |
2570 | 2617 | ||
2571 | get_fs_root(current->fs, &root); | 2618 | get_fs_root(current->fs, &root); |
2572 | error = lock_mount(&old); | 2619 | old_mp = lock_mount(&old); |
2573 | if (error) | 2620 | error = PTR_ERR(old_mp); |
2621 | if (IS_ERR(old_mp)) | ||
2574 | goto out3; | 2622 | goto out3; |
2575 | 2623 | ||
2576 | error = -EINVAL; | 2624 | error = -EINVAL; |
2577 | new_mnt = real_mount(new.mnt); | 2625 | new_mnt = real_mount(new.mnt); |
2578 | root_mnt = real_mount(root.mnt); | 2626 | root_mnt = real_mount(root.mnt); |
2579 | if (IS_MNT_SHARED(real_mount(old.mnt)) || | 2627 | old_mnt = real_mount(old.mnt); |
2628 | if (IS_MNT_SHARED(old_mnt) || | ||
2580 | IS_MNT_SHARED(new_mnt->mnt_parent) || | 2629 | IS_MNT_SHARED(new_mnt->mnt_parent) || |
2581 | IS_MNT_SHARED(root_mnt->mnt_parent)) | 2630 | IS_MNT_SHARED(root_mnt->mnt_parent)) |
2582 | goto out4; | 2631 | goto out4; |
@@ -2585,37 +2634,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2585 | error = -ENOENT; | 2634 | error = -ENOENT; |
2586 | if (d_unlinked(new.dentry)) | 2635 | if (d_unlinked(new.dentry)) |
2587 | goto out4; | 2636 | goto out4; |
2588 | if (d_unlinked(old.dentry)) | ||
2589 | goto out4; | ||
2590 | error = -EBUSY; | 2637 | error = -EBUSY; |
2591 | if (new.mnt == root.mnt || | 2638 | if (new_mnt == root_mnt || old_mnt == root_mnt) |
2592 | old.mnt == root.mnt) | ||
2593 | goto out4; /* loop, on the same file system */ | 2639 | goto out4; /* loop, on the same file system */ |
2594 | error = -EINVAL; | 2640 | error = -EINVAL; |
2595 | if (root.mnt->mnt_root != root.dentry) | 2641 | if (root.mnt->mnt_root != root.dentry) |
2596 | goto out4; /* not a mountpoint */ | 2642 | goto out4; /* not a mountpoint */ |
2597 | if (!mnt_has_parent(root_mnt)) | 2643 | if (!mnt_has_parent(root_mnt)) |
2598 | goto out4; /* not attached */ | 2644 | goto out4; /* not attached */ |
2645 | root_mp = root_mnt->mnt_mp; | ||
2599 | if (new.mnt->mnt_root != new.dentry) | 2646 | if (new.mnt->mnt_root != new.dentry) |
2600 | goto out4; /* not a mountpoint */ | 2647 | goto out4; /* not a mountpoint */ |
2601 | if (!mnt_has_parent(new_mnt)) | 2648 | if (!mnt_has_parent(new_mnt)) |
2602 | goto out4; /* not attached */ | 2649 | goto out4; /* not attached */ |
2603 | /* make sure we can reach put_old from new_root */ | 2650 | /* make sure we can reach put_old from new_root */ |
2604 | if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) | 2651 | if (!is_path_reachable(old_mnt, old.dentry, &new)) |
2605 | goto out4; | 2652 | goto out4; |
2653 | root_mp->m_count++; /* pin it so it won't go away */ | ||
2606 | br_write_lock(&vfsmount_lock); | 2654 | br_write_lock(&vfsmount_lock); |
2607 | detach_mnt(new_mnt, &parent_path); | 2655 | detach_mnt(new_mnt, &parent_path); |
2608 | detach_mnt(root_mnt, &root_parent); | 2656 | detach_mnt(root_mnt, &root_parent); |
2609 | /* mount old root on put_old */ | 2657 | /* mount old root on put_old */ |
2610 | attach_mnt(root_mnt, &old); | 2658 | attach_mnt(root_mnt, old_mnt, old_mp); |
2611 | /* mount new_root on / */ | 2659 | /* mount new_root on / */ |
2612 | attach_mnt(new_mnt, &root_parent); | 2660 | attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); |
2613 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2661 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2614 | br_write_unlock(&vfsmount_lock); | 2662 | br_write_unlock(&vfsmount_lock); |
2615 | chroot_fs_refs(&root, &new); | 2663 | chroot_fs_refs(&root, &new); |
2664 | put_mountpoint(root_mp); | ||
2616 | error = 0; | 2665 | error = 0; |
2617 | out4: | 2666 | out4: |
2618 | unlock_mount(&old); | 2667 | unlock_mount(old_mp); |
2619 | if (!error) { | 2668 | if (!error) { |
2620 | path_put(&root_parent); | 2669 | path_put(&root_parent); |
2621 | path_put(&parent_path); | 2670 | path_put(&parent_path); |
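Note (editor): pivot_root() now juggles mountpoints directly: the old root's mountpoint is pinned with an extra m_count reference before both mounts are detached, so it cannot be freed while new_root is reattached onto it. Condensed to its essentials from the hunk above (editor's sketch; error handling and unrelated checks dropped):

	root_mp = root_mnt->mnt_mp;			/* where "/" is currently attached */
	root_mp->m_count++;				/* pin: detach_mnt(root_mnt) below drops root_mnt's reference */
	detach_mnt(new_mnt, &parent_path);		/* new_root leaves its old mountpoint */
	detach_mnt(root_mnt, &root_parent);		/* old root leaves "/"; root_mp survives only via the pin */
	attach_mnt(root_mnt, old_mnt, old_mp);		/* old root reattached under put_old (old_mp pinned by lock_mount) */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);	/* new_root takes the old root's place */
	put_mountpoint(root_mp);			/* drop the pin; new_mnt now holds the reference */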
@@ -2670,14 +2719,17 @@ void __init mnt_init(void) | |||
2670 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 2719 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
2671 | 2720 | ||
2672 | mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | 2721 | mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); |
2722 | mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | ||
2673 | 2723 | ||
2674 | if (!mount_hashtable) | 2724 | if (!mount_hashtable || !mountpoint_hashtable) |
2675 | panic("Failed to allocate mount hash table\n"); | 2725 | panic("Failed to allocate mount hash table\n"); |
2676 | 2726 | ||
2677 | printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); | 2727 | printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); |
2678 | 2728 | ||
2679 | for (u = 0; u < HASH_SIZE; u++) | 2729 | for (u = 0; u < HASH_SIZE; u++) |
2680 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2730 | INIT_LIST_HEAD(&mount_hashtable[u]); |
2731 | for (u = 0; u < HASH_SIZE; u++) | ||
2732 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); | ||
2681 | 2733 | ||
2682 | br_lock_init(&vfsmount_lock); | 2734 | br_lock_init(&vfsmount_lock); |
2683 | 2735 | ||
@@ -2694,16 +2746,13 @@ void __init mnt_init(void) | |||
2694 | 2746 | ||
2695 | void put_mnt_ns(struct mnt_namespace *ns) | 2747 | void put_mnt_ns(struct mnt_namespace *ns) |
2696 | { | 2748 | { |
2697 | LIST_HEAD(umount_list); | ||
2698 | |||
2699 | if (!atomic_dec_and_test(&ns->count)) | 2749 | if (!atomic_dec_and_test(&ns->count)) |
2700 | return; | 2750 | return; |
2701 | down_write(&namespace_sem); | 2751 | namespace_lock(); |
2702 | br_write_lock(&vfsmount_lock); | 2752 | br_write_lock(&vfsmount_lock); |
2703 | umount_tree(ns->root, 0, &umount_list); | 2753 | umount_tree(ns->root, 0); |
2704 | br_write_unlock(&vfsmount_lock); | 2754 | br_write_unlock(&vfsmount_lock); |
2705 | up_write(&namespace_sem); | 2755 | namespace_unlock(); |
2706 | release_mounts(&umount_list); | ||
2707 | free_mnt_ns(ns); | 2756 | free_mnt_ns(ns); |
2708 | } | 2757 | } |
2709 | 2758 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f33455b4d957..5bee0313dffd 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -177,7 +177,7 @@ static int export_features_open(struct inode *inode, struct file *file) | |||
177 | return single_open(file, export_features_show, NULL); | 177 | return single_open(file, export_features_show, NULL); |
178 | } | 178 | } |
179 | 179 | ||
180 | static struct file_operations export_features_operations = { | 180 | static const struct file_operations export_features_operations = { |
181 | .open = export_features_open, | 181 | .open = export_features_open, |
182 | .read = seq_read, | 182 | .read = seq_read, |
183 | .llseek = seq_lseek, | 183 | .llseek = seq_lseek, |
@@ -196,7 +196,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file) | |||
196 | return single_open(file, supported_enctypes_show, NULL); | 196 | return single_open(file, supported_enctypes_show, NULL); |
197 | } | 197 | } |
198 | 198 | ||
199 | static struct file_operations supported_enctypes_ops = { | 199 | static const struct file_operations supported_enctypes_ops = { |
200 | .open = supported_enctypes_open, | 200 | .open = supported_enctypes_open, |
201 | .read = seq_read, | 201 | .read = seq_read, |
202 | .llseek = seq_lseek, | 202 | .llseek = seq_lseek, |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index c616a70e8cf9..959815c1e017 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -287,9 +287,6 @@ static int inotify_release(struct inode *ignored, struct file *file) | |||
287 | 287 | ||
288 | pr_debug("%s: group=%p\n", __func__, group); | 288 | pr_debug("%s: group=%p\n", __func__, group); |
289 | 289 | ||
290 | if (file->f_flags & FASYNC) | ||
291 | fsnotify_fasync(-1, file, 0); | ||
292 | |||
293 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ | 290 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ |
294 | fsnotify_destroy_group(group); | 291 | fsnotify_destroy_group(group); |
295 | 292 | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 5b2d4f0853ac..1da4b81e6f76 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2129 | 2129 | ||
2130 | BUG_ON(iocb->ki_pos != pos); | 2130 | BUG_ON(iocb->ki_pos != pos); |
2131 | 2131 | ||
2132 | sb_start_write(inode->i_sb); | ||
2133 | mutex_lock(&inode->i_mutex); | 2132 | mutex_lock(&inode->i_mutex); |
2134 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | 2133 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); |
2135 | mutex_unlock(&inode->i_mutex); | 2134 | mutex_unlock(&inode->i_mutex); |
@@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2138 | if (err < 0) | 2137 | if (err < 0) |
2139 | ret = err; | 2138 | ret = err; |
2140 | } | 2139 | } |
2141 | sb_end_write(inode->i_sb); | ||
2142 | return ret; | 2140 | return ret; |
2143 | } | 2141 | } |
2144 | 2142 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6474cb44004d..8a7509f9e6f5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2248 | if (iocb->ki_left == 0) | 2248 | if (iocb->ki_left == 0) |
2249 | return 0; | 2249 | return 0; |
2250 | 2250 | ||
2251 | sb_start_write(inode->i_sb); | ||
2252 | |||
2253 | appending = file->f_flags & O_APPEND ? 1 : 0; | 2251 | appending = file->f_flags & O_APPEND ? 1 : 0; |
2254 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; | 2252 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; |
2255 | 2253 | ||
@@ -2423,7 +2421,6 @@ out_sems: | |||
2423 | ocfs2_iocb_clear_sem_locked(iocb); | 2421 | ocfs2_iocb_clear_sem_locked(iocb); |
2424 | 2422 | ||
2425 | mutex_unlock(&inode->i_mutex); | 2423 | mutex_unlock(&inode->i_mutex); |
2426 | sb_end_write(inode->i_sb); | ||
2427 | 2424 | ||
2428 | if (written) | 2425 | if (written) |
2429 | ret = written; | 2426 | ret = written; |
@@ -2468,8 +2465,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
2468 | out->f_path.dentry->d_name.len, | 2465 | out->f_path.dentry->d_name.len, |
2469 | out->f_path.dentry->d_name.name, len); | 2466 | out->f_path.dentry->d_name.name, len); |
2470 | 2467 | ||
2471 | if (pipe->inode) | 2468 | pipe_lock(pipe); |
2472 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | ||
2473 | 2469 | ||
2474 | splice_from_pipe_begin(&sd); | 2470 | splice_from_pipe_begin(&sd); |
2475 | do { | 2471 | do { |
@@ -2489,8 +2485,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
2489 | } while (ret > 0); | 2485 | } while (ret > 0); |
2490 | splice_from_pipe_end(pipe, &sd); | 2486 | splice_from_pipe_end(pipe, &sd); |
2491 | 2487 | ||
2492 | if (pipe->inode) | 2488 | pipe_unlock(pipe); |
2493 | mutex_unlock(&pipe->inode->i_mutex); | ||
2494 | 2489 | ||
2495 | if (sd.num_spliced) | 2490 | if (sd.num_spliced) |
2496 | ret = sd.num_spliced; | 2491 | ret = sd.num_spliced; |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <asm/ioctls.h> | 26 | #include <asm/ioctls.h> |
27 | 27 | ||
28 | #include "internal.h" | ||
29 | |||
28 | /* | 30 | /* |
29 | * The max size that a non-root user is allowed to grow the pipe. Can | 31 | * The max size that a non-root user is allowed to grow the pipe. Can |
30 | * be set by root in /proc/sys/fs/pipe-max-size | 32 | * be set by root in /proc/sys/fs/pipe-max-size |
@@ -53,8 +55,8 @@ unsigned int pipe_min_size = PAGE_SIZE; | |||
53 | 55 | ||
54 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) | 56 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) |
55 | { | 57 | { |
56 | if (pipe->inode) | 58 | if (pipe->files) |
57 | mutex_lock_nested(&pipe->inode->i_mutex, subclass); | 59 | mutex_lock_nested(&pipe->mutex, subclass); |
58 | } | 60 | } |
59 | 61 | ||
60 | void pipe_lock(struct pipe_inode_info *pipe) | 62 | void pipe_lock(struct pipe_inode_info *pipe) |
@@ -68,11 +70,21 @@ EXPORT_SYMBOL(pipe_lock); | |||
68 | 70 | ||
69 | void pipe_unlock(struct pipe_inode_info *pipe) | 71 | void pipe_unlock(struct pipe_inode_info *pipe) |
70 | { | 72 | { |
71 | if (pipe->inode) | 73 | if (pipe->files) |
72 | mutex_unlock(&pipe->inode->i_mutex); | 74 | mutex_unlock(&pipe->mutex); |
73 | } | 75 | } |
74 | EXPORT_SYMBOL(pipe_unlock); | 76 | EXPORT_SYMBOL(pipe_unlock); |
75 | 77 | ||
78 | static inline void __pipe_lock(struct pipe_inode_info *pipe) | ||
79 | { | ||
80 | mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); | ||
81 | } | ||
82 | |||
83 | static inline void __pipe_unlock(struct pipe_inode_info *pipe) | ||
84 | { | ||
85 | mutex_unlock(&pipe->mutex); | ||
86 | } | ||
87 | |||
76 | void pipe_double_lock(struct pipe_inode_info *pipe1, | 88 | void pipe_double_lock(struct pipe_inode_info *pipe1, |
77 | struct pipe_inode_info *pipe2) | 89 | struct pipe_inode_info *pipe2) |
78 | { | 90 | { |
@@ -361,8 +373,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
361 | unsigned long nr_segs, loff_t pos) | 373 | unsigned long nr_segs, loff_t pos) |
362 | { | 374 | { |
363 | struct file *filp = iocb->ki_filp; | 375 | struct file *filp = iocb->ki_filp; |
364 | struct inode *inode = file_inode(filp); | 376 | struct pipe_inode_info *pipe = filp->private_data; |
365 | struct pipe_inode_info *pipe; | ||
366 | int do_wakeup; | 377 | int do_wakeup; |
367 | ssize_t ret; | 378 | ssize_t ret; |
368 | struct iovec *iov = (struct iovec *)_iov; | 379 | struct iovec *iov = (struct iovec *)_iov; |
@@ -375,8 +386,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
375 | 386 | ||
376 | do_wakeup = 0; | 387 | do_wakeup = 0; |
377 | ret = 0; | 388 | ret = 0; |
378 | mutex_lock(&inode->i_mutex); | 389 | __pipe_lock(pipe); |
379 | pipe = inode->i_pipe; | ||
380 | for (;;) { | 390 | for (;;) { |
381 | int bufs = pipe->nrbufs; | 391 | int bufs = pipe->nrbufs; |
382 | if (bufs) { | 392 | if (bufs) { |
@@ -464,7 +474,7 @@ redo: | |||
464 | } | 474 | } |
465 | pipe_wait(pipe); | 475 | pipe_wait(pipe); |
466 | } | 476 | } |
467 | mutex_unlock(&inode->i_mutex); | 477 | __pipe_unlock(pipe); |
468 | 478 | ||
469 | /* Signal writers asynchronously that there is more room. */ | 479 | /* Signal writers asynchronously that there is more room. */ |
470 | if (do_wakeup) { | 480 | if (do_wakeup) { |
@@ -486,8 +496,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
486 | unsigned long nr_segs, loff_t ppos) | 496 | unsigned long nr_segs, loff_t ppos) |
487 | { | 497 | { |
488 | struct file *filp = iocb->ki_filp; | 498 | struct file *filp = iocb->ki_filp; |
489 | struct inode *inode = file_inode(filp); | 499 | struct pipe_inode_info *pipe = filp->private_data; |
490 | struct pipe_inode_info *pipe; | ||
491 | ssize_t ret; | 500 | ssize_t ret; |
492 | int do_wakeup; | 501 | int do_wakeup; |
493 | struct iovec *iov = (struct iovec *)_iov; | 502 | struct iovec *iov = (struct iovec *)_iov; |
@@ -501,8 +510,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
501 | 510 | ||
502 | do_wakeup = 0; | 511 | do_wakeup = 0; |
503 | ret = 0; | 512 | ret = 0; |
504 | mutex_lock(&inode->i_mutex); | 513 | __pipe_lock(pipe); |
505 | pipe = inode->i_pipe; | ||
506 | 514 | ||
507 | if (!pipe->readers) { | 515 | if (!pipe->readers) { |
508 | send_sig(SIGPIPE, current, 0); | 516 | send_sig(SIGPIPE, current, 0); |
@@ -649,7 +657,7 @@ redo2: | |||
649 | pipe->waiting_writers--; | 657 | pipe->waiting_writers--; |
650 | } | 658 | } |
651 | out: | 659 | out: |
652 | mutex_unlock(&inode->i_mutex); | 660 | __pipe_unlock(pipe); |
653 | if (do_wakeup) { | 661 | if (do_wakeup) { |
654 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); | 662 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); |
655 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 663 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
@@ -662,29 +670,14 @@ out: | |||
662 | return ret; | 670 | return ret; |
663 | } | 671 | } |
664 | 672 | ||
665 | static ssize_t | ||
666 | bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) | ||
667 | { | ||
668 | return -EBADF; | ||
669 | } | ||
670 | |||
671 | static ssize_t | ||
672 | bad_pipe_w(struct file *filp, const char __user *buf, size_t count, | ||
673 | loff_t *ppos) | ||
674 | { | ||
675 | return -EBADF; | ||
676 | } | ||
677 | |||
678 | static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 673 | static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
679 | { | 674 | { |
680 | struct inode *inode = file_inode(filp); | 675 | struct pipe_inode_info *pipe = filp->private_data; |
681 | struct pipe_inode_info *pipe; | ||
682 | int count, buf, nrbufs; | 676 | int count, buf, nrbufs; |
683 | 677 | ||
684 | switch (cmd) { | 678 | switch (cmd) { |
685 | case FIONREAD: | 679 | case FIONREAD: |
686 | mutex_lock(&inode->i_mutex); | 680 | __pipe_lock(pipe); |
687 | pipe = inode->i_pipe; | ||
688 | count = 0; | 681 | count = 0; |
689 | buf = pipe->curbuf; | 682 | buf = pipe->curbuf; |
690 | nrbufs = pipe->nrbufs; | 683 | nrbufs = pipe->nrbufs; |
@@ -692,7 +685,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
692 | count += pipe->bufs[buf].len; | 685 | count += pipe->bufs[buf].len; |
693 | buf = (buf+1) & (pipe->buffers - 1); | 686 | buf = (buf+1) & (pipe->buffers - 1); |
694 | } | 687 | } |
695 | mutex_unlock(&inode->i_mutex); | 688 | __pipe_unlock(pipe); |
696 | 689 | ||
697 | return put_user(count, (int __user *)arg); | 690 | return put_user(count, (int __user *)arg); |
698 | default: | 691 | default: |
@@ -705,8 +698,7 @@ static unsigned int | |||
705 | pipe_poll(struct file *filp, poll_table *wait) | 698 | pipe_poll(struct file *filp, poll_table *wait) |
706 | { | 699 | { |
707 | unsigned int mask; | 700 | unsigned int mask; |
708 | struct inode *inode = file_inode(filp); | 701 | struct pipe_inode_info *pipe = filp->private_data; |
709 | struct pipe_inode_info *pipe = inode->i_pipe; | ||
710 | int nrbufs; | 702 | int nrbufs; |
711 | 703 | ||
712 | poll_wait(filp, &pipe->wait, wait); | 704 | poll_wait(filp, &pipe->wait, wait); |
@@ -734,197 +726,56 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
734 | } | 726 | } |
735 | 727 | ||
736 | static int | 728 | static int |
737 | pipe_release(struct inode *inode, int decr, int decw) | 729 | pipe_release(struct inode *inode, struct file *file) |
738 | { | 730 | { |
739 | struct pipe_inode_info *pipe; | 731 | struct pipe_inode_info *pipe = inode->i_pipe; |
732 | int kill = 0; | ||
740 | 733 | ||
741 | mutex_lock(&inode->i_mutex); | 734 | __pipe_lock(pipe); |
742 | pipe = inode->i_pipe; | 735 | if (file->f_mode & FMODE_READ) |
743 | pipe->readers -= decr; | 736 | pipe->readers--; |
744 | pipe->writers -= decw; | 737 | if (file->f_mode & FMODE_WRITE) |
738 | pipe->writers--; | ||
745 | 739 | ||
746 | if (!pipe->readers && !pipe->writers) { | 740 | if (pipe->readers || pipe->writers) { |
747 | free_pipe_info(inode); | ||
748 | } else { | ||
749 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); | 741 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); |
750 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 742 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
751 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 743 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
752 | } | 744 | } |
753 | mutex_unlock(&inode->i_mutex); | 745 | spin_lock(&inode->i_lock); |
754 | 746 | if (!--pipe->files) { | |
755 | return 0; | 747 | inode->i_pipe = NULL; |
756 | } | 748 | kill = 1; |
757 | 749 | } | |
758 | static int | 750 | spin_unlock(&inode->i_lock); |
759 | pipe_read_fasync(int fd, struct file *filp, int on) | 751 | __pipe_unlock(pipe); |
760 | { | ||
761 | struct inode *inode = file_inode(filp); | ||
762 | int retval; | ||
763 | |||
764 | mutex_lock(&inode->i_mutex); | ||
765 | retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); | ||
766 | mutex_unlock(&inode->i_mutex); | ||
767 | |||
768 | return retval; | ||
769 | } | ||
770 | |||
771 | |||
772 | static int | ||
773 | pipe_write_fasync(int fd, struct file *filp, int on) | ||
774 | { | ||
775 | struct inode *inode = file_inode(filp); | ||
776 | int retval; | ||
777 | 752 | ||
778 | mutex_lock(&inode->i_mutex); | 753 | if (kill) |
779 | retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); | 754 | free_pipe_info(pipe); |
780 | mutex_unlock(&inode->i_mutex); | ||
781 | 755 | ||
782 | return retval; | 756 | return 0; |
783 | } | 757 | } |
784 | 758 | ||
785 | |||
786 | static int | 759 | static int |
787 | pipe_rdwr_fasync(int fd, struct file *filp, int on) | 760 | pipe_fasync(int fd, struct file *filp, int on) |
788 | { | 761 | { |
789 | struct inode *inode = file_inode(filp); | 762 | struct pipe_inode_info *pipe = filp->private_data; |
790 | struct pipe_inode_info *pipe = inode->i_pipe; | 763 | int retval = 0; |
791 | int retval; | ||
792 | 764 | ||
793 | mutex_lock(&inode->i_mutex); | 765 | __pipe_lock(pipe); |
794 | retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); | 766 | if (filp->f_mode & FMODE_READ) |
795 | if (retval >= 0) { | 767 | retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); |
768 | if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { | ||
796 | retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); | 769 | retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); |
797 | if (retval < 0) /* this can happen only if on == T */ | 770 | if (retval < 0 && (filp->f_mode & FMODE_READ)) |
771 | /* this can happen only if on == T */ | ||
798 | fasync_helper(-1, filp, 0, &pipe->fasync_readers); | 772 | fasync_helper(-1, filp, 0, &pipe->fasync_readers); |
799 | } | 773 | } |
800 | mutex_unlock(&inode->i_mutex); | 774 | __pipe_unlock(pipe); |
801 | return retval; | 775 | return retval; |
802 | } | 776 | } |
803 | 777 | ||
804 | 778 | struct pipe_inode_info *alloc_pipe_info(void) | |
805 | static int | ||
806 | pipe_read_release(struct inode *inode, struct file *filp) | ||
807 | { | ||
808 | return pipe_release(inode, 1, 0); | ||
809 | } | ||
810 | |||
811 | static int | ||
812 | pipe_write_release(struct inode *inode, struct file *filp) | ||
813 | { | ||
814 | return pipe_release(inode, 0, 1); | ||
815 | } | ||
816 | |||
817 | static int | ||
818 | pipe_rdwr_release(struct inode *inode, struct file *filp) | ||
819 | { | ||
820 | int decr, decw; | ||
821 | |||
822 | decr = (filp->f_mode & FMODE_READ) != 0; | ||
823 | decw = (filp->f_mode & FMODE_WRITE) != 0; | ||
824 | return pipe_release(inode, decr, decw); | ||
825 | } | ||
826 | |||
827 | static int | ||
828 | pipe_read_open(struct inode *inode, struct file *filp) | ||
829 | { | ||
830 | int ret = -ENOENT; | ||
831 | |||
832 | mutex_lock(&inode->i_mutex); | ||
833 | |||
834 | if (inode->i_pipe) { | ||
835 | ret = 0; | ||
836 | inode->i_pipe->readers++; | ||
837 | } | ||
838 | |||
839 | mutex_unlock(&inode->i_mutex); | ||
840 | |||
841 | return ret; | ||
842 | } | ||
843 | |||
844 | static int | ||
845 | pipe_write_open(struct inode *inode, struct file *filp) | ||
846 | { | ||
847 | int ret = -ENOENT; | ||
848 | |||
849 | mutex_lock(&inode->i_mutex); | ||
850 | |||
851 | if (inode->i_pipe) { | ||
852 | ret = 0; | ||
853 | inode->i_pipe->writers++; | ||
854 | } | ||
855 | |||
856 | mutex_unlock(&inode->i_mutex); | ||
857 | |||
858 | return ret; | ||
859 | } | ||
860 | |||
861 | static int | ||
862 | pipe_rdwr_open(struct inode *inode, struct file *filp) | ||
863 | { | ||
864 | int ret = -ENOENT; | ||
865 | |||
866 | if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) | ||
867 | return -EINVAL; | ||
868 | |||
869 | mutex_lock(&inode->i_mutex); | ||
870 | |||
871 | if (inode->i_pipe) { | ||
872 | ret = 0; | ||
873 | if (filp->f_mode & FMODE_READ) | ||
874 | inode->i_pipe->readers++; | ||
875 | if (filp->f_mode & FMODE_WRITE) | ||
876 | inode->i_pipe->writers++; | ||
877 | } | ||
878 | |||
879 | mutex_unlock(&inode->i_mutex); | ||
880 | |||
881 | return ret; | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | * The file_operations structs are not static because they | ||
886 | * are also used in linux/fs/fifo.c to do operations on FIFOs. | ||
887 | * | ||
888 | * Pipes reuse fifos' file_operations structs. | ||
889 | */ | ||
890 | const struct file_operations read_pipefifo_fops = { | ||
891 | .llseek = no_llseek, | ||
892 | .read = do_sync_read, | ||
893 | .aio_read = pipe_read, | ||
894 | .write = bad_pipe_w, | ||
895 | .poll = pipe_poll, | ||
896 | .unlocked_ioctl = pipe_ioctl, | ||
897 | .open = pipe_read_open, | ||
898 | .release = pipe_read_release, | ||
899 | .fasync = pipe_read_fasync, | ||
900 | }; | ||
901 | |||
902 | const struct file_operations write_pipefifo_fops = { | ||
903 | .llseek = no_llseek, | ||
904 | .read = bad_pipe_r, | ||
905 | .write = do_sync_write, | ||
906 | .aio_write = pipe_write, | ||
907 | .poll = pipe_poll, | ||
908 | .unlocked_ioctl = pipe_ioctl, | ||
909 | .open = pipe_write_open, | ||
910 | .release = pipe_write_release, | ||
911 | .fasync = pipe_write_fasync, | ||
912 | }; | ||
913 | |||
914 | const struct file_operations rdwr_pipefifo_fops = { | ||
915 | .llseek = no_llseek, | ||
916 | .read = do_sync_read, | ||
917 | .aio_read = pipe_read, | ||
918 | .write = do_sync_write, | ||
919 | .aio_write = pipe_write, | ||
920 | .poll = pipe_poll, | ||
921 | .unlocked_ioctl = pipe_ioctl, | ||
922 | .open = pipe_rdwr_open, | ||
923 | .release = pipe_rdwr_release, | ||
924 | .fasync = pipe_rdwr_fasync, | ||
925 | }; | ||
926 | |||
927 | struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | ||
928 | { | 779 | { |
929 | struct pipe_inode_info *pipe; | 780 | struct pipe_inode_info *pipe; |
930 | 781 | ||
@@ -934,8 +785,8 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
934 | if (pipe->bufs) { | 785 | if (pipe->bufs) { |
935 | init_waitqueue_head(&pipe->wait); | 786 | init_waitqueue_head(&pipe->wait); |
936 | pipe->r_counter = pipe->w_counter = 1; | 787 | pipe->r_counter = pipe->w_counter = 1; |
937 | pipe->inode = inode; | ||
938 | pipe->buffers = PIPE_DEF_BUFFERS; | 788 | pipe->buffers = PIPE_DEF_BUFFERS; |
789 | mutex_init(&pipe->mutex); | ||
939 | return pipe; | 790 | return pipe; |
940 | } | 791 | } |
941 | kfree(pipe); | 792 | kfree(pipe); |
@@ -944,7 +795,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
944 | return NULL; | 795 | return NULL; |
945 | } | 796 | } |
946 | 797 | ||
947 | void __free_pipe_info(struct pipe_inode_info *pipe) | 798 | void free_pipe_info(struct pipe_inode_info *pipe) |
948 | { | 799 | { |
949 | int i; | 800 | int i; |
950 | 801 | ||
@@ -959,12 +810,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe) | |||
959 | kfree(pipe); | 810 | kfree(pipe); |
960 | } | 811 | } |
961 | 812 | ||
962 | void free_pipe_info(struct inode *inode) | ||
963 | { | ||
964 | __free_pipe_info(inode->i_pipe); | ||
965 | inode->i_pipe = NULL; | ||
966 | } | ||
967 | |||
968 | static struct vfsmount *pipe_mnt __read_mostly; | 813 | static struct vfsmount *pipe_mnt __read_mostly; |
969 | 814 | ||
970 | /* | 815 | /* |
@@ -990,13 +835,14 @@ static struct inode * get_pipe_inode(void) | |||
990 | 835 | ||
991 | inode->i_ino = get_next_ino(); | 836 | inode->i_ino = get_next_ino(); |
992 | 837 | ||
993 | pipe = alloc_pipe_info(inode); | 838 | pipe = alloc_pipe_info(); |
994 | if (!pipe) | 839 | if (!pipe) |
995 | goto fail_iput; | 840 | goto fail_iput; |
996 | inode->i_pipe = pipe; | ||
997 | 841 | ||
842 | inode->i_pipe = pipe; | ||
843 | pipe->files = 2; | ||
998 | pipe->readers = pipe->writers = 1; | 844 | pipe->readers = pipe->writers = 1; |
999 | inode->i_fop = &rdwr_pipefifo_fops; | 845 | inode->i_fop = &pipefifo_fops; |
1000 | 846 | ||
1001 | /* | 847 | /* |
1002 | * Mark the inode dirty from the very beginning, | 848 | * Mark the inode dirty from the very beginning, |
@@ -1039,17 +885,19 @@ int create_pipe_files(struct file **res, int flags) | |||
1039 | d_instantiate(path.dentry, inode); | 885 | d_instantiate(path.dentry, inode); |
1040 | 886 | ||
1041 | err = -ENFILE; | 887 | err = -ENFILE; |
1042 | f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); | 888 | f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); |
1043 | if (IS_ERR(f)) | 889 | if (IS_ERR(f)) |
1044 | goto err_dentry; | 890 | goto err_dentry; |
1045 | 891 | ||
1046 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); | 892 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); |
893 | f->private_data = inode->i_pipe; | ||
1047 | 894 | ||
1048 | res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); | 895 | res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); |
1049 | if (IS_ERR(res[0])) | 896 | if (IS_ERR(res[0])) |
1050 | goto err_file; | 897 | goto err_file; |
1051 | 898 | ||
1052 | path_get(&path); | 899 | path_get(&path); |
900 | res[0]->private_data = inode->i_pipe; | ||
1053 | res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); | 901 | res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); |
1054 | res[1] = f; | 902 | res[1] = f; |
1055 | return 0; | 903 | return 0; |
@@ -1057,12 +905,12 @@ int create_pipe_files(struct file **res, int flags) | |||
1057 | err_file: | 905 | err_file: |
1058 | put_filp(f); | 906 | put_filp(f); |
1059 | err_dentry: | 907 | err_dentry: |
1060 | free_pipe_info(inode); | 908 | free_pipe_info(inode->i_pipe); |
1061 | path_put(&path); | 909 | path_put(&path); |
1062 | return err; | 910 | return err; |
1063 | 911 | ||
1064 | err_inode: | 912 | err_inode: |
1065 | free_pipe_info(inode); | 913 | free_pipe_info(inode->i_pipe); |
1066 | iput(inode); | 914 | iput(inode); |
1067 | return err; | 915 | return err; |
1068 | } | 916 | } |
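Note on the create_pipe_files() hunks above: both struct files now cache inode->i_pipe in ->private_data (new lines 893 and 900), which matters because the pipe itself no longer keeps a back-pointer to the inode (the old pipe->inode assignment is dropped in alloc_pipe_info()). A minimal sketch of the lookup this enables; the helper name pipe_of is made up, the real f_op methods presumably just open-code the dereference:

static inline struct pipe_inode_info *pipe_of(struct file *filp)
{
	/* valid for files set up by create_pipe_files()/fifo_open(),
	   which store the pipe in ->private_data */
	return filp->private_data;
}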
@@ -1144,6 +992,168 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
1144 | return sys_pipe2(fildes, 0); | 992 | return sys_pipe2(fildes, 0); |
1145 | } | 993 | } |
1146 | 994 | ||
995 | static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) | ||
996 | { | ||
997 | int cur = *cnt; | ||
998 | |||
999 | while (cur == *cnt) { | ||
1000 | pipe_wait(pipe); | ||
1001 | if (signal_pending(current)) | ||
1002 | break; | ||
1003 | } | ||
1004 | return cur == *cnt ? -ERESTARTSYS : 0; | ||
1005 | } | ||
1006 | |||
1007 | static void wake_up_partner(struct pipe_inode_info *pipe) | ||
1008 | { | ||
1009 | wake_up_interruptible(&pipe->wait); | ||
1010 | } | ||
1011 | |||
1012 | static int fifo_open(struct inode *inode, struct file *filp) | ||
1013 | { | ||
1014 | struct pipe_inode_info *pipe; | ||
1015 | bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; | ||
1016 | int kill = 0; | ||
1017 | int ret; | ||
1018 | |||
1019 | filp->f_version = 0; | ||
1020 | |||
1021 | spin_lock(&inode->i_lock); | ||
1022 | if (inode->i_pipe) { | ||
1023 | pipe = inode->i_pipe; | ||
1024 | pipe->files++; | ||
1025 | spin_unlock(&inode->i_lock); | ||
1026 | } else { | ||
1027 | spin_unlock(&inode->i_lock); | ||
1028 | pipe = alloc_pipe_info(); | ||
1029 | if (!pipe) | ||
1030 | return -ENOMEM; | ||
1031 | pipe->files = 1; | ||
1032 | spin_lock(&inode->i_lock); | ||
1033 | if (unlikely(inode->i_pipe)) { | ||
1034 | inode->i_pipe->files++; | ||
1035 | spin_unlock(&inode->i_lock); | ||
1036 | free_pipe_info(pipe); | ||
1037 | pipe = inode->i_pipe; | ||
1038 | } else { | ||
1039 | inode->i_pipe = pipe; | ||
1040 | spin_unlock(&inode->i_lock); | ||
1041 | } | ||
1042 | } | ||
1043 | filp->private_data = pipe; | ||
1044 | /* OK, we have a pipe and it's pinned down */ | ||
1045 | |||
1046 | __pipe_lock(pipe); | ||
1047 | |||
1048 | /* We can only do regular read/write on fifos */ | ||
1049 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); | ||
1050 | |||
1051 | switch (filp->f_mode) { | ||
1052 | case FMODE_READ: | ||
1053 | /* | ||
1054 | * O_RDONLY | ||
1055 | * POSIX.1 says that O_NONBLOCK means return with the FIFO | ||
1056 | * opened, even when there is no process writing the FIFO. | ||
1057 | */ | ||
1058 | pipe->r_counter++; | ||
1059 | if (pipe->readers++ == 0) | ||
1060 | wake_up_partner(pipe); | ||
1061 | |||
1062 | if (!is_pipe && !pipe->writers) { | ||
1063 | if ((filp->f_flags & O_NONBLOCK)) { | ||
1064 | /* suppress POLLHUP until we have | ||
1065 | * seen a writer */ | ||
1066 | filp->f_version = pipe->w_counter; | ||
1067 | } else { | ||
1068 | if (wait_for_partner(pipe, &pipe->w_counter)) | ||
1069 | goto err_rd; | ||
1070 | } | ||
1071 | } | ||
1072 | break; | ||
1073 | |||
1074 | case FMODE_WRITE: | ||
1075 | /* | ||
1076 | * O_WRONLY | ||
1077 | * POSIX.1 says that O_NONBLOCK means return -1 with | ||
1078 | * errno=ENXIO when there is no process reading the FIFO. | ||
1079 | */ | ||
1080 | ret = -ENXIO; | ||
1081 | if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers) | ||
1082 | goto err; | ||
1083 | |||
1084 | pipe->w_counter++; | ||
1085 | if (!pipe->writers++) | ||
1086 | wake_up_partner(pipe); | ||
1087 | |||
1088 | if (!is_pipe && !pipe->readers) { | ||
1089 | if (wait_for_partner(pipe, &pipe->r_counter)) | ||
1090 | goto err_wr; | ||
1091 | } | ||
1092 | break; | ||
1093 | |||
1094 | case FMODE_READ | FMODE_WRITE: | ||
1095 | /* | ||
1096 | * O_RDWR | ||
1097 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. | ||
1098 | * This implementation will NEVER block on a O_RDWR open, since | ||
1099 | * the process can at least talk to itself. | ||
1100 | */ | ||
1101 | |||
1102 | pipe->readers++; | ||
1103 | pipe->writers++; | ||
1104 | pipe->r_counter++; | ||
1105 | pipe->w_counter++; | ||
1106 | if (pipe->readers == 1 || pipe->writers == 1) | ||
1107 | wake_up_partner(pipe); | ||
1108 | break; | ||
1109 | |||
1110 | default: | ||
1111 | ret = -EINVAL; | ||
1112 | goto err; | ||
1113 | } | ||
1114 | |||
1115 | /* Ok! */ | ||
1116 | __pipe_unlock(pipe); | ||
1117 | return 0; | ||
1118 | |||
1119 | err_rd: | ||
1120 | if (!--pipe->readers) | ||
1121 | wake_up_interruptible(&pipe->wait); | ||
1122 | ret = -ERESTARTSYS; | ||
1123 | goto err; | ||
1124 | |||
1125 | err_wr: | ||
1126 | if (!--pipe->writers) | ||
1127 | wake_up_interruptible(&pipe->wait); | ||
1128 | ret = -ERESTARTSYS; | ||
1129 | goto err; | ||
1130 | |||
1131 | err: | ||
1132 | spin_lock(&inode->i_lock); | ||
1133 | if (!--pipe->files) { | ||
1134 | inode->i_pipe = NULL; | ||
1135 | kill = 1; | ||
1136 | } | ||
1137 | spin_unlock(&inode->i_lock); | ||
1138 | __pipe_unlock(pipe); | ||
1139 | if (kill) | ||
1140 | free_pipe_info(pipe); | ||
1141 | return ret; | ||
1142 | } | ||
1143 | |||
1144 | const struct file_operations pipefifo_fops = { | ||
1145 | .open = fifo_open, | ||
1146 | .llseek = no_llseek, | ||
1147 | .read = do_sync_read, | ||
1148 | .aio_read = pipe_read, | ||
1149 | .write = do_sync_write, | ||
1150 | .aio_write = pipe_write, | ||
1151 | .poll = pipe_poll, | ||
1152 | .unlocked_ioctl = pipe_ioctl, | ||
1153 | .release = pipe_release, | ||
1154 | .fasync = pipe_fasync, | ||
1155 | }; | ||
1156 | |||
1147 | /* | 1157 | /* |
1148 | * Allocate a new array of pipe buffers and copy the info over. Returns the | 1158 | * Allocate a new array of pipe buffers and copy the info over. Returns the |
1149 | * pipe size if successful, or return -ERROR on error. | 1159 | * pipe size if successful, or return -ERROR on error. |
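The comments inside the new fifo_open() above spell out the POSIX open rules it implements. As an illustration only, a small userspace check of those rules could look like this (the FIFO path is made up, the FIFO is assumed to exist already, and no other process is assumed to have it open):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/tmp/testfifo";	/* created beforehand with mkfifo(1) */
	int fd;

	/* O_WRONLY|O_NONBLOCK with no reader: open() fails with ENXIO */
	if (open(path, O_WRONLY | O_NONBLOCK) < 0)
		printf("write end, non-blocking: %s\n", strerror(errno));

	/* O_RDONLY|O_NONBLOCK: succeeds even though no writer exists yet */
	fd = open(path, O_RDONLY | O_NONBLOCK);
	printf("read end, non-blocking: fd=%d\n", fd);
	if (fd >= 0)
		close(fd);

	/* O_RDWR never blocks: the opener counts as both reader and writer */
	fd = open(path, O_RDWR);
	printf("read-write end: fd=%d\n", fd);
	return 0;
}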
@@ -1229,9 +1239,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, | |||
1229 | */ | 1239 | */ |
1230 | struct pipe_inode_info *get_pipe_info(struct file *file) | 1240 | struct pipe_inode_info *get_pipe_info(struct file *file) |
1231 | { | 1241 | { |
1232 | struct inode *i = file_inode(file); | 1242 | return file->f_op == &pipefifo_fops ? file->private_data : NULL; |
1233 | |||
1234 | return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; | ||
1235 | } | 1243 | } |
1236 | 1244 | ||
1237 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | 1245 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) |
@@ -1243,7 +1251,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1243 | if (!pipe) | 1251 | if (!pipe) |
1244 | return -EBADF; | 1252 | return -EBADF; |
1245 | 1253 | ||
1246 | mutex_lock(&pipe->inode->i_mutex); | 1254 | __pipe_lock(pipe); |
1247 | 1255 | ||
1248 | switch (cmd) { | 1256 | switch (cmd) { |
1249 | case F_SETPIPE_SZ: { | 1257 | case F_SETPIPE_SZ: { |
@@ -1272,7 +1280,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1272 | } | 1280 | } |
1273 | 1281 | ||
1274 | out: | 1282 | out: |
1275 | mutex_unlock(&pipe->inode->i_mutex); | 1283 | __pipe_unlock(pipe); |
1276 | return ret; | 1284 | return ret; |
1277 | } | 1285 | } |
1278 | 1286 | ||
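Throughout fs/pipe.c the locking switches from inode->i_mutex to a per-pipe lock: alloc_pipe_info() now does mutex_init(&pipe->mutex), and callers such as pipe_fcntl() take __pipe_lock()/__pipe_unlock() instead. The helpers themselves are not shown in these hunks; presumably they are thin wrappers along these lines (a sketch, not the actual definitions):

static void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock(&pipe->mutex);
}

static void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}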
diff --git a/fs/pnode.c b/fs/pnode.c index 8b29d2164da6..3d2a7141b87a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -218,7 +218,7 @@ static struct mount *get_source(struct mount *dest, | |||
218 | * @source_mnt: source mount. | 218 | * @source_mnt: source mount. |
219 | * @tree_list : list of heads of trees to be attached. | 219 | * @tree_list : list of heads of trees to be attached. |
220 | */ | 220 | */ |
221 | int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | 221 | int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, |
222 | struct mount *source_mnt, struct list_head *tree_list) | 222 | struct mount *source_mnt, struct list_head *tree_list) |
223 | { | 223 | { |
224 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | 224 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
@@ -227,7 +227,6 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
227 | struct mount *prev_dest_mnt = dest_mnt; | 227 | struct mount *prev_dest_mnt = dest_mnt; |
228 | struct mount *prev_src_mnt = source_mnt; | 228 | struct mount *prev_src_mnt = source_mnt; |
229 | LIST_HEAD(tmp_list); | 229 | LIST_HEAD(tmp_list); |
230 | LIST_HEAD(umount_list); | ||
231 | 230 | ||
232 | for (m = propagation_next(dest_mnt, dest_mnt); m; | 231 | for (m = propagation_next(dest_mnt, dest_mnt); m; |
233 | m = propagation_next(m, dest_mnt)) { | 232 | m = propagation_next(m, dest_mnt)) { |
@@ -250,8 +249,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
250 | goto out; | 249 | goto out; |
251 | } | 250 | } |
252 | 251 | ||
253 | if (is_subdir(dest_dentry, m->mnt.mnt_root)) { | 252 | if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { |
254 | mnt_set_mountpoint(m, dest_dentry, child); | 253 | mnt_set_mountpoint(m, dest_mp, child); |
255 | list_add_tail(&child->mnt_hash, tree_list); | 254 | list_add_tail(&child->mnt_hash, tree_list); |
256 | } else { | 255 | } else { |
257 | /* | 256 | /* |
@@ -267,10 +266,9 @@ out: | |||
267 | br_write_lock(&vfsmount_lock); | 266 | br_write_lock(&vfsmount_lock); |
268 | while (!list_empty(&tmp_list)) { | 267 | while (!list_empty(&tmp_list)) { |
269 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); | 268 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); |
270 | umount_tree(child, 0, &umount_list); | 269 | umount_tree(child, 0); |
271 | } | 270 | } |
272 | br_write_unlock(&vfsmount_lock); | 271 | br_write_unlock(&vfsmount_lock); |
273 | release_mounts(&umount_list); | ||
274 | return ret; | 272 | return ret; |
275 | } | 273 | } |
276 | 274 | ||
diff --git a/fs/pnode.h b/fs/pnode.h index a0493d5ebfbf..b091445c1c4a 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -32,17 +32,16 @@ static inline void set_mnt_shared(struct mount *mnt) | |||
32 | } | 32 | } |
33 | 33 | ||
34 | void change_mnt_propagation(struct mount *, int); | 34 | void change_mnt_propagation(struct mount *, int); |
35 | int propagate_mnt(struct mount *, struct dentry *, struct mount *, | 35 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, |
36 | struct list_head *); | 36 | struct list_head *); |
37 | int propagate_umount(struct list_head *); | 37 | int propagate_umount(struct list_head *); |
38 | int propagate_mount_busy(struct mount *, int); | 38 | int propagate_mount_busy(struct mount *, int); |
39 | void mnt_release_group_id(struct mount *); | 39 | void mnt_release_group_id(struct mount *); |
40 | int get_dominating_id(struct mount *mnt, const struct path *root); | 40 | int get_dominating_id(struct mount *mnt, const struct path *root); |
41 | unsigned int mnt_get_count(struct mount *mnt); | 41 | unsigned int mnt_get_count(struct mount *mnt); |
42 | void mnt_set_mountpoint(struct mount *, struct dentry *, | 42 | void mnt_set_mountpoint(struct mount *, struct mountpoint *, |
43 | struct mount *); | 43 | struct mount *); |
44 | void release_mounts(struct list_head *); | 44 | void umount_tree(struct mount *, int); |
45 | void umount_tree(struct mount *, int, struct list_head *); | ||
46 | struct mount *copy_tree(struct mount *, struct dentry *, int); | 45 | struct mount *copy_tree(struct mount *, struct dentry *, int); |
47 | bool is_path_reachable(struct mount *, struct dentry *, | 46 | bool is_path_reachable(struct mount *, struct dentry *, |
48 | const struct path *root); | 47 | const struct path *root); |
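propagate_mnt() and mnt_set_mountpoint() now take a struct mountpoint rather than a bare dentry. The hunks above only show that it carries the mounted-on dentry (dest_mp->m_dentry); purely as an assumption, its shape would be something like:

struct mountpoint {
	struct hlist_node m_hash;	/* lookup by dentry (assumed) */
	struct dentry *m_dentry;	/* the mounted-on dentry, used above */
	int m_count;			/* mounts attached here (assumed) */
};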
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3861bcec41ff..dd51e50001fe 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -405,6 +405,37 @@ static const struct file_operations proc_lstats_operations = { | |||
405 | 405 | ||
406 | #endif | 406 | #endif |
407 | 407 | ||
408 | #ifdef CONFIG_CGROUPS | ||
409 | static int cgroup_open(struct inode *inode, struct file *file) | ||
410 | { | ||
411 | struct pid *pid = PROC_I(inode)->pid; | ||
412 | return single_open(file, proc_cgroup_show, pid); | ||
413 | } | ||
414 | |||
415 | static const struct file_operations proc_cgroup_operations = { | ||
416 | .open = cgroup_open, | ||
417 | .read = seq_read, | ||
418 | .llseek = seq_lseek, | ||
419 | .release = single_release, | ||
420 | }; | ||
421 | #endif | ||
422 | |||
423 | #ifdef CONFIG_PROC_PID_CPUSET | ||
424 | |||
425 | static int cpuset_open(struct inode *inode, struct file *file) | ||
426 | { | ||
427 | struct pid *pid = PROC_I(inode)->pid; | ||
428 | return single_open(file, proc_cpuset_show, pid); | ||
429 | } | ||
430 | |||
431 | static const struct file_operations proc_cpuset_operations = { | ||
432 | .open = cpuset_open, | ||
433 | .read = seq_read, | ||
434 | .llseek = seq_lseek, | ||
435 | .release = single_release, | ||
436 | }; | ||
437 | #endif | ||
438 | |||
408 | static int proc_oom_score(struct task_struct *task, char *buffer) | 439 | static int proc_oom_score(struct task_struct *task, char *buffer) |
409 | { | 440 | { |
410 | unsigned long totalpages = totalram_pages + total_swap_pages; | 441 | unsigned long totalpages = totalram_pages + total_swap_pages; |
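The cgroup_open()/cpuset_open() additions above follow the standard single_open()/seq_file pattern for one-shot per-task files. For illustration, the same shape with hypothetical names (example_show and proc_example_operations are not part of the patch):

static int example_show(struct seq_file *m, void *v)
{
	struct pid *pid = m->private;	/* the pointer handed to single_open() */

	seq_printf(m, "pid %d\n", pid_nr(pid));
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, PROC_I(inode)->pid);
}

static const struct file_operations proc_example_operations = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};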
@@ -1621,6 +1652,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1621 | return 0; | 1652 | return 0; |
1622 | } | 1653 | } |
1623 | 1654 | ||
1655 | int pid_delete_dentry(const struct dentry *dentry) | ||
1656 | { | ||
1657 | /* Is the task we represent dead? | ||
1658 | * If so, then don't put the dentry on the lru list, | ||
1659 | * kill it immediately. | ||
1660 | */ | ||
1661 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1662 | } | ||
1663 | |||
1624 | const struct dentry_operations pid_dentry_operations = | 1664 | const struct dentry_operations pid_dentry_operations = |
1625 | { | 1665 | { |
1626 | .d_revalidate = pid_revalidate, | 1666 | .d_revalidate = pid_revalidate, |
@@ -2893,7 +2933,7 @@ retry: | |||
2893 | return iter; | 2933 | return iter; |
2894 | } | 2934 | } |
2895 | 2935 | ||
2896 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY) | 2936 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) |
2897 | 2937 | ||
2898 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 2938 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
2899 | struct tgid_iter iter) | 2939 | struct tgid_iter iter) |
@@ -2916,13 +2956,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2916 | struct tgid_iter iter; | 2956 | struct tgid_iter iter; |
2917 | struct pid_namespace *ns; | 2957 | struct pid_namespace *ns; |
2918 | filldir_t __filldir; | 2958 | filldir_t __filldir; |
2959 | loff_t pos = filp->f_pos; | ||
2919 | 2960 | ||
2920 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2961 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2921 | goto out; | 2962 | goto out; |
2922 | 2963 | ||
2923 | ns = filp->f_dentry->d_sb->s_fs_info; | 2964 | if (pos == TGID_OFFSET - 1) { |
2965 | if (proc_fill_cache(filp, dirent, filldir, "self", 4, | ||
2966 | NULL, NULL, NULL) < 0) | ||
2967 | goto out; | ||
2968 | iter.tgid = 0; | ||
2969 | } else { | ||
2970 | iter.tgid = pos - TGID_OFFSET; | ||
2971 | } | ||
2924 | iter.task = NULL; | 2972 | iter.task = NULL; |
2925 | iter.tgid = filp->f_pos - TGID_OFFSET; | 2973 | ns = filp->f_dentry->d_sb->s_fs_info; |
2926 | for (iter = next_tgid(ns, iter); | 2974 | for (iter = next_tgid(ns, iter); |
2927 | iter.task; | 2975 | iter.task; |
2928 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2976 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
diff --git a/fs/proc/fd.h b/fs/proc/fd.h index cbb1d47deda8..7c047f256ae2 100644 --- a/fs/proc/fd.h +++ b/fs/proc/fd.h | |||
@@ -11,4 +11,9 @@ extern const struct inode_operations proc_fdinfo_inode_operations; | |||
11 | 11 | ||
12 | extern int proc_fd_permission(struct inode *inode, int mask); | 12 | extern int proc_fd_permission(struct inode *inode, int mask); |
13 | 13 | ||
14 | static inline int proc_fd(struct inode *inode) | ||
15 | { | ||
16 | return PROC_I(inode)->fd; | ||
17 | } | ||
18 | |||
14 | #endif /* __PROCFS_FD_H__ */ | 19 | #endif /* __PROCFS_FD_H__ */ |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 21e1a8f1659d..a2596afffae6 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -36,212 +36,6 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry | |||
36 | return !memcmp(name, de->name, len); | 36 | return !memcmp(name, de->name, len); |
37 | } | 37 | } |
38 | 38 | ||
39 | /* buffer size is one page but our output routines use some slack for overruns */ | ||
40 | #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) | ||
41 | |||
42 | static ssize_t | ||
43 | __proc_file_read(struct file *file, char __user *buf, size_t nbytes, | ||
44 | loff_t *ppos) | ||
45 | { | ||
46 | struct inode * inode = file_inode(file); | ||
47 | char *page; | ||
48 | ssize_t retval=0; | ||
49 | int eof=0; | ||
50 | ssize_t n, count; | ||
51 | char *start; | ||
52 | struct proc_dir_entry * dp; | ||
53 | unsigned long long pos; | ||
54 | |||
55 | /* | ||
56 | * Gaah, please just use "seq_file" instead. The legacy /proc | ||
57 | * interfaces cut loff_t down to off_t for reads, and ignore | ||
58 | * the offset entirely for writes.. | ||
59 | */ | ||
60 | pos = *ppos; | ||
61 | if (pos > MAX_NON_LFS) | ||
62 | return 0; | ||
63 | if (nbytes > MAX_NON_LFS - pos) | ||
64 | nbytes = MAX_NON_LFS - pos; | ||
65 | |||
66 | dp = PDE(inode); | ||
67 | if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) | ||
68 | return -ENOMEM; | ||
69 | |||
70 | while ((nbytes > 0) && !eof) { | ||
71 | count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); | ||
72 | |||
73 | start = NULL; | ||
74 | if (dp->read_proc) { | ||
75 | /* | ||
76 | * How to be a proc read function | ||
77 | * ------------------------------ | ||
78 | * Prototype: | ||
79 | * int f(char *buffer, char **start, off_t offset, | ||
80 | * int count, int *peof, void *dat) | ||
81 | * | ||
82 | * Assume that the buffer is "count" bytes in size. | ||
83 | * | ||
84 | * If you know you have supplied all the data you | ||
85 | * have, set *peof. | ||
86 | * | ||
87 | * You have three ways to return data: | ||
88 | * 0) Leave *start = NULL. (This is the default.) | ||
89 | * Put the data of the requested offset at that | ||
90 | * offset within the buffer. Return the number (n) | ||
91 | * of bytes there are from the beginning of the | ||
92 | * buffer up to the last byte of data. If the | ||
93 | * number of supplied bytes (= n - offset) is | ||
94 | * greater than zero and you didn't signal eof | ||
95 | * and the reader is prepared to take more data | ||
96 | * you will be called again with the requested | ||
97 | * offset advanced by the number of bytes | ||
98 | * absorbed. This interface is useful for files | ||
99 | * no larger than the buffer. | ||
100 | * 1) Set *start = an unsigned long value less than | ||
101 | * the buffer address but greater than zero. | ||
102 | * Put the data of the requested offset at the | ||
103 | * beginning of the buffer. Return the number of | ||
104 | * bytes of data placed there. If this number is | ||
105 | * greater than zero and you didn't signal eof | ||
106 | * and the reader is prepared to take more data | ||
107 | * you will be called again with the requested | ||
108 | * offset advanced by *start. This interface is | ||
109 | * useful when you have a large file consisting | ||
110 | * of a series of blocks which you want to count | ||
111 | * and return as wholes. | ||
112 | * (Hack by Paul.Russell@rustcorp.com.au) | ||
113 | * 2) Set *start = an address within the buffer. | ||
114 | * Put the data of the requested offset at *start. | ||
115 | * Return the number of bytes of data placed there. | ||
116 | * If this number is greater than zero and you | ||
117 | * didn't signal eof and the reader is prepared to | ||
118 | * take more data you will be called again with the | ||
119 | * requested offset advanced by the number of bytes | ||
120 | * absorbed. | ||
121 | */ | ||
122 | n = dp->read_proc(page, &start, *ppos, | ||
123 | count, &eof, dp->data); | ||
124 | } else | ||
125 | break; | ||
126 | |||
127 | if (n == 0) /* end of file */ | ||
128 | break; | ||
129 | if (n < 0) { /* error */ | ||
130 | if (retval == 0) | ||
131 | retval = n; | ||
132 | break; | ||
133 | } | ||
134 | |||
135 | if (start == NULL) { | ||
136 | if (n > PAGE_SIZE) /* Apparent buffer overflow */ | ||
137 | n = PAGE_SIZE; | ||
138 | n -= *ppos; | ||
139 | if (n <= 0) | ||
140 | break; | ||
141 | if (n > count) | ||
142 | n = count; | ||
143 | start = page + *ppos; | ||
144 | } else if (start < page) { | ||
145 | if (n > PAGE_SIZE) /* Apparent buffer overflow */ | ||
146 | n = PAGE_SIZE; | ||
147 | if (n > count) { | ||
148 | /* | ||
149 | * Don't reduce n because doing so might | ||
150 | * cut off part of a data block. | ||
151 | */ | ||
152 | pr_warn("proc_file_read: count exceeded\n"); | ||
153 | } | ||
154 | } else /* start >= page */ { | ||
155 | unsigned long startoff = (unsigned long)(start - page); | ||
156 | if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */ | ||
157 | n = PAGE_SIZE - startoff; | ||
158 | if (n > count) | ||
159 | n = count; | ||
160 | } | ||
161 | |||
162 | n -= copy_to_user(buf, start < page ? page : start, n); | ||
163 | if (n == 0) { | ||
164 | if (retval == 0) | ||
165 | retval = -EFAULT; | ||
166 | break; | ||
167 | } | ||
168 | |||
169 | *ppos += start < page ? (unsigned long)start : n; | ||
170 | nbytes -= n; | ||
171 | buf += n; | ||
172 | retval += n; | ||
173 | } | ||
174 | free_page((unsigned long) page); | ||
175 | return retval; | ||
176 | } | ||
177 | |||
178 | static ssize_t | ||
179 | proc_file_read(struct file *file, char __user *buf, size_t nbytes, | ||
180 | loff_t *ppos) | ||
181 | { | ||
182 | struct proc_dir_entry *pde = PDE(file_inode(file)); | ||
183 | ssize_t rv = -EIO; | ||
184 | |||
185 | spin_lock(&pde->pde_unload_lock); | ||
186 | if (!pde->proc_fops) { | ||
187 | spin_unlock(&pde->pde_unload_lock); | ||
188 | return rv; | ||
189 | } | ||
190 | pde->pde_users++; | ||
191 | spin_unlock(&pde->pde_unload_lock); | ||
192 | |||
193 | rv = __proc_file_read(file, buf, nbytes, ppos); | ||
194 | |||
195 | pde_users_dec(pde); | ||
196 | return rv; | ||
197 | } | ||
198 | |||
199 | static ssize_t | ||
200 | proc_file_write(struct file *file, const char __user *buffer, | ||
201 | size_t count, loff_t *ppos) | ||
202 | { | ||
203 | struct proc_dir_entry *pde = PDE(file_inode(file)); | ||
204 | ssize_t rv = -EIO; | ||
205 | |||
206 | if (pde->write_proc) { | ||
207 | spin_lock(&pde->pde_unload_lock); | ||
208 | if (!pde->proc_fops) { | ||
209 | spin_unlock(&pde->pde_unload_lock); | ||
210 | return rv; | ||
211 | } | ||
212 | pde->pde_users++; | ||
213 | spin_unlock(&pde->pde_unload_lock); | ||
214 | |||
215 | /* FIXME: does this routine need ppos? probably... */ | ||
216 | rv = pde->write_proc(file, buffer, count, pde->data); | ||
217 | pde_users_dec(pde); | ||
218 | } | ||
219 | return rv; | ||
220 | } | ||
221 | |||
222 | |||
223 | static loff_t | ||
224 | proc_file_lseek(struct file *file, loff_t offset, int orig) | ||
225 | { | ||
226 | loff_t retval = -EINVAL; | ||
227 | switch (orig) { | ||
228 | case 1: | ||
229 | offset += file->f_pos; | ||
230 | /* fallthrough */ | ||
231 | case 0: | ||
232 | if (offset < 0 || offset > MAX_NON_LFS) | ||
233 | break; | ||
234 | file->f_pos = retval = offset; | ||
235 | } | ||
236 | return retval; | ||
237 | } | ||
238 | |||
239 | static const struct file_operations proc_file_operations = { | ||
240 | .llseek = proc_file_lseek, | ||
241 | .read = proc_file_read, | ||
242 | .write = proc_file_write, | ||
243 | }; | ||
244 | |||
245 | static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) | 39 | static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) |
246 | { | 40 | { |
247 | struct inode *inode = dentry->d_inode; | 41 | struct inode *inode = dentry->d_inode; |
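The long comment removed above documented the legacy read_proc protocol. For reference, the simplest handler that protocol allowed (case 0: data at the start of the buffer, EOF signalled because everything fits in one page) looked roughly like this; foo_read_proc is an illustrative name, not code from the tree:

static int foo_read_proc(char *page, char **start, off_t off,
			 int count, int *eof, void *data)
{
	/* write everything at the start of the buffer and signal EOF;
	   __proc_file_read() handled the offset/count bookkeeping */
	int len = sprintf(page, "hello from foo\n");

	*eof = 1;
	return len;
}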
@@ -371,7 +165,7 @@ void proc_free_inum(unsigned int inum) | |||
371 | 165 | ||
372 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) | 166 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) |
373 | { | 167 | { |
374 | nd_set_link(nd, PDE(dentry->d_inode)->data); | 168 | nd_set_link(nd, __PDE_DATA(dentry->d_inode)); |
375 | return NULL; | 169 | return NULL; |
376 | } | 170 | } |
377 | 171 | ||
@@ -541,19 +335,17 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
541 | return ret; | 335 | return ret; |
542 | 336 | ||
543 | if (S_ISDIR(dp->mode)) { | 337 | if (S_ISDIR(dp->mode)) { |
544 | if (dp->proc_iops == NULL) { | 338 | dp->proc_fops = &proc_dir_operations; |
545 | dp->proc_fops = &proc_dir_operations; | 339 | dp->proc_iops = &proc_dir_inode_operations; |
546 | dp->proc_iops = &proc_dir_inode_operations; | ||
547 | } | ||
548 | dir->nlink++; | 340 | dir->nlink++; |
549 | } else if (S_ISLNK(dp->mode)) { | 341 | } else if (S_ISLNK(dp->mode)) { |
550 | if (dp->proc_iops == NULL) | 342 | dp->proc_iops = &proc_link_inode_operations; |
551 | dp->proc_iops = &proc_link_inode_operations; | ||
552 | } else if (S_ISREG(dp->mode)) { | 343 | } else if (S_ISREG(dp->mode)) { |
553 | if (dp->proc_fops == NULL) | 344 | BUG_ON(dp->proc_fops == NULL); |
554 | dp->proc_fops = &proc_file_operations; | 345 | dp->proc_iops = &proc_file_inode_operations; |
555 | if (dp->proc_iops == NULL) | 346 | } else { |
556 | dp->proc_iops = &proc_file_inode_operations; | 347 | WARN_ON(1); |
348 | return -EINVAL; | ||
557 | } | 349 | } |
558 | 350 | ||
559 | spin_lock(&proc_subdir_lock); | 351 | spin_lock(&proc_subdir_lock); |
@@ -636,13 +428,17 @@ struct proc_dir_entry *proc_symlink(const char *name, | |||
636 | } | 428 | } |
637 | EXPORT_SYMBOL(proc_symlink); | 429 | EXPORT_SYMBOL(proc_symlink); |
638 | 430 | ||
639 | struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, | 431 | struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, |
640 | struct proc_dir_entry *parent) | 432 | struct proc_dir_entry *parent, void *data) |
641 | { | 433 | { |
642 | struct proc_dir_entry *ent; | 434 | struct proc_dir_entry *ent; |
643 | 435 | ||
436 | if (mode == 0) | ||
437 | mode = S_IRUGO | S_IXUGO; | ||
438 | |||
644 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); | 439 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); |
645 | if (ent) { | 440 | if (ent) { |
441 | ent->data = data; | ||
646 | if (proc_register(parent, ent) < 0) { | 442 | if (proc_register(parent, ent) < 0) { |
647 | kfree(ent); | 443 | kfree(ent); |
648 | ent = NULL; | 444 | ent = NULL; |
@@ -650,82 +446,39 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, | |||
650 | } | 446 | } |
651 | return ent; | 447 | return ent; |
652 | } | 448 | } |
653 | EXPORT_SYMBOL(proc_mkdir_mode); | 449 | EXPORT_SYMBOL_GPL(proc_mkdir_data); |
654 | 450 | ||
655 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | 451 | struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, |
656 | struct proc_dir_entry *parent) | 452 | struct proc_dir_entry *parent) |
657 | { | 453 | { |
658 | struct proc_dir_entry *ent; | 454 | return proc_mkdir_data(name, mode, parent, NULL); |
659 | |||
660 | ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); | ||
661 | if (ent) { | ||
662 | ent->data = net; | ||
663 | if (proc_register(parent, ent) < 0) { | ||
664 | kfree(ent); | ||
665 | ent = NULL; | ||
666 | } | ||
667 | } | ||
668 | return ent; | ||
669 | } | 455 | } |
670 | EXPORT_SYMBOL_GPL(proc_net_mkdir); | 456 | EXPORT_SYMBOL(proc_mkdir_mode); |
671 | 457 | ||
672 | struct proc_dir_entry *proc_mkdir(const char *name, | 458 | struct proc_dir_entry *proc_mkdir(const char *name, |
673 | struct proc_dir_entry *parent) | 459 | struct proc_dir_entry *parent) |
674 | { | 460 | { |
675 | return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); | 461 | return proc_mkdir_data(name, 0, parent, NULL); |
676 | } | 462 | } |
677 | EXPORT_SYMBOL(proc_mkdir); | 463 | EXPORT_SYMBOL(proc_mkdir); |
678 | 464 | ||
679 | struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, | ||
680 | struct proc_dir_entry *parent) | ||
681 | { | ||
682 | struct proc_dir_entry *ent; | ||
683 | nlink_t nlink; | ||
684 | |||
685 | if (S_ISDIR(mode)) { | ||
686 | if ((mode & S_IALLUGO) == 0) | ||
687 | mode |= S_IRUGO | S_IXUGO; | ||
688 | nlink = 2; | ||
689 | } else { | ||
690 | if ((mode & S_IFMT) == 0) | ||
691 | mode |= S_IFREG; | ||
692 | if ((mode & S_IALLUGO) == 0) | ||
693 | mode |= S_IRUGO; | ||
694 | nlink = 1; | ||
695 | } | ||
696 | |||
697 | ent = __proc_create(&parent, name, mode, nlink); | ||
698 | if (ent) { | ||
699 | if (proc_register(parent, ent) < 0) { | ||
700 | kfree(ent); | ||
701 | ent = NULL; | ||
702 | } | ||
703 | } | ||
704 | return ent; | ||
705 | } | ||
706 | EXPORT_SYMBOL(create_proc_entry); | ||
707 | |||
708 | struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | 465 | struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, |
709 | struct proc_dir_entry *parent, | 466 | struct proc_dir_entry *parent, |
710 | const struct file_operations *proc_fops, | 467 | const struct file_operations *proc_fops, |
711 | void *data) | 468 | void *data) |
712 | { | 469 | { |
713 | struct proc_dir_entry *pde; | 470 | struct proc_dir_entry *pde; |
714 | nlink_t nlink; | 471 | if ((mode & S_IFMT) == 0) |
472 | mode |= S_IFREG; | ||
715 | 473 | ||
716 | if (S_ISDIR(mode)) { | 474 | if (!S_ISREG(mode)) { |
717 | if ((mode & S_IALLUGO) == 0) | 475 | WARN_ON(1); /* use proc_mkdir() */ |
718 | mode |= S_IRUGO | S_IXUGO; | 476 | return NULL; |
719 | nlink = 2; | ||
720 | } else { | ||
721 | if ((mode & S_IFMT) == 0) | ||
722 | mode |= S_IFREG; | ||
723 | if ((mode & S_IALLUGO) == 0) | ||
724 | mode |= S_IRUGO; | ||
725 | nlink = 1; | ||
726 | } | 477 | } |
727 | 478 | ||
728 | pde = __proc_create(&parent, name, mode, nlink); | 479 | if ((mode & S_IALLUGO) == 0) |
480 | mode |= S_IRUGO; | ||
481 | pde = __proc_create(&parent, name, mode, 1); | ||
729 | if (!pde) | 482 | if (!pde) |
730 | goto out; | 483 | goto out; |
731 | pde->proc_fops = proc_fops; | 484 | pde->proc_fops = proc_fops; |
@@ -739,6 +492,19 @@ out: | |||
739 | return NULL; | 492 | return NULL; |
740 | } | 493 | } |
741 | EXPORT_SYMBOL(proc_create_data); | 494 | EXPORT_SYMBOL(proc_create_data); |
495 | |||
496 | void proc_set_size(struct proc_dir_entry *de, loff_t size) | ||
497 | { | ||
498 | de->size = size; | ||
499 | } | ||
500 | EXPORT_SYMBOL(proc_set_size); | ||
501 | |||
502 | void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) | ||
503 | { | ||
504 | de->uid = uid; | ||
505 | de->gid = gid; | ||
506 | } | ||
507 | EXPORT_SYMBOL(proc_set_user); | ||
742 | 508 | ||
743 | static void free_proc_entry(struct proc_dir_entry *de) | 509 | static void free_proc_entry(struct proc_dir_entry *de) |
744 | { | 510 | { |
@@ -755,41 +521,6 @@ void pde_put(struct proc_dir_entry *pde) | |||
755 | free_proc_entry(pde); | 521 | free_proc_entry(pde); |
756 | } | 522 | } |
757 | 523 | ||
758 | static void entry_rundown(struct proc_dir_entry *de) | ||
759 | { | ||
760 | spin_lock(&de->pde_unload_lock); | ||
761 | /* | ||
762 | * Stop accepting new callers into module. If you're | ||
763 | * dynamically allocating ->proc_fops, save a pointer somewhere. | ||
764 | */ | ||
765 | de->proc_fops = NULL; | ||
766 | /* Wait until all existing callers into module are done. */ | ||
767 | if (de->pde_users > 0) { | ||
768 | DECLARE_COMPLETION_ONSTACK(c); | ||
769 | |||
770 | if (!de->pde_unload_completion) | ||
771 | de->pde_unload_completion = &c; | ||
772 | |||
773 | spin_unlock(&de->pde_unload_lock); | ||
774 | |||
775 | wait_for_completion(de->pde_unload_completion); | ||
776 | |||
777 | spin_lock(&de->pde_unload_lock); | ||
778 | } | ||
779 | |||
780 | while (!list_empty(&de->pde_openers)) { | ||
781 | struct pde_opener *pdeo; | ||
782 | |||
783 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); | ||
784 | list_del(&pdeo->lh); | ||
785 | spin_unlock(&de->pde_unload_lock); | ||
786 | pdeo->release(pdeo->inode, pdeo->file); | ||
787 | kfree(pdeo); | ||
788 | spin_lock(&de->pde_unload_lock); | ||
789 | } | ||
790 | spin_unlock(&de->pde_unload_lock); | ||
791 | } | ||
792 | |||
793 | /* | 524 | /* |
794 | * Remove a /proc entry and free it if it's not currently in use. | 525 | * Remove a /proc entry and free it if it's not currently in use. |
795 | */ | 526 | */ |
@@ -821,7 +552,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
821 | return; | 552 | return; |
822 | } | 553 | } |
823 | 554 | ||
824 | entry_rundown(de); | 555 | proc_entry_rundown(de); |
825 | 556 | ||
826 | if (S_ISDIR(de->mode)) | 557 | if (S_ISDIR(de->mode)) |
827 | parent->nlink--; | 558 | parent->nlink--; |
@@ -870,7 +601,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
870 | } | 601 | } |
871 | spin_unlock(&proc_subdir_lock); | 602 | spin_unlock(&proc_subdir_lock); |
872 | 603 | ||
873 | entry_rundown(de); | 604 | proc_entry_rundown(de); |
874 | next = de->parent; | 605 | next = de->parent; |
875 | if (S_ISDIR(de->mode)) | 606 | if (S_ISDIR(de->mode)) |
876 | next->nlink--; | 607 | next->nlink--; |
@@ -886,3 +617,23 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
886 | return 0; | 617 | return 0; |
887 | } | 618 | } |
888 | EXPORT_SYMBOL(remove_proc_subtree); | 619 | EXPORT_SYMBOL(remove_proc_subtree); |
620 | |||
621 | void *proc_get_parent_data(const struct inode *inode) | ||
622 | { | ||
623 | struct proc_dir_entry *de = PDE(inode); | ||
624 | return de->parent->data; | ||
625 | } | ||
626 | EXPORT_SYMBOL_GPL(proc_get_parent_data); | ||
627 | |||
628 | void proc_remove(struct proc_dir_entry *de) | ||
629 | { | ||
630 | if (de) | ||
631 | remove_proc_subtree(de->name, de->parent); | ||
632 | } | ||
633 | EXPORT_SYMBOL(proc_remove); | ||
634 | |||
635 | void *PDE_DATA(const struct inode *inode) | ||
636 | { | ||
637 | return __PDE_DATA(inode); | ||
638 | } | ||
639 | EXPORT_SYMBOL(PDE_DATA); | ||
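With the legacy proc_file_read()/write_proc path removed above, generic.c now expects callers to hand proc_create_data() a complete file_operations and to use the new accessors. An illustrative sequence; foo_proc_fops and dev are made-up driver pieces, and foo_proc_fops's ->open would fetch dev back with PDE_DATA(inode):

struct proc_dir_entry *dir, *ent;

dir = proc_mkdir_data("foo", 0, NULL, dev);	/* mode 0 becomes S_IRUGO|S_IXUGO */
if (dir) {
	ent = proc_create_data("status", 0444, dir, &foo_proc_fops, dev);
	if (ent) {
		proc_set_size(ent, PAGE_SIZE);
		proc_set_user(ent, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID);
	}
}
/* teardown: proc_remove() drops the directory and everything under it */
proc_remove(dir);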
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 869116c2afbe..073aea60cf8f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/seq_file.h> | 22 | #include <linux/seq_file.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/magic.h> | ||
25 | 26 | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
@@ -50,8 +51,8 @@ static void proc_evict_inode(struct inode *inode) | |||
50 | sysctl_head_put(head); | 51 | sysctl_head_put(head); |
51 | } | 52 | } |
52 | /* Release any associated namespace */ | 53 | /* Release any associated namespace */ |
53 | ns_ops = PROC_I(inode)->ns_ops; | 54 | ns_ops = PROC_I(inode)->ns.ns_ops; |
54 | ns = PROC_I(inode)->ns; | 55 | ns = PROC_I(inode)->ns.ns; |
55 | if (ns_ops && ns) | 56 | if (ns_ops && ns) |
56 | ns_ops->put(ns); | 57 | ns_ops->put(ns); |
57 | } | 58 | } |
@@ -72,8 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
72 | ei->pde = NULL; | 73 | ei->pde = NULL; |
73 | ei->sysctl = NULL; | 74 | ei->sysctl = NULL; |
74 | ei->sysctl_entry = NULL; | 75 | ei->sysctl_entry = NULL; |
75 | ei->ns = NULL; | 76 | ei->ns.ns = NULL; |
76 | ei->ns_ops = NULL; | 77 | ei->ns.ns_ops = NULL; |
77 | inode = &ei->vfs_inode; | 78 | inode = &ei->vfs_inode; |
78 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 79 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
79 | return inode; | 80 | return inode; |
@@ -129,96 +130,100 @@ static const struct super_operations proc_sops = { | |||
129 | .show_options = proc_show_options, | 130 | .show_options = proc_show_options, |
130 | }; | 131 | }; |
131 | 132 | ||
132 | static void __pde_users_dec(struct proc_dir_entry *pde) | 133 | enum {BIAS = -1U<<31}; |
134 | |||
135 | static inline int use_pde(struct proc_dir_entry *pde) | ||
136 | { | ||
137 | return atomic_inc_unless_negative(&pde->in_use); | ||
138 | } | ||
139 | |||
140 | static void unuse_pde(struct proc_dir_entry *pde) | ||
133 | { | 141 | { |
134 | pde->pde_users--; | 142 | if (atomic_dec_return(&pde->in_use) == BIAS) |
135 | if (pde->pde_unload_completion && pde->pde_users == 0) | ||
136 | complete(pde->pde_unload_completion); | 143 | complete(pde->pde_unload_completion); |
137 | } | 144 | } |
138 | 145 | ||
139 | void pde_users_dec(struct proc_dir_entry *pde) | 146 | /* pde is locked */ |
147 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | ||
140 | { | 148 | { |
141 | spin_lock(&pde->pde_unload_lock); | 149 | if (pdeo->closing) { |
142 | __pde_users_dec(pde); | 150 | /* somebody else is doing that, just wait */ |
143 | spin_unlock(&pde->pde_unload_lock); | 151 | DECLARE_COMPLETION_ONSTACK(c); |
152 | pdeo->c = &c; | ||
153 | spin_unlock(&pde->pde_unload_lock); | ||
154 | wait_for_completion(&c); | ||
155 | spin_lock(&pde->pde_unload_lock); | ||
156 | } else { | ||
157 | struct file *file; | ||
158 | pdeo->closing = 1; | ||
159 | spin_unlock(&pde->pde_unload_lock); | ||
160 | file = pdeo->file; | ||
161 | pde->proc_fops->release(file_inode(file), file); | ||
162 | spin_lock(&pde->pde_unload_lock); | ||
163 | list_del_init(&pdeo->lh); | ||
164 | if (pdeo->c) | ||
165 | complete(pdeo->c); | ||
166 | kfree(pdeo); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | void proc_entry_rundown(struct proc_dir_entry *de) | ||
171 | { | ||
172 | DECLARE_COMPLETION_ONSTACK(c); | ||
173 | /* Wait until all existing callers into module are done. */ | ||
174 | de->pde_unload_completion = &c; | ||
175 | if (atomic_add_return(BIAS, &de->in_use) != BIAS) | ||
176 | wait_for_completion(&c); | ||
177 | |||
178 | spin_lock(&de->pde_unload_lock); | ||
179 | while (!list_empty(&de->pde_openers)) { | ||
180 | struct pde_opener *pdeo; | ||
181 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); | ||
182 | close_pdeo(de, pdeo); | ||
183 | } | ||
184 | spin_unlock(&de->pde_unload_lock); | ||
144 | } | 185 | } |
145 | 186 | ||
146 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) | 187 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) |
147 | { | 188 | { |
148 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 189 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
149 | loff_t rv = -EINVAL; | 190 | loff_t rv = -EINVAL; |
150 | loff_t (*llseek)(struct file *, loff_t, int); | 191 | if (use_pde(pde)) { |
151 | 192 | loff_t (*llseek)(struct file *, loff_t, int); | |
152 | spin_lock(&pde->pde_unload_lock); | 193 | llseek = pde->proc_fops->llseek; |
153 | /* | 194 | if (!llseek) |
154 | * remove_proc_entry() is going to delete PDE (as part of module | 195 | llseek = default_llseek; |
155 | * cleanup sequence). No new callers into module allowed. | 196 | rv = llseek(file, offset, whence); |
156 | */ | 197 | unuse_pde(pde); |
157 | if (!pde->proc_fops) { | ||
158 | spin_unlock(&pde->pde_unload_lock); | ||
159 | return rv; | ||
160 | } | 198 | } |
161 | /* | ||
162 | * Bump refcount so that remove_proc_entry will wail for ->llseek to | ||
163 | * complete. | ||
164 | */ | ||
165 | pde->pde_users++; | ||
166 | /* | ||
167 | * Save function pointer under lock, to protect against ->proc_fops | ||
168 | * NULL'ifying right after ->pde_unload_lock is dropped. | ||
169 | */ | ||
170 | llseek = pde->proc_fops->llseek; | ||
171 | spin_unlock(&pde->pde_unload_lock); | ||
172 | |||
173 | if (!llseek) | ||
174 | llseek = default_llseek; | ||
175 | rv = llseek(file, offset, whence); | ||
176 | |||
177 | pde_users_dec(pde); | ||
178 | return rv; | 199 | return rv; |
179 | } | 200 | } |
180 | 201 | ||
181 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 202 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
182 | { | 203 | { |
204 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | ||
183 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 205 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
184 | ssize_t rv = -EIO; | 206 | ssize_t rv = -EIO; |
185 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | 207 | if (use_pde(pde)) { |
186 | 208 | read = pde->proc_fops->read; | |
187 | spin_lock(&pde->pde_unload_lock); | 209 | if (read) |
188 | if (!pde->proc_fops) { | 210 | rv = read(file, buf, count, ppos); |
189 | spin_unlock(&pde->pde_unload_lock); | 211 | unuse_pde(pde); |
190 | return rv; | ||
191 | } | 212 | } |
192 | pde->pde_users++; | ||
193 | read = pde->proc_fops->read; | ||
194 | spin_unlock(&pde->pde_unload_lock); | ||
195 | |||
196 | if (read) | ||
197 | rv = read(file, buf, count, ppos); | ||
198 | |||
199 | pde_users_dec(pde); | ||
200 | return rv; | 213 | return rv; |
201 | } | 214 | } |
202 | 215 | ||
203 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) | 216 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) |
204 | { | 217 | { |
218 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | ||
205 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 219 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
206 | ssize_t rv = -EIO; | 220 | ssize_t rv = -EIO; |
207 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | 221 | if (use_pde(pde)) { |
208 | 222 | write = pde->proc_fops->write; | |
209 | spin_lock(&pde->pde_unload_lock); | 223 | if (write) |
210 | if (!pde->proc_fops) { | 224 | rv = write(file, buf, count, ppos); |
211 | spin_unlock(&pde->pde_unload_lock); | 225 | unuse_pde(pde); |
212 | return rv; | ||
213 | } | 226 | } |
214 | pde->pde_users++; | ||
215 | write = pde->proc_fops->write; | ||
216 | spin_unlock(&pde->pde_unload_lock); | ||
217 | |||
218 | if (write) | ||
219 | rv = write(file, buf, count, ppos); | ||
220 | |||
221 | pde_users_dec(pde); | ||
222 | return rv; | 227 | return rv; |
223 | } | 228 | } |
224 | 229 | ||
@@ -227,20 +232,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p | |||
227 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 232 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
228 | unsigned int rv = DEFAULT_POLLMASK; | 233 | unsigned int rv = DEFAULT_POLLMASK; |
229 | unsigned int (*poll)(struct file *, struct poll_table_struct *); | 234 | unsigned int (*poll)(struct file *, struct poll_table_struct *); |
230 | 235 | if (use_pde(pde)) { | |
231 | spin_lock(&pde->pde_unload_lock); | 236 | poll = pde->proc_fops->poll; |
232 | if (!pde->proc_fops) { | 237 | if (poll) |
233 | spin_unlock(&pde->pde_unload_lock); | 238 | rv = poll(file, pts); |
234 | return rv; | 239 | unuse_pde(pde); |
235 | } | 240 | } |
236 | pde->pde_users++; | ||
237 | poll = pde->proc_fops->poll; | ||
238 | spin_unlock(&pde->pde_unload_lock); | ||
239 | |||
240 | if (poll) | ||
241 | rv = poll(file, pts); | ||
242 | |||
243 | pde_users_dec(pde); | ||
244 | return rv; | 241 | return rv; |
245 | } | 242 | } |
246 | 243 | ||
@@ -249,20 +246,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne | |||
249 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 246 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
250 | long rv = -ENOTTY; | 247 | long rv = -ENOTTY; |
251 | long (*ioctl)(struct file *, unsigned int, unsigned long); | 248 | long (*ioctl)(struct file *, unsigned int, unsigned long); |
252 | 249 | if (use_pde(pde)) { | |
253 | spin_lock(&pde->pde_unload_lock); | 250 | ioctl = pde->proc_fops->unlocked_ioctl; |
254 | if (!pde->proc_fops) { | 251 | if (ioctl) |
255 | spin_unlock(&pde->pde_unload_lock); | 252 | rv = ioctl(file, cmd, arg); |
256 | return rv; | 253 | unuse_pde(pde); |
257 | } | 254 | } |
258 | pde->pde_users++; | ||
259 | ioctl = pde->proc_fops->unlocked_ioctl; | ||
260 | spin_unlock(&pde->pde_unload_lock); | ||
261 | |||
262 | if (ioctl) | ||
263 | rv = ioctl(file, cmd, arg); | ||
264 | |||
265 | pde_users_dec(pde); | ||
266 | return rv; | 255 | return rv; |
267 | } | 256 | } |
268 | 257 | ||
@@ -272,20 +261,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned | |||
272 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 261 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
273 | long rv = -ENOTTY; | 262 | long rv = -ENOTTY; |
274 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); | 263 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); |
275 | 264 | if (use_pde(pde)) { | |
276 | spin_lock(&pde->pde_unload_lock); | 265 | compat_ioctl = pde->proc_fops->compat_ioctl; |
277 | if (!pde->proc_fops) { | 266 | if (compat_ioctl) |
278 | spin_unlock(&pde->pde_unload_lock); | 267 | rv = compat_ioctl(file, cmd, arg); |
279 | return rv; | 268 | unuse_pde(pde); |
280 | } | 269 | } |
281 | pde->pde_users++; | ||
282 | compat_ioctl = pde->proc_fops->compat_ioctl; | ||
283 | spin_unlock(&pde->pde_unload_lock); | ||
284 | |||
285 | if (compat_ioctl) | ||
286 | rv = compat_ioctl(file, cmd, arg); | ||
287 | |||
288 | pde_users_dec(pde); | ||
289 | return rv; | 270 | return rv; |
290 | } | 271 | } |
291 | #endif | 272 | #endif |
@@ -295,20 +276,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) | |||
295 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 276 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
296 | int rv = -EIO; | 277 | int rv = -EIO; |
297 | int (*mmap)(struct file *, struct vm_area_struct *); | 278 | int (*mmap)(struct file *, struct vm_area_struct *); |
298 | 279 | if (use_pde(pde)) { | |
299 | spin_lock(&pde->pde_unload_lock); | 280 | mmap = pde->proc_fops->mmap; |
300 | if (!pde->proc_fops) { | 281 | if (mmap) |
301 | spin_unlock(&pde->pde_unload_lock); | 282 | rv = mmap(file, vma); |
302 | return rv; | 283 | unuse_pde(pde); |
303 | } | 284 | } |
304 | pde->pde_users++; | ||
305 | mmap = pde->proc_fops->mmap; | ||
306 | spin_unlock(&pde->pde_unload_lock); | ||
307 | |||
308 | if (mmap) | ||
309 | rv = mmap(file, vma); | ||
310 | |||
311 | pde_users_dec(pde); | ||
312 | return rv; | 285 | return rv; |
313 | } | 286 | } |
314 | 287 | ||
@@ -330,91 +303,47 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
330 | * by hand in remove_proc_entry(). For this, save opener's credentials | 303 | * by hand in remove_proc_entry(). For this, save opener's credentials |
331 | * for later. | 304 | * for later. |
332 | */ | 305 | */ |
333 | pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); | 306 | pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); |
334 | if (!pdeo) | 307 | if (!pdeo) |
335 | return -ENOMEM; | 308 | return -ENOMEM; |
336 | 309 | ||
337 | spin_lock(&pde->pde_unload_lock); | 310 | if (!use_pde(pde)) { |
338 | if (!pde->proc_fops) { | ||
339 | spin_unlock(&pde->pde_unload_lock); | ||
340 | kfree(pdeo); | 311 | kfree(pdeo); |
341 | return -ENOENT; | 312 | return -ENOENT; |
342 | } | 313 | } |
343 | pde->pde_users++; | ||
344 | open = pde->proc_fops->open; | 314 | open = pde->proc_fops->open; |
345 | release = pde->proc_fops->release; | 315 | release = pde->proc_fops->release; |
346 | spin_unlock(&pde->pde_unload_lock); | ||
347 | 316 | ||
348 | if (open) | 317 | if (open) |
349 | rv = open(inode, file); | 318 | rv = open(inode, file); |
350 | 319 | ||
351 | spin_lock(&pde->pde_unload_lock); | ||
352 | if (rv == 0 && release) { | 320 | if (rv == 0 && release) { |
353 | /* To know what to release. */ | 321 | /* To know what to release. */ |
354 | pdeo->inode = inode; | ||
355 | pdeo->file = file; | 322 | pdeo->file = file; |
356 | /* Strictly for "too late" ->release in proc_reg_release(). */ | 323 | /* Strictly for "too late" ->release in proc_reg_release(). */ |
357 | pdeo->release = release; | 324 | spin_lock(&pde->pde_unload_lock); |
358 | list_add(&pdeo->lh, &pde->pde_openers); | 325 | list_add(&pdeo->lh, &pde->pde_openers); |
326 | spin_unlock(&pde->pde_unload_lock); | ||
359 | } else | 327 | } else |
360 | kfree(pdeo); | 328 | kfree(pdeo); |
361 | __pde_users_dec(pde); | ||
362 | spin_unlock(&pde->pde_unload_lock); | ||
363 | return rv; | ||
364 | } | ||
365 | |||
366 | static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, | ||
367 | struct inode *inode, struct file *file) | ||
368 | { | ||
369 | struct pde_opener *pdeo; | ||
370 | 329 | ||
371 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { | 330 | unuse_pde(pde); |
372 | if (pdeo->inode == inode && pdeo->file == file) | 331 | return rv; |
373 | return pdeo; | ||
374 | } | ||
375 | return NULL; | ||
376 | } | 332 | } |
377 | 333 | ||
378 | static int proc_reg_release(struct inode *inode, struct file *file) | 334 | static int proc_reg_release(struct inode *inode, struct file *file) |
379 | { | 335 | { |
380 | struct proc_dir_entry *pde = PDE(inode); | 336 | struct proc_dir_entry *pde = PDE(inode); |
381 | int rv = 0; | ||
382 | int (*release)(struct inode *, struct file *); | ||
383 | struct pde_opener *pdeo; | 337 | struct pde_opener *pdeo; |
384 | |||
385 | spin_lock(&pde->pde_unload_lock); | 338 | spin_lock(&pde->pde_unload_lock); |
386 | pdeo = find_pde_opener(pde, inode, file); | 339 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { |
387 | if (!pde->proc_fops) { | 340 | if (pdeo->file == file) { |
388 | /* | 341 | close_pdeo(pde, pdeo); |
389 | * Can't simply exit, __fput() will think that everything is OK, | 342 | break; |
390 | * and move on to freeing struct file. remove_proc_entry() will | 343 | } |
391 | * find slacker in opener's list and will try to do non-trivial | ||
392 | * things with struct file. Therefore, remove opener from list. | ||
393 | * | ||
394 | * But if opener is removed from list, who will ->release it? | ||
395 | */ | ||
396 | if (pdeo) { | ||
397 | list_del(&pdeo->lh); | ||
398 | spin_unlock(&pde->pde_unload_lock); | ||
399 | rv = pdeo->release(inode, file); | ||
400 | kfree(pdeo); | ||
401 | } else | ||
402 | spin_unlock(&pde->pde_unload_lock); | ||
403 | return rv; | ||
404 | } | ||
405 | pde->pde_users++; | ||
406 | release = pde->proc_fops->release; | ||
407 | if (pdeo) { | ||
408 | list_del(&pdeo->lh); | ||
409 | kfree(pdeo); | ||
410 | } | 344 | } |
411 | spin_unlock(&pde->pde_unload_lock); | 345 | spin_unlock(&pde->pde_unload_lock); |
412 | 346 | return 0; | |
413 | if (release) | ||
414 | rv = release(inode, file); | ||
415 | |||
416 | pde_users_dec(pde); | ||
417 | return rv; | ||
418 | } | 347 | } |
419 | 348 | ||
420 | static const struct file_operations proc_reg_file_ops = { | 349 | static const struct file_operations proc_reg_file_ops = { |
@@ -462,8 +391,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
462 | inode->i_size = de->size; | 391 | inode->i_size = de->size; |
463 | if (de->nlink) | 392 | if (de->nlink) |
464 | set_nlink(inode, de->nlink); | 393 | set_nlink(inode, de->nlink); |
465 | if (de->proc_iops) | 394 | WARN_ON(!de->proc_iops); |
466 | inode->i_op = de->proc_iops; | 395 | inode->i_op = de->proc_iops; |
467 | if (de->proc_fops) { | 396 | if (de->proc_fops) { |
468 | if (S_ISREG(inode->i_mode)) { | 397 | if (S_ISREG(inode->i_mode)) { |
469 | #ifdef CONFIG_COMPAT | 398 | #ifdef CONFIG_COMPAT |
@@ -506,5 +435,5 @@ int proc_fill_super(struct super_block *s) | |||
506 | return -ENOMEM; | 435 | return -ENOMEM; |
507 | } | 436 | } |
508 | 437 | ||
509 | return 0; | 438 | return proc_setup_self(s); |
510 | } | 439 | } |
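The in_use counter introduced above replaces the pde_users/spinlock scheme with a biased atomic: normal callers increment it through use_pde(), and proc_entry_rundown() adds BIAS (INT_MIN) so atomic_inc_unless_negative() refuses new entries while existing callers drain. For example, with two callers inside the counter is 2; rundown makes it BIAS+2, and the unuse_pde() that brings it back to exactly BIAS fires the completion. The same idea in isolation, as a sketch rather than the patch's code:

enum { BIAS = -1U << 31 };		/* INT_MIN: forces the count negative */

static atomic_t in_use = ATOMIC_INIT(0);

static int enter(void)				/* cf. use_pde() */
{
	return atomic_inc_unless_negative(&in_use);
}

static void leave(struct completion *done)	/* cf. unuse_pde() */
{
	if (atomic_dec_return(&in_use) == BIAS)
		complete(done);			/* last caller after rundown began */
}

static void rundown(struct completion *done)	/* cf. proc_entry_rundown() */
{
	if (atomic_add_return(BIAS, &in_use) != BIAS)
		wait_for_completion(done);	/* wait for in-flight callers */
}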
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 75710357a517..d600fb098b6a 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* internal.h: internal procfs definitions | 1 | /* Internal procfs definitions |
2 | * | 2 | * |
3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
@@ -9,62 +9,83 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | #include <linux/proc_ns.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/atomic.h> | ||
14 | #include <linux/binfmts.h> | 16 | #include <linux/binfmts.h> |
15 | struct ctl_table_header; | ||
16 | struct mempolicy; | ||
17 | 17 | ||
18 | extern struct proc_dir_entry proc_root; | 18 | struct ctl_table_header; |
19 | extern void proc_self_init(void); | 19 | struct mempolicy; |
20 | #ifdef CONFIG_PROC_SYSCTL | ||
21 | extern int proc_sys_init(void); | ||
22 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
23 | #else | ||
24 | static inline void proc_sys_init(void) { } | ||
25 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
26 | #endif | ||
27 | #ifdef CONFIG_NET | ||
28 | extern int proc_net_init(void); | ||
29 | #else | ||
30 | static inline int proc_net_init(void) { return 0; } | ||
31 | #endif | ||
32 | 20 | ||
33 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 21 | /* |
34 | struct pid *pid, struct task_struct *task); | 22 | * This is not completely implemented yet. The idea is to |
35 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 23 | * create an in-memory tree (like the actual /proc filesystem |
36 | struct pid *pid, struct task_struct *task); | 24 | * tree) of these proc_dir_entries, so that we can dynamically |
37 | extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 25 | * add new files to /proc. |
38 | struct pid *pid, struct task_struct *task); | 26 | * |
39 | extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | 27 | * The "next" pointer creates a linked list of one /proc directory, |
40 | struct pid *pid, struct task_struct *task); | 28 | * while parent/subdir create the directory structure (every |
41 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | 29 | * /proc file has a parent, but "subdir" is NULL for all |
30 | * non-directory entries). | ||
31 | */ | ||
32 | struct proc_dir_entry { | ||
33 | unsigned int low_ino; | ||
34 | umode_t mode; | ||
35 | nlink_t nlink; | ||
36 | kuid_t uid; | ||
37 | kgid_t gid; | ||
38 | loff_t size; | ||
39 | const struct inode_operations *proc_iops; | ||
40 | const struct file_operations *proc_fops; | ||
41 | struct proc_dir_entry *next, *parent, *subdir; | ||
42 | void *data; | ||
43 | atomic_t count; /* use count */ | ||
44 | atomic_t in_use; /* number of callers into module in progress; */ | ||
45 | /* negative -> it's going away RSN */ | ||
46 | struct completion *pde_unload_completion; | ||
47 | struct list_head pde_openers; /* who did ->open, but not ->release */ | ||
48 | spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ | ||
49 | u8 namelen; | ||
50 | char name[]; | ||
51 | }; | ||
42 | 52 | ||
43 | extern const struct file_operations proc_tid_children_operations; | 53 | union proc_op { |
44 | extern const struct file_operations proc_pid_maps_operations; | 54 | int (*proc_get_link)(struct dentry *, struct path *); |
45 | extern const struct file_operations proc_tid_maps_operations; | 55 | int (*proc_read)(struct task_struct *task, char *page); |
46 | extern const struct file_operations proc_pid_numa_maps_operations; | 56 | int (*proc_show)(struct seq_file *m, |
47 | extern const struct file_operations proc_tid_numa_maps_operations; | 57 | struct pid_namespace *ns, struct pid *pid, |
48 | extern const struct file_operations proc_pid_smaps_operations; | 58 | struct task_struct *task); |
49 | extern const struct file_operations proc_tid_smaps_operations; | 59 | }; |
50 | extern const struct file_operations proc_clear_refs_operations; | ||
51 | extern const struct file_operations proc_pagemap_operations; | ||
52 | extern const struct file_operations proc_net_operations; | ||
53 | extern const struct inode_operations proc_net_inode_operations; | ||
54 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
55 | 60 | ||
56 | struct proc_maps_private { | 61 | struct proc_inode { |
57 | struct pid *pid; | 62 | struct pid *pid; |
58 | struct task_struct *task; | 63 | int fd; |
59 | #ifdef CONFIG_MMU | 64 | union proc_op op; |
60 | struct vm_area_struct *tail_vma; | 65 | struct proc_dir_entry *pde; |
61 | #endif | 66 | struct ctl_table_header *sysctl; |
62 | #ifdef CONFIG_NUMA | 67 | struct ctl_table *sysctl_entry; |
63 | struct mempolicy *task_mempolicy; | 68 | struct proc_ns ns; |
64 | #endif | 69 | struct inode vfs_inode; |
65 | }; | 70 | }; |
66 | 71 | ||
67 | void proc_init_inodecache(void); | 72 | /* |
73 | * General functions | ||
74 | */ | ||
75 | static inline struct proc_inode *PROC_I(const struct inode *inode) | ||
76 | { | ||
77 | return container_of(inode, struct proc_inode, vfs_inode); | ||
78 | } | ||
79 | |||
80 | static inline struct proc_dir_entry *PDE(const struct inode *inode) | ||
81 | { | ||
82 | return PROC_I(inode)->pde; | ||
83 | } | ||
84 | |||
85 | static inline void *__PDE_DATA(const struct inode *inode) | ||
86 | { | ||
87 | return PDE(inode)->data; | ||
88 | } | ||
68 | 89 | ||
69 | static inline struct pid *proc_pid(struct inode *inode) | 90 | static inline struct pid *proc_pid(struct inode *inode) |
70 | { | 91 | { |
@@ -76,11 +97,6 @@ static inline struct task_struct *get_proc_task(struct inode *inode) | |||
76 | return get_pid_task(proc_pid(inode), PIDTYPE_PID); | 97 | return get_pid_task(proc_pid(inode), PIDTYPE_PID); |
77 | } | 98 | } |
78 | 99 | ||
79 | static inline int proc_fd(struct inode *inode) | ||
80 | { | ||
81 | return PROC_I(inode)->fd; | ||
82 | } | ||
83 | |||
84 | static inline int task_dumpable(struct task_struct *task) | 100 | static inline int task_dumpable(struct task_struct *task) |
85 | { | 101 | { |
86 | int dumpable = 0; | 102 | int dumpable = 0; |
@@ -96,15 +112,6 @@ static inline int task_dumpable(struct task_struct *task) | |||
96 | return 0; | 112 | return 0; |
97 | } | 113 | } |
98 | 114 | ||
99 | static inline int pid_delete_dentry(const struct dentry * dentry) | ||
100 | { | ||
101 | /* Is the task we represent dead? | ||
102 | * If so, then don't put the dentry on the lru list, | ||
103 | * kill it immediately. | ||
104 | */ | ||
105 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
106 | } | ||
107 | |||
108 | static inline unsigned name_to_int(struct dentry *dentry) | 115 | static inline unsigned name_to_int(struct dentry *dentry) |
109 | { | 116 | { |
110 | const char *name = dentry->d_name.name; | 117 | const char *name = dentry->d_name.name; |
@@ -127,63 +134,165 @@ out: | |||
127 | return ~0U; | 134 | return ~0U; |
128 | } | 135 | } |
129 | 136 | ||
130 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | 137 | /* |
131 | struct dentry *dentry); | 138 | * Offset of the first process in the /proc root directory.. |
132 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 139 | */ |
133 | filldir_t filldir); | 140 | #define FIRST_PROCESS_ENTRY 256 |
141 | |||
142 | /* Worst case buffer size needed for holding an integer. */ | ||
143 | #define PROC_NUMBUF 13 | ||
134 | 144 | ||
135 | struct pde_opener { | 145 | /* |
136 | struct inode *inode; | 146 | * array.c |
137 | struct file *file; | 147 | */ |
138 | int (*release)(struct inode *, struct file *); | 148 | extern const struct file_operations proc_tid_children_operations; |
139 | struct list_head lh; | ||
140 | }; | ||
141 | void pde_users_dec(struct proc_dir_entry *pde); | ||
142 | 149 | ||
150 | extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, | ||
151 | struct pid *, struct task_struct *); | ||
152 | extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, | ||
153 | struct pid *, struct task_struct *); | ||
154 | extern int proc_pid_status(struct seq_file *, struct pid_namespace *, | ||
155 | struct pid *, struct task_struct *); | ||
156 | extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, | ||
157 | struct pid *, struct task_struct *); | ||
158 | |||
159 | /* | ||
160 | * base.c | ||
161 | */ | ||
162 | extern const struct dentry_operations pid_dentry_operations; | ||
163 | extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); | ||
164 | extern int proc_setattr(struct dentry *, struct iattr *); | ||
165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); | ||
166 | extern int pid_revalidate(struct dentry *, unsigned int); | ||
167 | extern int pid_delete_dentry(const struct dentry *); | ||
168 | extern int proc_pid_readdir(struct file *, void *, filldir_t); | ||
169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); | ||
170 | extern loff_t mem_lseek(struct file *, loff_t, int); | ||
171 | |||
172 | /* Lookups */ | ||
173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
174 | struct task_struct *, const void *); | ||
175 | extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int, | ||
176 | instantiate_t, struct task_struct *, const void *); | ||
177 | |||
178 | /* | ||
179 | * generic.c | ||
180 | */ | ||
143 | extern spinlock_t proc_subdir_lock; | 181 | extern spinlock_t proc_subdir_lock; |
144 | 182 | ||
145 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); | 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); |
146 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, |
147 | unsigned long task_vsize(struct mm_struct *); | 185 | struct dentry *); |
148 | unsigned long task_statm(struct mm_struct *, | 186 | extern int proc_readdir(struct file *, void *, filldir_t); |
149 | unsigned long *, unsigned long *, unsigned long *, unsigned long *); | 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t); |
150 | void task_mem(struct seq_file *, struct mm_struct *); | ||
151 | 188 | ||
152 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
153 | { | 190 | { |
154 | atomic_inc(&pde->count); | 191 | atomic_inc(&pde->count); |
155 | return pde; | 192 | return pde; |
156 | } | 193 | } |
157 | void pde_put(struct proc_dir_entry *pde); | 194 | extern void pde_put(struct proc_dir_entry *); |
158 | |||
159 | int proc_fill_super(struct super_block *); | ||
160 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | ||
161 | int proc_remount(struct super_block *sb, int *flags, char *data); | ||
162 | 195 | ||
163 | /* | 196 | /* |
164 | * These are generic /proc routines that use the internal | 197 | * inode.c |
165 | * "struct proc_dir_entry" tree to traverse the filesystem. | ||
166 | * | ||
167 | * The /proc root directory has extended versions to take care | ||
168 | * of the /proc/<pid> subdirectories. | ||
169 | */ | 198 | */ |
170 | int proc_readdir(struct file *, void *, filldir_t); | 199 | struct pde_opener { |
171 | struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 200 | struct file *file; |
201 | struct list_head lh; | ||
202 | int closing; | ||
203 | struct completion *c; | ||
204 | }; | ||
172 | 205 | ||
206 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
173 | 207 | ||
208 | extern void proc_init_inodecache(void); | ||
209 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | ||
210 | extern int proc_fill_super(struct super_block *); | ||
211 | extern void proc_entry_rundown(struct proc_dir_entry *); | ||
174 | 212 | ||
175 | /* Lookups */ | 213 | /* |
176 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 214 | * proc_devtree.c |
177 | struct task_struct *, const void *); | 215 | */ |
178 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 216 | #ifdef CONFIG_PROC_DEVICETREE |
179 | const char *name, int len, | 217 | extern void proc_device_tree_init(void); |
180 | instantiate_t instantiate, struct task_struct *task, const void *ptr); | 218 | #endif |
181 | int pid_revalidate(struct dentry *dentry, unsigned int flags); | ||
182 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); | ||
183 | extern const struct dentry_operations pid_dentry_operations; | ||
184 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
185 | int proc_setattr(struct dentry *dentry, struct iattr *attr); | ||
186 | 219 | ||
220 | /* | ||
221 | * proc_namespaces.c | ||
222 | */ | ||
187 | extern const struct inode_operations proc_ns_dir_inode_operations; | 223 | extern const struct inode_operations proc_ns_dir_inode_operations; |
188 | extern const struct file_operations proc_ns_dir_operations; | 224 | extern const struct file_operations proc_ns_dir_operations; |
189 | 225 | ||
226 | /* | ||
227 | * proc_net.c | ||
228 | */ | ||
229 | extern const struct file_operations proc_net_operations; | ||
230 | extern const struct inode_operations proc_net_inode_operations; | ||
231 | |||
232 | #ifdef CONFIG_NET | ||
233 | extern int proc_net_init(void); | ||
234 | #else | ||
235 | static inline int proc_net_init(void) { return 0; } | ||
236 | #endif | ||
237 | |||
238 | /* | ||
239 | * proc_self.c | ||
240 | */ | ||
241 | extern int proc_setup_self(struct super_block *); | ||
242 | |||
243 | /* | ||
244 | * proc_sysctl.c | ||
245 | */ | ||
246 | #ifdef CONFIG_PROC_SYSCTL | ||
247 | extern int proc_sys_init(void); | ||
248 | extern void sysctl_head_put(struct ctl_table_header *); | ||
249 | #else | ||
250 | static inline void proc_sys_init(void) { } | ||
251 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
252 | #endif | ||
253 | |||
254 | /* | ||
255 | * proc_tty.c | ||
256 | */ | ||
257 | #ifdef CONFIG_TTY | ||
258 | extern void proc_tty_init(void); | ||
259 | #else | ||
260 | static inline void proc_tty_init(void) {} | ||
261 | #endif | ||
262 | |||
263 | /* | ||
264 | * root.c | ||
265 | */ | ||
266 | extern struct proc_dir_entry proc_root; | ||
267 | |||
268 | extern void proc_self_init(void); | ||
269 | extern int proc_remount(struct super_block *, int *, char *); | ||
270 | |||
271 | /* | ||
272 | * task_[no]mmu.c | ||
273 | */ | ||
274 | struct proc_maps_private { | ||
275 | struct pid *pid; | ||
276 | struct task_struct *task; | ||
277 | #ifdef CONFIG_MMU | ||
278 | struct vm_area_struct *tail_vma; | ||
279 | #endif | ||
280 | #ifdef CONFIG_NUMA | ||
281 | struct mempolicy *task_mempolicy; | ||
282 | #endif | ||
283 | }; | ||
284 | |||
285 | extern const struct file_operations proc_pid_maps_operations; | ||
286 | extern const struct file_operations proc_tid_maps_operations; | ||
287 | extern const struct file_operations proc_pid_numa_maps_operations; | ||
288 | extern const struct file_operations proc_tid_numa_maps_operations; | ||
289 | extern const struct file_operations proc_pid_smaps_operations; | ||
290 | extern const struct file_operations proc_tid_smaps_operations; | ||
291 | extern const struct file_operations proc_clear_refs_operations; | ||
292 | extern const struct file_operations proc_pagemap_operations; | ||
293 | |||
294 | extern unsigned long task_vsize(struct mm_struct *); | ||
295 | extern unsigned long task_statm(struct mm_struct *, | ||
296 | unsigned long *, unsigned long *, | ||
297 | unsigned long *, unsigned long *); | ||
298 | extern void task_mem(struct seq_file *, struct mm_struct *); | ||
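struct proc_dir_entry and the PROC_I()/PDE() helpers now live in this internal header, so code outside fs/proc can no longer dereference pde->data directly; the exported PDE_DATA() accessor (presumably a thin wrapper around the __PDE_DATA() inline above) is the replacement. The before/after for a typical ->open, as seen in the proc_devtree and reiserfs conversions later in this diff (my_show is a placeholder callback):

	/* before: needs the struct proc_dir_entry layout in scope */
	return single_open(file, my_show, PDE(inode)->data);

	/* after: opaque accessor, layout stays private to fs/proc */
	return single_open(file, my_show, PDE_DATA(inode));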
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index f6a13f489e30..0a22194e5d58 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
14 | #include <linux/kcore.h> | ||
14 | #include <linux/user.h> | 15 | #include <linux/user.h> |
15 | #include <linux/capability.h> | 16 | #include <linux/capability.h> |
16 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
@@ -28,6 +29,7 @@ | |||
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/memory.h> | 30 | #include <linux/memory.h> |
30 | #include <asm/sections.h> | 31 | #include <asm/sections.h> |
32 | #include "internal.h" | ||
31 | 33 | ||
32 | #define CORE_STR "CORE" | 34 | #define CORE_STR "CORE" |
33 | 35 | ||
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 66b51c0383da..54bdc6701e9f 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -51,7 +51,7 @@ static int ns_delete_dentry(const struct dentry *dentry) | |||
51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | 51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
52 | { | 52 | { |
53 | struct inode *inode = dentry->d_inode; | 53 | struct inode *inode = dentry->d_inode; |
54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; | 54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; |
55 | 55 | ||
56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | 56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", |
57 | ns_ops->name, inode->i_ino); | 57 | ns_ops->name, inode->i_ino); |
@@ -95,8 +95,8 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, | |||
95 | inode->i_op = &ns_inode_operations; | 95 | inode->i_op = &ns_inode_operations; |
96 | inode->i_mode = S_IFREG | S_IRUGO; | 96 | inode->i_mode = S_IFREG | S_IRUGO; |
97 | inode->i_fop = &ns_file_operations; | 97 | inode->i_fop = &ns_file_operations; |
98 | ei->ns_ops = ns_ops; | 98 | ei->ns.ns_ops = ns_ops; |
99 | ei->ns = ns; | 99 | ei->ns.ns = ns; |
100 | unlock_new_inode(inode); | 100 | unlock_new_inode(inode); |
101 | } else { | 101 | } else { |
102 | ns_ops->put(ns); | 102 | ns_ops->put(ns); |
@@ -128,7 +128,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
129 | goto out_put_task; | 129 | goto out_put_task; |
130 | 130 | ||
131 | ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); | 131 | ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); |
132 | if (IS_ERR(ns_path.dentry)) { | 132 | if (IS_ERR(ns_path.dentry)) { |
133 | error = ERR_CAST(ns_path.dentry); | 133 | error = ERR_CAST(ns_path.dentry); |
134 | goto out_put_task; | 134 | goto out_put_task; |
@@ -148,7 +148,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl | |||
148 | { | 148 | { |
149 | struct inode *inode = dentry->d_inode; | 149 | struct inode *inode = dentry->d_inode; |
150 | struct proc_inode *ei = PROC_I(inode); | 150 | struct proc_inode *ei = PROC_I(inode); |
151 | const struct proc_ns_operations *ns_ops = ei->ns_ops; | 151 | const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; |
152 | struct task_struct *task; | 152 | struct task_struct *task; |
153 | void *ns; | 153 | void *ns; |
154 | char name[50]; | 154 | char name[50]; |
@@ -202,7 +202,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
202 | ei = PROC_I(inode); | 202 | ei = PROC_I(inode); |
203 | inode->i_mode = S_IFLNK|S_IRWXUGO; | 203 | inode->i_mode = S_IFLNK|S_IRWXUGO; |
204 | inode->i_op = &proc_ns_link_inode_operations; | 204 | inode->i_op = &proc_ns_link_inode_operations; |
205 | ei->ns_ops = ns_ops; | 205 | ei->ns.ns_ops = ns_ops; |
206 | 206 | ||
207 | d_set_d_op(dentry, &pid_dentry_operations); | 207 | d_set_d_op(dentry, &pid_dentry_operations); |
208 | d_add(dentry, inode); | 208 | d_add(dentry, inode); |
@@ -337,6 +337,11 @@ out_invalid: | |||
337 | return ERR_PTR(-EINVAL); | 337 | return ERR_PTR(-EINVAL); |
338 | } | 338 | } |
339 | 339 | ||
340 | struct proc_ns *get_proc_ns(struct inode *inode) | ||
341 | { | ||
342 | return &PROC_I(inode)->ns; | ||
343 | } | ||
344 | |||
340 | bool proc_ns_inode(struct inode *inode) | 345 | bool proc_ns_inode(struct inode *inode) |
341 | { | 346 | { |
342 | return inode->i_fop == &ns_file_operations; | 347 | return inode->i_fop == &ns_file_operations; |
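The namespace bits of proc_inode are now grouped in an embedded struct proc_ns, and the new get_proc_ns() gives callers outside fs/proc a way to reach it without PROC_I(). A hedged sketch of how setns()-style code might consume it, assuming it already holds a struct file for a /proc/&lt;pid&gt;/ns/* entry; the check_ns_file() wrapper is hypothetical:

	#include <linux/fs.h>
	#include <linux/proc_ns.h>

	static int check_ns_file(struct file *file)
	{
		struct proc_ns *pns;

		if (!proc_ns_inode(file_inode(file)))
			return -EINVAL;		/* not an ns file at all */

		pns = get_proc_ns(file_inode(file));
		/* real code would go on to use pns->ns_ops and pns->ns,
		 * e.g. to install the namespace; just sanity-check here */
		return (pns->ns_ops && pns->ns) ? 0 : -EINVAL;
	}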
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 30b590f5bd35..505afc950e0a 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c | |||
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v) | |||
41 | 41 | ||
42 | static int property_proc_open(struct inode *inode, struct file *file) | 42 | static int property_proc_open(struct inode *inode, struct file *file) |
43 | { | 43 | { |
44 | return single_open(file, property_proc_show, PDE(inode)->data); | 44 | return single_open(file, property_proc_show, __PDE_DATA(inode)); |
45 | } | 45 | } |
46 | 46 | ||
47 | static const struct file_operations property_proc_fops = { | 47 | static const struct file_operations property_proc_fops = { |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index b4ac6572474f..986e83220d56 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -26,6 +26,10 @@ | |||
26 | 26 | ||
27 | #include "internal.h" | 27 | #include "internal.h" |
28 | 28 | ||
29 | static inline struct net *PDE_NET(struct proc_dir_entry *pde) | ||
30 | { | ||
31 | return pde->parent->data; | ||
32 | } | ||
29 | 33 | ||
30 | static struct net *get_proc_net(const struct inode *inode) | 34 | static struct net *get_proc_net(const struct inode *inode) |
31 | { | 35 | { |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 9c7fab1d23f0..41a6ea93f486 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -141,6 +141,8 @@ static void proc_kill_sb(struct super_block *sb) | |||
141 | struct pid_namespace *ns; | 141 | struct pid_namespace *ns; |
142 | 142 | ||
143 | ns = (struct pid_namespace *)sb->s_fs_info; | 143 | ns = (struct pid_namespace *)sb->s_fs_info; |
144 | if (ns->proc_self) | ||
145 | dput(ns->proc_self); | ||
144 | kill_anon_super(sb); | 146 | kill_anon_super(sb); |
145 | put_pid_ns(ns); | 147 | put_pid_ns(ns); |
146 | } | 148 | } |
diff --git a/fs/proc/self.c b/fs/proc/self.c index aa5cc3bff140..6b6a993b5c25 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c | |||
@@ -1,6 +1,8 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/sched.h> | 1 | #include <linux/sched.h> |
3 | #include <linux/namei.h> | 2 | #include <linux/namei.h> |
3 | #include <linux/slab.h> | ||
4 | #include <linux/pid_namespace.h> | ||
5 | #include "internal.h" | ||
4 | 6 | ||
5 | /* | 7 | /* |
6 | * /proc/self: | 8 | * /proc/self: |
@@ -48,12 +50,43 @@ static const struct inode_operations proc_self_inode_operations = { | |||
48 | .put_link = proc_self_put_link, | 50 | .put_link = proc_self_put_link, |
49 | }; | 51 | }; |
50 | 52 | ||
51 | void __init proc_self_init(void) | 53 | static unsigned self_inum; |
54 | |||
55 | int proc_setup_self(struct super_block *s) | ||
52 | { | 56 | { |
53 | struct proc_dir_entry *proc_self_symlink; | 57 | struct inode *root_inode = s->s_root->d_inode; |
54 | mode_t mode; | 58 | struct pid_namespace *ns = s->s_fs_info; |
59 | struct dentry *self; | ||
60 | |||
61 | mutex_lock(&root_inode->i_mutex); | ||
62 | self = d_alloc_name(s->s_root, "self"); | ||
63 | if (self) { | ||
64 | struct inode *inode = new_inode_pseudo(s); | ||
65 | if (inode) { | ||
66 | inode->i_ino = self_inum; | ||
67 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
68 | inode->i_mode = S_IFLNK | S_IRWXUGO; | ||
69 | inode->i_uid = GLOBAL_ROOT_UID; | ||
70 | inode->i_gid = GLOBAL_ROOT_GID; | ||
71 | inode->i_op = &proc_self_inode_operations; | ||
72 | d_add(self, inode); | ||
73 | } else { | ||
74 | dput(self); | ||
75 | self = ERR_PTR(-ENOMEM); | ||
76 | } | ||
77 | } else { | ||
78 | self = ERR_PTR(-ENOMEM); | ||
79 | } | ||
80 | mutex_unlock(&root_inode->i_mutex); | ||
81 | if (IS_ERR(self)) { | ||
82 | pr_err("proc_fill_super: can't allocate /proc/self\n"); | ||
83 | return PTR_ERR(self); | ||
84 | } | ||
85 | ns->proc_self = self; | ||
86 | return 0; | ||
87 | } | ||
55 | 88 | ||
56 | mode = S_IFLNK | S_IRWXUGO; | 89 | void __init proc_self_init(void) |
57 | proc_self_symlink = proc_create("self", mode, NULL, NULL ); | 90 | { |
58 | proc_self_symlink->proc_iops = &proc_self_inode_operations; | 91 | proc_alloc_inum(&self_inum); |
59 | } | 92 | } |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b870f740ab5a..17f7e080d7ff 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -8,7 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/proc_fs.h> | 11 | #include <linux/kcore.h> |
12 | #include <linux/user.h> | 12 | #include <linux/user.h> |
13 | #include <linux/elf.h> | 13 | #include <linux/elf.h> |
14 | #include <linux/elfcore.h> | 14 | #include <linux/elfcore.h> |
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
24 | #include <asm/io.h> | 24 | #include <asm/io.h> |
25 | #include "internal.h" | ||
25 | 26 | ||
26 | /* List representing chunks of contiguous memory areas and their offsets in | 27 | /* List representing chunks of contiguous memory areas and their offsets in |
27 | * vmcore file. | 28 | * vmcore file. |
@@ -698,7 +699,7 @@ void vmcore_cleanup(void) | |||
698 | struct list_head *pos, *next; | 699 | struct list_head *pos, *next; |
699 | 700 | ||
700 | if (proc_vmcore) { | 701 | if (proc_vmcore) { |
701 | remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); | 702 | proc_remove(proc_vmcore); |
702 | proc_vmcore = NULL; | 703 | proc_vmcore = NULL; |
703 | } | 704 | } |
704 | 705 | ||
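vmcore_cleanup() above switches from remove_proc_entry(name, parent) to proc_remove(), which takes the proc_dir_entry pointer the creator already holds; remove_proc_subtree(), used by the reiserfs conversion further down, is the by-name counterpart for tearing down a whole directory. A minimal sketch of both cleanup styles for a hypothetical driver; mydrv_fops and mydrv_data are placeholders:

	#include <linux/module.h>
	#include <linux/proc_fs.h>

	static const struct file_operations mydrv_fops;	/* assumed defined elsewhere */
	static int mydrv_data;					/* placeholder payload */
	static struct proc_dir_entry *mydrv_dir;

	static int __init mydrv_init(void)
	{
		mydrv_dir = proc_mkdir("mydrv", NULL);
		if (!mydrv_dir)
			return -ENOMEM;
		if (!proc_create_data("stats", 0444, mydrv_dir, &mydrv_fops, &mydrv_data)) {
			proc_remove(mydrv_dir);
			return -ENOMEM;
		}
		return 0;
	}

	static void __exit mydrv_exit(void)
	{
		proc_remove(mydrv_dir);		/* drops "stats" and the directory itself */
		/* equivalent by name: remove_proc_subtree("mydrv", NULL); */
	}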
diff --git a/fs/read_write.c b/fs/read_write.c index 8274a794253b..605dbbcb1973 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -459,6 +459,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
459 | ret = rw_verify_area(WRITE, file, pos, count); | 459 | ret = rw_verify_area(WRITE, file, pos, count); |
460 | if (ret >= 0) { | 460 | if (ret >= 0) { |
461 | count = ret; | 461 | count = ret; |
462 | file_start_write(file); | ||
462 | if (file->f_op->write) | 463 | if (file->f_op->write) |
463 | ret = file->f_op->write(file, buf, count, pos); | 464 | ret = file->f_op->write(file, buf, count, pos); |
464 | else | 465 | else |
@@ -468,6 +469,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
468 | add_wchar(current, ret); | 469 | add_wchar(current, ret); |
469 | } | 470 | } |
470 | inc_syscw(current); | 471 | inc_syscw(current); |
472 | file_end_write(file); | ||
471 | } | 473 | } |
472 | 474 | ||
473 | return ret; | 475 | return ret; |
@@ -576,7 +578,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
576 | } | 578 | } |
577 | EXPORT_SYMBOL(iov_shorten); | 579 | EXPORT_SYMBOL(iov_shorten); |
578 | 580 | ||
579 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | 581 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, |
580 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | 582 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) |
581 | { | 583 | { |
582 | struct kiocb kiocb; | 584 | struct kiocb kiocb; |
@@ -601,7 +603,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | |||
601 | } | 603 | } |
602 | 604 | ||
603 | /* Do it by hand, with file-ops */ | 605 | /* Do it by hand, with file-ops */ |
604 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | 606 | static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, |
605 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) | 607 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) |
606 | { | 608 | { |
607 | struct iovec *vector = iov; | 609 | struct iovec *vector = iov; |
@@ -743,6 +745,7 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
743 | } else { | 745 | } else { |
744 | fn = (io_fn_t)file->f_op->write; | 746 | fn = (io_fn_t)file->f_op->write; |
745 | fnv = file->f_op->aio_write; | 747 | fnv = file->f_op->aio_write; |
748 | file_start_write(file); | ||
746 | } | 749 | } |
747 | 750 | ||
748 | if (fnv) | 751 | if (fnv) |
@@ -751,6 +754,9 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
751 | else | 754 | else |
752 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | 755 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); |
753 | 756 | ||
757 | if (type != READ) | ||
758 | file_end_write(file); | ||
759 | |||
754 | out: | 760 | out: |
755 | if (iov != iovstack) | 761 | if (iov != iovstack) |
756 | kfree(iov); | 762 | kfree(iov); |
@@ -881,6 +887,201 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
881 | return ret; | 887 | return ret; |
882 | } | 888 | } |
883 | 889 | ||
890 | #ifdef CONFIG_COMPAT | ||
891 | |||
892 | static ssize_t compat_do_readv_writev(int type, struct file *file, | ||
893 | const struct compat_iovec __user *uvector, | ||
894 | unsigned long nr_segs, loff_t *pos) | ||
895 | { | ||
896 | compat_ssize_t tot_len; | ||
897 | struct iovec iovstack[UIO_FASTIOV]; | ||
898 | struct iovec *iov = iovstack; | ||
899 | ssize_t ret; | ||
900 | io_fn_t fn; | ||
901 | iov_fn_t fnv; | ||
902 | |||
903 | ret = -EINVAL; | ||
904 | if (!file->f_op) | ||
905 | goto out; | ||
906 | |||
907 | ret = -EFAULT; | ||
908 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | ||
909 | goto out; | ||
910 | |||
911 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | ||
912 | UIO_FASTIOV, iovstack, &iov); | ||
913 | if (ret <= 0) | ||
914 | goto out; | ||
915 | |||
916 | tot_len = ret; | ||
917 | ret = rw_verify_area(type, file, pos, tot_len); | ||
918 | if (ret < 0) | ||
919 | goto out; | ||
920 | |||
921 | fnv = NULL; | ||
922 | if (type == READ) { | ||
923 | fn = file->f_op->read; | ||
924 | fnv = file->f_op->aio_read; | ||
925 | } else { | ||
926 | fn = (io_fn_t)file->f_op->write; | ||
927 | fnv = file->f_op->aio_write; | ||
928 | file_start_write(file); | ||
929 | } | ||
930 | |||
931 | if (fnv) | ||
932 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | ||
933 | pos, fnv); | ||
934 | else | ||
935 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | ||
936 | |||
937 | if (type != READ) | ||
938 | file_end_write(file); | ||
939 | |||
940 | out: | ||
941 | if (iov != iovstack) | ||
942 | kfree(iov); | ||
943 | if ((ret + (type == READ)) > 0) { | ||
944 | if (type == READ) | ||
945 | fsnotify_access(file); | ||
946 | else | ||
947 | fsnotify_modify(file); | ||
948 | } | ||
949 | return ret; | ||
950 | } | ||
951 | |||
952 | static ssize_t compat_readv(struct file *file, | ||
953 | const struct compat_iovec __user *vec, | ||
954 | unsigned long vlen, loff_t *pos) | ||
955 | { | ||
956 | ssize_t ret = -EBADF; | ||
957 | |||
958 | if (!(file->f_mode & FMODE_READ)) | ||
959 | goto out; | ||
960 | |||
961 | ret = -EINVAL; | ||
962 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) | ||
963 | goto out; | ||
964 | |||
965 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | ||
966 | |||
967 | out: | ||
968 | if (ret > 0) | ||
969 | add_rchar(current, ret); | ||
970 | inc_syscr(current); | ||
971 | return ret; | ||
972 | } | ||
973 | |||
974 | COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, | ||
975 | const struct compat_iovec __user *,vec, | ||
976 | unsigned long, vlen) | ||
977 | { | ||
978 | struct fd f = fdget(fd); | ||
979 | ssize_t ret; | ||
980 | loff_t pos; | ||
981 | |||
982 | if (!f.file) | ||
983 | return -EBADF; | ||
984 | pos = f.file->f_pos; | ||
985 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
986 | f.file->f_pos = pos; | ||
987 | fdput(f); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
991 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | ||
992 | const struct compat_iovec __user *,vec, | ||
993 | unsigned long, vlen, loff_t, pos) | ||
994 | { | ||
995 | struct fd f; | ||
996 | ssize_t ret; | ||
997 | |||
998 | if (pos < 0) | ||
999 | return -EINVAL; | ||
1000 | f = fdget(fd); | ||
1001 | if (!f.file) | ||
1002 | return -EBADF; | ||
1003 | ret = -ESPIPE; | ||
1004 | if (f.file->f_mode & FMODE_PREAD) | ||
1005 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1006 | fdput(f); | ||
1007 | return ret; | ||
1008 | } | ||
1009 | |||
1010 | COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, | ||
1011 | const struct compat_iovec __user *,vec, | ||
1012 | unsigned long, vlen, u32, pos_low, u32, pos_high) | ||
1013 | { | ||
1014 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1015 | return compat_sys_preadv64(fd, vec, vlen, pos); | ||
1016 | } | ||
1017 | |||
1018 | static ssize_t compat_writev(struct file *file, | ||
1019 | const struct compat_iovec __user *vec, | ||
1020 | unsigned long vlen, loff_t *pos) | ||
1021 | { | ||
1022 | ssize_t ret = -EBADF; | ||
1023 | |||
1024 | if (!(file->f_mode & FMODE_WRITE)) | ||
1025 | goto out; | ||
1026 | |||
1027 | ret = -EINVAL; | ||
1028 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) | ||
1029 | goto out; | ||
1030 | |||
1031 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | ||
1032 | |||
1033 | out: | ||
1034 | if (ret > 0) | ||
1035 | add_wchar(current, ret); | ||
1036 | inc_syscw(current); | ||
1037 | return ret; | ||
1038 | } | ||
1039 | |||
1040 | COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, | ||
1041 | const struct compat_iovec __user *, vec, | ||
1042 | unsigned long, vlen) | ||
1043 | { | ||
1044 | struct fd f = fdget(fd); | ||
1045 | ssize_t ret; | ||
1046 | loff_t pos; | ||
1047 | |||
1048 | if (!f.file) | ||
1049 | return -EBADF; | ||
1050 | pos = f.file->f_pos; | ||
1051 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1052 | f.file->f_pos = pos; | ||
1053 | fdput(f); | ||
1054 | return ret; | ||
1055 | } | ||
1056 | |||
1057 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | ||
1058 | const struct compat_iovec __user *,vec, | ||
1059 | unsigned long, vlen, loff_t, pos) | ||
1060 | { | ||
1061 | struct fd f; | ||
1062 | ssize_t ret; | ||
1063 | |||
1064 | if (pos < 0) | ||
1065 | return -EINVAL; | ||
1066 | f = fdget(fd); | ||
1067 | if (!f.file) | ||
1068 | return -EBADF; | ||
1069 | ret = -ESPIPE; | ||
1070 | if (f.file->f_mode & FMODE_PWRITE) | ||
1071 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1072 | fdput(f); | ||
1073 | return ret; | ||
1074 | } | ||
1075 | |||
1076 | COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, | ||
1077 | const struct compat_iovec __user *,vec, | ||
1078 | unsigned long, vlen, u32, pos_low, u32, pos_high) | ||
1079 | { | ||
1080 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1081 | return compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1082 | } | ||
1083 | #endif | ||
1084 | |||
884 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 1085 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
885 | size_t count, loff_t max) | 1086 | size_t count, loff_t max) |
886 | { | 1087 | { |
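Besides adding the compat readv/writev entry points, the hunk above moves filesystem freeze protection into the generic write paths via the file_start_write()/file_end_write() pair, which is why the splice and xfs hunks further down can drop their own sb_start_write()/sb_end_write() calls. A hedged sketch of the pattern, using a hypothetical helper that invokes ->write directly:

	#include <linux/fs.h>

	static ssize_t my_issue_write(struct file *file, const char __user *buf,
				      size_t count, loff_t *pos)
	{
		ssize_t ret;

		file_start_write(file);		/* waits while the superblock is frozen */
		ret = file->f_op->write(file, buf, count, pos);
		file_end_write(file);

		return ret;
	}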
diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..0ec530d9305b 100644 --- a/fs/read_write.h +++ b/fs/read_write.h | |||
@@ -7,8 +7,3 @@ | |||
7 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); | 7 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); |
8 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, | 8 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, |
9 | unsigned long, loff_t); | 9 | unsigned long, loff_t); |
10 | |||
11 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | ||
12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); | ||
13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | ||
14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6165bd4784f6..dcaafcfc23b0 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -234,68 +234,9 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
234 | return ret; | 234 | return ret; |
235 | } | 235 | } |
236 | 236 | ||
237 | /* Write @count bytes at position @ppos in a file indicated by @file | ||
238 | from the buffer @buf. | ||
239 | |||
240 | generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want | ||
241 | something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was | ||
242 | written for (ext2/3). This is for several reasons: | ||
243 | |||
244 | * It has no understanding of any filesystem specific optimizations. | ||
245 | |||
246 | * It enters the filesystem repeatedly for each page that is written. | ||
247 | |||
248 | * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key | ||
249 | * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time | ||
250 | * to reiserfs which allows for fewer tree traversals. | ||
251 | |||
252 | * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. | ||
253 | |||
254 | * Asking the block allocation code for blocks one at a time is slightly less efficient. | ||
255 | |||
256 | All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to | ||
257 | use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make | ||
258 | things right finally. | ||
259 | |||
260 | Future Features: providing search_by_key with hints. | ||
261 | |||
262 | */ | ||
263 | static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ | ||
264 | const char __user * buf, /* pointer to user supplied data | ||
265 | (in userspace) */ | ||
266 | size_t count, /* amount of bytes to write */ | ||
267 | loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to | ||
268 | * new current position before returning. */ | ||
269 | ) | ||
270 | { | ||
271 | struct inode *inode = file_inode(file); // Inode of the file that we are writing to. | ||
272 | /* To simplify coding at this time, we store | ||
273 | locked pages in array for now */ | ||
274 | struct reiserfs_transaction_handle th; | ||
275 | th.t_trans_id = 0; | ||
276 | |||
277 | /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items | ||
278 | * lying around (most of the disk, in fact). Despite the filesystem | ||
279 | * now being a v3.6 format, the old items still can't support large | ||
280 | * file sizes. Catch this case here, as the rest of the VFS layer is | ||
281 | * oblivious to the different limitations between old and new items. | ||
282 | * reiserfs_setattr catches this for truncates. This chunk is lifted | ||
283 | * from generic_write_checks. */ | ||
284 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | ||
285 | *ppos + count > MAX_NON_LFS) { | ||
286 | if (*ppos >= MAX_NON_LFS) { | ||
287 | return -EFBIG; | ||
288 | } | ||
289 | if (count > MAX_NON_LFS - (unsigned long)*ppos) | ||
290 | count = MAX_NON_LFS - (unsigned long)*ppos; | ||
291 | } | ||
292 | |||
293 | return do_sync_write(file, buf, count, ppos); | ||
294 | } | ||
295 | |||
296 | const struct file_operations reiserfs_file_operations = { | 237 | const struct file_operations reiserfs_file_operations = { |
297 | .read = do_sync_read, | 238 | .read = do_sync_read, |
298 | .write = reiserfs_file_write, | 239 | .write = do_sync_write, |
299 | .unlocked_ioctl = reiserfs_ioctl, | 240 | .unlocked_ioctl = reiserfs_ioctl, |
300 | #ifdef CONFIG_COMPAT | 241 | #ifdef CONFIG_COMPAT |
301 | .compat_ioctl = reiserfs_compat_ioctl, | 242 | .compat_ioctl = reiserfs_compat_ioctl, |
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 9cc0740adffa..33532f79b4f7 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
@@ -394,20 +394,24 @@ static int set_sb(struct super_block *sb, void *data) | |||
394 | return -ENOENT; | 394 | return -ENOENT; |
395 | } | 395 | } |
396 | 396 | ||
397 | struct reiserfs_seq_private { | ||
398 | struct super_block *sb; | ||
399 | int (*show) (struct seq_file *, struct super_block *); | ||
400 | }; | ||
401 | |||
397 | static void *r_start(struct seq_file *m, loff_t * pos) | 402 | static void *r_start(struct seq_file *m, loff_t * pos) |
398 | { | 403 | { |
399 | struct proc_dir_entry *de = m->private; | 404 | struct reiserfs_seq_private *priv = m->private; |
400 | struct super_block *s = de->parent->data; | ||
401 | loff_t l = *pos; | 405 | loff_t l = *pos; |
402 | 406 | ||
403 | if (l) | 407 | if (l) |
404 | return NULL; | 408 | return NULL; |
405 | 409 | ||
406 | if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s))) | 410 | if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) |
407 | return NULL; | 411 | return NULL; |
408 | 412 | ||
409 | up_write(&s->s_umount); | 413 | up_write(&priv->sb->s_umount); |
410 | return s; | 414 | return priv->sb; |
411 | } | 415 | } |
412 | 416 | ||
413 | static void *r_next(struct seq_file *m, void *v, loff_t * pos) | 417 | static void *r_next(struct seq_file *m, void *v, loff_t * pos) |
@@ -426,9 +430,8 @@ static void r_stop(struct seq_file *m, void *v) | |||
426 | 430 | ||
427 | static int r_show(struct seq_file *m, void *v) | 431 | static int r_show(struct seq_file *m, void *v) |
428 | { | 432 | { |
429 | struct proc_dir_entry *de = m->private; | 433 | struct reiserfs_seq_private *priv = m->private; |
430 | int (*show) (struct seq_file *, struct super_block *) = de->data; | 434 | return priv->show(m, v); |
431 | return show(m, v); | ||
432 | } | 435 | } |
433 | 436 | ||
434 | static const struct seq_operations r_ops = { | 437 | static const struct seq_operations r_ops = { |
@@ -440,11 +443,15 @@ static const struct seq_operations r_ops = { | |||
440 | 443 | ||
441 | static int r_open(struct inode *inode, struct file *file) | 444 | static int r_open(struct inode *inode, struct file *file) |
442 | { | 445 | { |
443 | int ret = seq_open(file, &r_ops); | 446 | struct reiserfs_seq_private *priv; |
447 | int ret = seq_open_private(file, &r_ops, | ||
448 | sizeof(struct reiserfs_seq_private)); | ||
444 | 449 | ||
445 | if (!ret) { | 450 | if (!ret) { |
446 | struct seq_file *m = file->private_data; | 451 | struct seq_file *m = file->private_data; |
447 | m->private = PDE(inode); | 452 | priv = m->private; |
453 | priv->sb = proc_get_parent_data(inode); | ||
454 | priv->show = PDE_DATA(inode); | ||
448 | } | 455 | } |
449 | return ret; | 456 | return ret; |
450 | } | 457 | } |
@@ -453,7 +460,7 @@ static const struct file_operations r_file_operations = { | |||
453 | .open = r_open, | 460 | .open = r_open, |
454 | .read = seq_read, | 461 | .read = seq_read, |
455 | .llseek = seq_lseek, | 462 | .llseek = seq_lseek, |
456 | .release = seq_release, | 463 | .release = seq_release_private, |
457 | .owner = THIS_MODULE, | 464 | .owner = THIS_MODULE, |
458 | }; | 465 | }; |
459 | 466 | ||
@@ -479,9 +486,8 @@ int reiserfs_proc_info_init(struct super_block *sb) | |||
479 | *s = '!'; | 486 | *s = '!'; |
480 | 487 | ||
481 | spin_lock_init(&__PINFO(sb).lock); | 488 | spin_lock_init(&__PINFO(sb).lock); |
482 | REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); | 489 | REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb); |
483 | if (REISERFS_SB(sb)->procdir) { | 490 | if (REISERFS_SB(sb)->procdir) { |
484 | REISERFS_SB(sb)->procdir->data = sb; | ||
485 | add_file(sb, "version", show_version); | 491 | add_file(sb, "version", show_version); |
486 | add_file(sb, "super", show_super); | 492 | add_file(sb, "super", show_super); |
487 | add_file(sb, "per-level", show_per_level); | 493 | add_file(sb, "per-level", show_per_level); |
@@ -499,29 +505,17 @@ int reiserfs_proc_info_init(struct super_block *sb) | |||
499 | int reiserfs_proc_info_done(struct super_block *sb) | 505 | int reiserfs_proc_info_done(struct super_block *sb) |
500 | { | 506 | { |
501 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; | 507 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; |
502 | char b[BDEVNAME_SIZE]; | 508 | if (de) { |
503 | char *s; | 509 | char b[BDEVNAME_SIZE]; |
510 | char *s; | ||
504 | 511 | ||
505 | /* Some block devices use /'s */ | 512 | /* Some block devices use /'s */ |
506 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); | 513 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); |
507 | s = strchr(b, '/'); | 514 | s = strchr(b, '/'); |
508 | if (s) | 515 | if (s) |
509 | *s = '!'; | 516 | *s = '!'; |
510 | 517 | ||
511 | if (de) { | 518 | remove_proc_subtree(b, proc_info_root); |
512 | remove_proc_entry("journal", de); | ||
513 | remove_proc_entry("oidmap", de); | ||
514 | remove_proc_entry("on-disk-super", de); | ||
515 | remove_proc_entry("bitmap", de); | ||
516 | remove_proc_entry("per-level", de); | ||
517 | remove_proc_entry("super", de); | ||
518 | remove_proc_entry("version", de); | ||
519 | } | ||
520 | spin_lock(&__PINFO(sb).lock); | ||
521 | __PINFO(sb).exiting = 1; | ||
522 | spin_unlock(&__PINFO(sb).lock); | ||
523 | if (proc_info_root) { | ||
524 | remove_proc_entry(b, proc_info_root); | ||
525 | REISERFS_SB(sb)->procdir = NULL; | 519 | REISERFS_SB(sb)->procdir = NULL; |
526 | } | 520 | } |
527 | return 0; | 521 | return 0; |
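The reiserfs conversion above relies on the seq_open_private()/seq_release_private() pairing so per-open state lives in a private allocation rather than in the proc_dir_entry, with the superblock recovered via proc_get_parent_data(). A generic sketch of that pattern; the my_* names are illustrative, not from reiserfs:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct my_iter {
		struct super_block *sb;
	};

	static const struct seq_operations my_seq_ops;	/* ->start/->next/->stop/->show assumed elsewhere */

	static int my_open(struct inode *inode, struct file *file)
	{
		struct my_iter *priv;
		int ret;

		/* allocates and zeroes sizeof(*priv), stored in seq_file->private */
		ret = seq_open_private(file, &my_seq_ops, sizeof(*priv));
		if (ret)
			return ret;

		priv = ((struct seq_file *)file->private_data)->private;
		priv->sb = proc_get_parent_data(inode);	/* data stashed on the parent dir */
		return 0;
	}

	static const struct file_operations my_fops = {
		.open    = my_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release_private,		/* frees the private allocation */
	};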
diff --git a/fs/seq_file.c b/fs/seq_file.c index 38bb59f3f2ad..774c1eb7f1c9 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -599,6 +599,24 @@ int single_open(struct file *file, int (*show)(struct seq_file *, void *), | |||
599 | } | 599 | } |
600 | EXPORT_SYMBOL(single_open); | 600 | EXPORT_SYMBOL(single_open); |
601 | 601 | ||
602 | int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), | ||
603 | void *data, size_t size) | ||
604 | { | ||
605 | char *buf = kmalloc(size, GFP_KERNEL); | ||
606 | int ret; | ||
607 | if (!buf) | ||
608 | return -ENOMEM; | ||
609 | ret = single_open(file, show, data); | ||
610 | if (ret) { | ||
611 | kfree(buf); | ||
612 | return ret; | ||
613 | } | ||
614 | ((struct seq_file *)file->private_data)->buf = buf; | ||
615 | ((struct seq_file *)file->private_data)->size = size; | ||
616 | return 0; | ||
617 | } | ||
618 | EXPORT_SYMBOL(single_open_size); | ||
619 | |||
602 | int single_release(struct inode *inode, struct file *file) | 620 | int single_release(struct inode *inode, struct file *file) |
603 | { | 621 | { |
604 | const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; | 622 | const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; |
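The new single_open_size() above is for single-record files whose output can exceed the default one-page seq_file buffer: the caller preallocates a buffer sized for the worst case so ->show() does not have to be re-run after a buffer grow. A hedged usage sketch with a hypothetical show routine and size estimate:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int big_table_show(struct seq_file *m, void *v)
	{
		int i;

		for (i = 0; i < 1024; i++)
			seq_printf(m, "row %d\n", i);
		return 0;
	}

	static int big_table_open(struct inode *inode, struct file *file)
	{
		/* worst-case output estimate; buffer is allocated up front */
		return single_open_size(file, big_table_show, PDE_DATA(inode),
					16 * 1024);
	}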
diff --git a/fs/splice.c b/fs/splice.c index 6b485b8753bd..e6b25598c8c4 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -219,7 +219,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
219 | page_nr++; | 219 | page_nr++; |
220 | ret += buf->len; | 220 | ret += buf->len; |
221 | 221 | ||
222 | if (pipe->inode) | 222 | if (pipe->files) |
223 | do_wakeup = 1; | 223 | do_wakeup = 1; |
224 | 224 | ||
225 | if (!--spd->nr_pages) | 225 | if (!--spd->nr_pages) |
@@ -829,7 +829,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
829 | ops->release(pipe, buf); | 829 | ops->release(pipe, buf); |
830 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | 830 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
831 | pipe->nrbufs--; | 831 | pipe->nrbufs--; |
832 | if (pipe->inode) | 832 | if (pipe->files) |
833 | sd->need_wakeup = true; | 833 | sd->need_wakeup = true; |
834 | } | 834 | } |
835 | 835 | ||
@@ -1001,8 +1001,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1001 | }; | 1001 | }; |
1002 | ssize_t ret; | 1002 | ssize_t ret; |
1003 | 1003 | ||
1004 | sb_start_write(inode->i_sb); | ||
1005 | |||
1006 | pipe_lock(pipe); | 1004 | pipe_lock(pipe); |
1007 | 1005 | ||
1008 | splice_from_pipe_begin(&sd); | 1006 | splice_from_pipe_begin(&sd); |
@@ -1038,7 +1036,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1038 | *ppos += ret; | 1036 | *ppos += ret; |
1039 | balance_dirty_pages_ratelimited(mapping); | 1037 | balance_dirty_pages_ratelimited(mapping); |
1040 | } | 1038 | } |
1041 | sb_end_write(inode->i_sb); | ||
1042 | 1039 | ||
1043 | return ret; | 1040 | return ret; |
1044 | } | 1041 | } |
@@ -1118,7 +1115,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
1118 | else | 1115 | else |
1119 | splice_write = default_file_splice_write; | 1116 | splice_write = default_file_splice_write; |
1120 | 1117 | ||
1121 | return splice_write(pipe, out, ppos, len, flags); | 1118 | file_start_write(out); |
1119 | ret = splice_write(pipe, out, ppos, len, flags); | ||
1120 | file_end_write(out); | ||
1121 | return ret; | ||
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | /* | 1124 | /* |
@@ -1184,7 +1184,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
1184 | */ | 1184 | */ |
1185 | pipe = current->splice_pipe; | 1185 | pipe = current->splice_pipe; |
1186 | if (unlikely(!pipe)) { | 1186 | if (unlikely(!pipe)) { |
1187 | pipe = alloc_pipe_info(NULL); | 1187 | pipe = alloc_pipe_info(); |
1188 | if (!pipe) | 1188 | if (!pipe) |
1189 | return -ENOMEM; | 1189 | return -ENOMEM; |
1190 | 1190 | ||
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f03bf1a456fb..3800128d2171 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -775,8 +775,6 @@ xfs_file_aio_write( | |||
775 | if (ocount == 0) | 775 | if (ocount == 0) |
776 | return 0; | 776 | return 0; |
777 | 777 | ||
778 | sb_start_write(inode->i_sb); | ||
779 | |||
780 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 778 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
781 | ret = -EIO; | 779 | ret = -EIO; |
782 | goto out; | 780 | goto out; |
@@ -800,7 +798,6 @@ xfs_file_aio_write( | |||
800 | } | 798 | } |
801 | 799 | ||
802 | out: | 800 | out: |
803 | sb_end_write(inode->i_sb); | ||
804 | return ret; | 801 | return ret; |
805 | } | 802 | } |
806 | 803 | ||