43 files changed, 797 insertions, 450 deletions
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index ad46eae1f9bb..c350d01716bd 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -675,8 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	}
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
-	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+
+	tty_add_file(tty, filp);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 0350c42375a2..949067a0bd47 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers);	/* linked list of tty drivers */
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
 
+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
 static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
 static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
 ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -185,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty)
 	kfree(tty);
 }
 
+static inline struct tty_struct *file_tty(struct file *file)
+{
+	return ((struct tty_file_private *)file->private_data)->tty;
+}
+
+/* Associate a new file with the tty structure */
+void tty_add_file(struct tty_struct *tty, struct file *file)
+{
+	struct tty_file_private *priv;
+
+	/* XXX: must implement proper error handling in callers */
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL);
+
+	priv->tty = tty;
+	priv->file = file;
+	file->private_data = priv;
+
+	spin_lock(&tty_files_lock);
+	list_add(&priv->list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
+}
+
+/* Delete file from its tty */
+void tty_del_file(struct file *file)
+{
+	struct tty_file_private *priv = file->private_data;
+
+	spin_lock(&tty_files_lock);
+	list_del(&priv->list);
+	spin_unlock(&tty_files_lock);
+	file->private_data = NULL;
+	kfree(priv);
+}
+
+
 #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
 
 /**
@@ -235,11 +273,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
 	struct list_head *p;
 	int count = 0;
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_for_each(p, &tty->tty_files) {
 		count++;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_SLAVE &&
 	    tty->link && tty->link->count)
@@ -497,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty)
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
+	struct tty_file_private *priv;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -506,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty)
 
 
 	spin_lock(&redirect_lock);
-	if (redirect && redirect->private_data == tty) {
+	if (redirect && file_tty(redirect) == tty) {
 		f = redirect;
 		redirect = NULL;
 	}
@@ -519,9 +558,10 @@ void __tty_hangup(struct tty_struct *tty)
 	   workqueue with the lock held */
 	check_tty_count(tty, "tty_hangup");
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
-	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+	list_for_each_entry(priv, &tty->tty_files, list) {
+		filp = priv->file;
 		if (filp->f_op->write == redirected_tty_write)
 			cons_filp = filp;
 		if (filp->f_op->write != tty_write)
@@ -530,7 +570,7 @@ void __tty_hangup(struct tty_struct *tty)
 		__tty_fasync(-1, filp, 0);	/* can't block */
 		filp->f_op = &hung_up_tty_fops;
 	}
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	tty_ldisc_hangup(tty);
 
@@ -889,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
 	int i;
-	struct tty_struct *tty;
-	struct inode *inode;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 
-	tty = file->private_data;
-	inode = file->f_path.dentry->d_inode;
 	if (tty_paranoia_check(tty, inode, "tty_read"))
 		return -EIO;
 	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -1065,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 static ssize_t tty_write(struct file *file, const char __user *buf,
 						size_t count, loff_t *ppos)
 {
-	struct tty_struct *tty;
 	struct inode *inode = file->f_path.dentry->d_inode;
+	struct tty_struct *tty = file_tty(file);
+	struct tty_ldisc *ld;
 	ssize_t ret;
-	struct tty_ldisc *ld;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_write"))
 		return -EIO;
 	if (!tty || !tty->ops->write ||
@@ -1424,9 +1461,9 @@ static void release_one_tty(struct work_struct *work)
 	tty_driver_kref_put(driver);
 	module_put(driver->owner);
 
-	file_list_lock();
+	spin_lock(&tty_files_lock);
 	list_del_init(&tty->tty_files);
-	file_list_unlock();
+	spin_unlock(&tty_files_lock);
 
 	put_pid(tty->pgrp);
 	put_pid(tty->session);
@@ -1507,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx)
 
 int tty_release(struct inode *inode, struct file *filp)
 {
-	struct tty_struct *tty, *o_tty;
+	struct tty_struct *tty = file_tty(filp);
+	struct tty_struct *o_tty;
 	int	pty_master, tty_closing, o_tty_closing, do_sleep;
 	int	devpts;
 	int	idx;
 	char	buf[64];
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_release_dev"))
 		return 0;
 
@@ -1671,8 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
	 */
-	file_kill(filp);
-	filp->private_data = NULL;
+	tty_del_file(filp);
 
 	/*
 	 * Perform some housekeeping before deciding whether to return.
@@ -1839,8 +1875,8 @@ got_driver:
 		return PTR_ERR(tty);
 	}
 
-	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+	tty_add_file(tty, filp);
+
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -1916,11 +1952,10 @@ got_driver:
 
 static unsigned int tty_poll(struct file *filp, poll_table *wait)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	struct tty_ldisc *ld;
 	int ret = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
 		return 0;
 
@@ -1933,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
 
 static int __tty_fasync(int fd, struct file *filp, int on)
 {
-	struct tty_struct *tty;
+	struct tty_struct *tty = file_tty(filp);
 	unsigned long flags;
 	int retval = 0;
 
-	tty = filp->private_data;
 	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
 		goto out;
 
@@ -2491,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty);
 */
 long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct tty_struct *tty, *real_tty;
+	struct tty_struct *tty = file_tty(file);
+	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int retval;
 	struct tty_ldisc *ld;
 	struct inode *inode = file->f_dentry->d_inode;
 
-	tty = file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
@@ -2619,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct tty_struct *tty = file->private_data;
+	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 	int retval = -ENOIOCTLCMD;
 
@@ -2711,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty)
 			if (!filp)
 				continue;
 			if (filp->f_op->read == tty_read &&
-			    filp->private_data == tty) {
+			    file_tty(filp) == tty) {
 				printk(KERN_NOTICE "SAK: killed process %d"
 					" (%s): fd#%d opened to the tty\n",
 					task_pid_nr(p), p->comm, i);
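Taken together, the tty_io.c changes stop storing the tty directly in filp->private_data and stop abusing file->f_u.fu_list: each open file on a tty now gets a small tty_file_private linking the file and the tty, guarded by the new tty_files_lock. A minimal sketch of the resulting open/release pattern, assuming a hypothetical example_lookup_tty() helper (the real lookup lives in tty_open()):

	/* Sketch only - mirrors tty_open()/tty_release() above, not a full driver. */
	static int example_open(struct inode *inode, struct file *filp)
	{
		struct tty_struct *tty = example_lookup_tty(inode);	/* hypothetical */

		tty_add_file(tty, filp);	/* allocates the tty_file_private and links it */
		return 0;
	}

	static int example_release(struct inode *inode, struct file *filp)
	{
		struct tty_struct *tty = file_tty(filp);	/* back-pointer via private_data */

		/* ... normal close work on tty ... */
		tty_del_file(filp);		/* unlinks and frees the tty_file_private */
		return 0;
	}

Note the XXX in tty_add_file(): with __GFP_NOFAIL the allocation cannot fail, which papers over the missing error handling in its callers for now.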
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index cdc4dd50d638..8ec83d2dffb7 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le
 		return -ENOENT;
 	}
 
-	read_lock(&current->fs->lock);
+	spin_lock(&current->fs->lock);
 	path.mnt = mntget(current->fs->root.mnt);
-	read_unlock(&current->fs->lock);
+	spin_unlock(&current->fs->lock);
 
 	path.dentry = d;
 
@@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
 		return -ENOENT;
 	}
 
-	read_lock(&current->fs->lock);
+	spin_lock(&current->fs->lock);
 	root = dget(current->fs->root.dentry);
-	read_unlock(&current->fs->lock);
+	spin_unlock(&current->fs->lock);
 
 	spin_lock(&dcache_lock);
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa339e051..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_unwritten(bh));
 
 	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
-	/*
 	 * Only clear out a write error when rewriting
 	 */
 	if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
 * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
 *
 * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
 * For a data-integrity writeout, we need to wait upon any in-progress I/O
 * and then start new I/O and then wait upon it. The caller must have a ref on
 * the buffer_head.
 */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
 		if (buffer_eopnotsupp(bh)) {
 			clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
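The buffer.c rework replaces the overloaded SWRITE* request types with two explicit helpers: write_dirty_buffer() always locks the buffer and submits it if it was dirty (asynchronous, the caller does not wait), while __sync_dirty_buffer() lets the caller choose the write type and waits for completion, with sync_dirty_buffer() now a thin WRITE_SYNC wrapper. A hedged sketch of converting an old ll_rw_block(SWRITE, ...) caller, in the same shape as the fat and jbd conversions later in this series:

	/* Sketch: write out a batch of dirty buffers, then wait for all of them. */
	static int example_flush_bhs(struct buffer_head **bhs, int nr)
	{
		int i, err = 0;

		for (i = 0; i < nr; i++)
			write_dirty_buffer(bhs[i], WRITE);	/* lock + submit if dirty */

		for (i = 0; i < nr; i++) {
			wait_on_buffer(bhs[i]);
			if (!err && !buffer_uptodate(bhs[i]))
				err = -EIO;
		}
		return err;
	}

The key semantic difference from plain ll_rw_block(WRITE, ...) is the unconditional lock_buffer(): the helper waits out any I/O already in flight, so the *current* buffer contents reach disk - exactly what SWRITE used to guarantee.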
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a53b130b366c..1e7a33028d33 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 		}
 	} else {
 		inode = iget_locked(sb, CRAMINO(cramfs_inode));
-		if (inode) {
+		if (inode && (inode->i_state & I_NEW)) {
 			setup_inode(inode, cramfs_inode);
 			unlock_new_inode(inode);
 		}
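The cramfs fix honours the iget_locked() contract: the function may return an inode that was already in the inode cache and fully set up, in which case initializing it again (and calling unlock_new_inode() on it) is a bug. Only an inode carrying I_NEW belongs to the caller for initialization. A minimal sketch of the idiom, with a hypothetical example_fill_inode() standing in for the filesystem-specific setup:

	struct inode *inode = iget_locked(sb, ino);

	if (!inode)
		return NULL;
	if (inode->i_state & I_NEW) {
		example_fill_inode(inode);	/* hypothetical fs-specific setup */
		unlock_new_inode(inode);	/* clears I_NEW, wakes concurrent lookups */
	}
	/* else: cache hit, inode already initialized - use it as-is */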
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d13bf50b7b1..83293be48149 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
 *
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use dput to free the entry when it has
- * finished using it. %NULL is returned on failure.
- *
- * __d_lookup is dcache_lock free. The hash list is protected using RCU.
- * Memory barriers are used while updating and doing lockless traversal.
- * To avoid races with d_move while rename is happening, d_lock is used.
- *
- * Overflows in memcmp(), while d_move, are avoided by keeping the length
- * and name pointer in one structure pointed by d_qstr.
- *
- * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
- * lookup is going on.
- *
- * The dentry unused LRU is not updated even if lookup finds the required dentry
- * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
- * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
- * acquisition.
- *
- * d_lookup() is protected against the concurrent renames in some unrelated
- * directory using the seqlockt_t rename_lock.
+ * d_lookup searches the children of the parent dentry for the name in
+ * question. If the dentry is found its reference count is incremented and the
+ * dentry is returned. The caller must use dput to free the entry when it has
+ * finished using it. %NULL is returned if the dentry does not exist.
 */
-
 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 {
 	struct dentry * dentry = NULL;
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 }
 EXPORT_SYMBOL(d_lookup);
 
+/*
+ * __d_lookup - search for a dentry (racy)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup is like d_lookup, however it may (rarely) return a
+ * false-negative result due to unrelated rename activity.
+ *
+ * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
+ * however it must be used carefully, eg. with a following d_lookup in
+ * the case of failure.
+ *
+ * __d_lookup callers must be commented.
+ */
 struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 {
 	unsigned int len = name->len;
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 	struct hlist_node *node;
 	struct dentry *dentry;
 
+	/*
+	 * The hash list is protected using RCU.
+	 *
+	 * Take d_lock when comparing a candidate dentry, to avoid races
+	 * with d_move().
+	 *
+	 * It is possible that concurrent renames can mess up our list
+	 * walk here and result in missing our dentry, resulting in the
+	 * false-negative result. d_lookup() protects against concurrent
+	 * renames using rename_lock seqlock.
+	 *
+	 * See Documentation/vfs/dcache-locking.txt for more details.
+	 */
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 
 		/*
 		 * Recheck the dentry after taking the lock - d_move may have
-		 * changed things. Don't bother checking the hash because we're
-		 * about to compare the whole name anyway.
+		 * changed things. Don't bother checking the hash because
+		 * we're about to compare the whole name anyway.
 		 */
 		if (dentry->d_parent != parent)
 			goto next;
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
 	bool slash = false;
 	int error = 0;
 
-	spin_lock(&vfsmount_lock);
+	br_read_lock(vfsmount_lock);
 	while (dentry != root->dentry || vfsmnt != root->mnt) {
 		struct dentry * parent;
 
@@ -1954,7 +1964,7 @@ out:
 	if (!error && !slash)
 		error = prepend(buffer, buflen, "/", 1);
 
-	spin_unlock(&vfsmount_lock);
+	br_read_unlock(vfsmount_lock);
 	return error;
 
 global_root:
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
 	struct vfsmount *mnt = path1->mnt;
 	struct dentry *dentry = path1->dentry;
 	int res;
-	spin_lock(&vfsmount_lock);
+
+	br_read_lock(vfsmount_lock);
 	if (mnt != path2->mnt) {
 		for (;;) {
 			if (mnt->mnt_parent == mnt) {
-				spin_unlock(&vfsmount_lock);
+				br_read_unlock(vfsmount_lock);
 				return 0;
 			}
 			if (mnt->mnt_parent == path2->mnt)
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
 		dentry = mnt->mnt_mountpoint;
 	}
 	res = is_subdir(dentry, path2->dentry);
-	spin_unlock(&vfsmount_lock);
+	br_read_unlock(vfsmount_lock);
 	return res;
 }
 EXPORT_SYMBOL(path_is_under);
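prepend_path() and path_is_under() only read the mount tree, so they move from the global vfsmount_lock spinlock to the read side of the new brlock ("big reader lock", built on lglock): readers take only a cheap per-CPU lock, while the rare writers (mount/umount) pay by locking every CPU. A sketch of the pattern, assuming the linux/lglock.h API introduced alongside this series:

	/* Read side: per-CPU, scales with concurrent readers. */
	br_read_lock(vfsmount_lock);
	/* ... walk mnt->mnt_parent chains, compare vfsmounts ... */
	br_read_unlock(vfsmount_lock);

	/* Write side (mount/umount paths elsewhere): excludes all readers. */
	br_write_lock(vfsmount_lock);
	/* ... modify the mount tree ... */
	br_write_unlock(vfsmount_lock);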
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1118,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
 	bprm->unsafe = tracehook_unsafe_exec(p);
 
 	n_fs = 1;
-	write_lock(&p->fs->lock);
+	spin_lock(&p->fs->lock);
 	rcu_read_lock();
 	for (t = next_thread(p); t != p; t = next_thread(t)) {
 		if (t->fs == p->fs)
@@ -1135,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
 			res = 1;
 		}
 	}
-	write_unlock(&p->fs->lock);
+	spin_unlock(&p->fs->lock);
 
 	return res;
 }
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6ffba5..1736f2356388 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 {
 	int i, err = 0;
 
-	ll_rw_block(SWRITE, nr_bhs, bhs);
+	for (i = 0; i < nr_bhs; i++)
+		write_dirty_buffer(bhs[i], WRITE);
+
 	for (i = 0; i < nr_bhs; i++) {
 		wait_on_buffer(bhs[i]);
 		if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36fed9b..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
 #include <linux/sysctl.h>
+#include <linux/lglock.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu.h>
 #include <linux/ima.h>
 
 #include <asm/atomic.h>
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+DECLARE_LGLOCK(files_lglock);
+DEFINE_LGLOCK(files_lglock);
 
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +251,7 @@ static void __fput(struct file *file)
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_kill(file);
+	file_sb_list_del(file);
 	if (file->f_mode & FMODE_WRITE)
 		drop_file_write_access(file);
 	file->f_path.dentry = NULL;
@@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 	return file;
 }
 
-
 void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_kill(file);
+		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-void file_move(struct file *file, struct list_head *list)
+static inline int file_list_cpu(struct file *file)
 {
-	if (!list)
-		return;
-	file_list_lock();
-	list_move(&file->f_u.fu_list, list);
-	file_list_unlock();
+#ifdef CONFIG_SMP
+	return file->f_sb_list_cpu;
+#else
+	return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+	struct list_head *list;
+#ifdef CONFIG_SMP
+	int cpu;
+	cpu = smp_processor_id();
+	file->f_sb_list_cpu = cpu;
+	list = per_cpu_ptr(sb->s_files, cpu);
+#else
+	list = &sb->s_files;
+#endif
+	list_add(&file->f_u.fu_list, list);
 }
 
-void file_kill(struct file *file)
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
+void file_sb_list_add(struct file *file, struct super_block *sb)
+{
+	lg_local_lock(files_lglock);
+	__file_sb_list_add(file, sb);
+	lg_local_unlock(files_lglock);
+}
+
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ * @sb: sb to remove it from
+ *
+ * Use this function to remove a file from its superblock.
+ */
+void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		file_list_lock();
+		lg_local_lock_cpu(files_lglock, file_list_cpu(file));
 		list_del_init(&file->f_u.fu_list);
-		file_list_unlock();
+		lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
 	}
 }
 
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file)	\
+{							\
+	int i;						\
+	for_each_possible_cpu(i) {			\
+		struct list_head *list;			\
+		list = per_cpu_ptr((__sb)->s_files, i);	\
+		list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry			\
+	}						\
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file)	\
+{							\
+	struct list_head *list;				\
+	list = &(sb)->s_files;				\
+	list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry			\
+}
+
+#endif
+
 int fs_may_remount_ro(struct super_block *sb)
 {
 	struct file *file;
-
 	/* Check that no files are currently opened for writing. */
-	file_list_lock();
-	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, file) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
@@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
 		/* Writeable file? */
 		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
 			goto too_bad;
-	}
-	file_list_unlock();
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	file_list_unlock();
+	lg_global_unlock(files_lglock);
 	return 0;
 }
 
@@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
 	struct file *f;
 
 retry:
-	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+	lg_global_lock(files_lglock);
+	do_file_list_for_each_entry(sb, f) {
 		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
 		       continue;
@@ -408,16 +476,13 @@ retry:
 			continue;
 		file_release_write(f);
 		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
+		/* This can sleep, so we can't hold the spinlock. */
+		lg_global_unlock(files_lglock);
 		mnt_drop_write(mnt);
 		mntput(mnt);
 		goto retry;
-	}
-	file_list_unlock();
+	} while_file_list_for_each_entry;
+	lg_global_unlock(files_lglock);
 }
 
 void __init files_init(unsigned long mempages)
@@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages)
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
+	lg_lock_init(files_lglock);
 	percpu_counter_init(&nr_files, 0);
 }
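The same lglock idea splits the old global files_lock: file_sb_list_add()/file_sb_list_del() touch only one CPU's list and lock (f_sb_list_cpu remembers which CPU owns a file on SMP), while the rare whole-superblock walks such as fs_may_remount_ro() and mark_files_ro() take the lock globally and visit every CPU's list through the do_file_list_for_each_entry helper macros. A sketch of another hypothetical walker in the same style (not part of the patch):

	static int example_count_sb_files(struct super_block *sb)
	{
		struct file *f;
		int n = 0;

		lg_global_lock(files_lglock);		/* locks every CPU's list lock */
		do_file_list_for_each_entry(sb, f) {
			n++;
		} while_file_list_for_each_entry;
		lg_global_unlock(files_lglock);
		return n;
	}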
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb9a2c0..ed45a9cf5f3d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 {
 	struct path old_root;
 
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	old_root = fs->root;
 	fs->root = *path;
 	path_get(path);
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 	if (old_root.dentry)
 		path_put(&old_root);
 }
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
 	struct path old_pwd;
 
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
 	path_get(path);
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
 		path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
-			write_lock(&fs->lock);
+			spin_lock(&fs->lock);
 			if (fs->root.dentry == old_root->dentry
 			    && fs->root.mnt == old_root->mnt) {
 				path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 				fs->pwd = *new_root;
 				count++;
 			}
-			write_unlock(&fs->lock);
+			spin_unlock(&fs->lock);
 		}
 		task_unlock(p);
 	} while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
 	if (fs) {
 		int kill;
 		task_lock(tsk);
-		write_lock(&fs->lock);
+		spin_lock(&fs->lock);
 		tsk->fs = NULL;
 		kill = !--fs->users;
-		write_unlock(&fs->lock);
+		spin_unlock(&fs->lock);
 		task_unlock(tsk);
 		if (kill)
 			free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 	if (fs) {
 		fs->users = 1;
 		fs->in_exec = 0;
-		rwlock_init(&fs->lock);
+		spin_lock_init(&fs->lock);
 		fs->umask = old->umask;
 		get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
 	}
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
 		return -ENOMEM;
 
 	task_lock(current);
-	write_lock(&fs->lock);
+	spin_lock(&fs->lock);
 	kill = !--fs->users;
 	current->fs = new_fs;
-	write_unlock(&fs->lock);
+	spin_unlock(&fs->lock);
 	task_unlock(current);
 
 	if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
 /* to be mentioned only in INIT_TASK */
 struct fs_struct init_fs = {
 	.users		= 1,
-	.lock		= __RW_LOCK_UNLOCKED(init_fs.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
 	.umask		= 0022,
 };
 
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
 
 		task_lock(current);
 
-		write_lock(&init_fs.lock);
+		spin_lock(&init_fs.lock);
 		init_fs.users++;
-		write_unlock(&init_fs.lock);
+		spin_unlock(&init_fs.lock);
 
-		write_lock(&fs->lock);
+		spin_lock(&fs->lock);
 		current->fs = &init_fs;
 		kill = !--fs->users;
-		write_unlock(&fs->lock);
+		spin_unlock(&fs->lock);
 
 		task_unlock(current);
 		if (kill)
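Every fs->lock critical section above is a few pointer assignments - short, never sleeping, and with writers about as common as readers - so a plain spinlock beats the rwlock it replaces. The pattern stays the same throughout: take fs->lock, copy or swap the struct path, drop the lock, and keep the reference-count work (path_get()/path_put()) arranged so the path stays pinned outside the lock. A sketch of a typical reader under the new locking (get_fs_root() in this same file has this shape):

	static void example_get_root(struct path *root)
	{
		struct fs_struct *fs = current->fs;

		spin_lock(&fs->lock);
		*root = fs->root;
		path_get(root);		/* pin it while the lock still protects fs->root */
		spin_unlock(&fs->lock);
	}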
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 99800e564157..6bc9e3a5a693 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
 			if (error < 0)
 				goto failed;
 			inode->i_mode = mode;
+			inode->i_ctime = CURRENT_TIME;
 			if (error == 0) {
 				posix_acl_release(acl);
 				acl = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index dd1e55535a4e..f7dc9b5f9ef8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
 		__putname(name);
 		return NULL;
 	}
-	strncpy(name, root, PATH_MAX);
+	strlcpy(name, root, PATH_MAX);
 	if (len > p - name) {
 		__putname(name);
 		return NULL;
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	char *path = dentry_name(dentry);
 	int err = -ENOMEM;
 	if (path) {
-		int err = hostfs_do_readlink(path, link, PATH_MAX);
+		err = hostfs_do_readlink(path, link, PATH_MAX);
 		if (err == PATH_MAX)
 			err = -E2BIG;
 		__putname(path);
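Both hostfs fixes are classic C pitfalls. strncpy() does not NUL-terminate when the source is PATH_MAX bytes or longer, while strlcpy() always terminates (truncating if necessary). And the inner "int err" re-declaration shadowed the outer variable, so hostfs_follow_link() returned -ENOMEM no matter what hostfs_do_readlink() reported. The shadowing bug in isolation, with a hypothetical do_work() helper:

	int err = -ENOMEM;

	if (path) {
		int err = do_work(path);	/* BUG: shadows the outer err */
		(void)err;			/* inner value dies at end of block */
	}
	return err;				/* still -ENOMEM; the result is lost */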
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc60a66..a6910e91cee8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
 * 2 of the License, or (at your option) any later version.
 */
 
+#include <linux/lglock.h>
+
 struct super_block;
 struct linux_binprm;
 struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
 
 extern void __init mnt_init(void);
 
-extern spinlock_t vfsmount_lock;
+DECLARE_BRLOCK(vfsmount_lock);
+
 
 /*
 * fs_struct.c
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
 /*
 * file_table.c
 */
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
 extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..05a38b9c4c0e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
-	ll_rw_block(SWRITE, *batch_count, bhs);
+	for (i = 0; i < *batch_count; i++)
+		write_dirty_buffer(bhs[i], WRITE);
+
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9ddaa0c49..95d8c11c929e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
 	struct buffer_head *bh;
 	journal_header_t *header;
 	int ret;
-	int barrier_done = 0;
 
 	if (is_journal_aborted(journal))
 		return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
 
 	JBUFFER_TRACE(descriptor, "write commit block");
 	set_buffer_dirty(bh);
+
 	if (journal->j_flags & JFS_BARRIER) {
-		set_buffer_ordered(bh);
-		barrier_done = 1;
-	}
-	ret = sync_dirty_buffer(bh);
-	if (barrier_done)
-		clear_buffer_ordered(bh);
-	/* is it possible for another commit to fail at roughly
-	 * the same time as this one?  If so, we don't want to
-	 * trust the barrier flag in the super, but instead want
-	 * to remember if we sent a barrier request
-	 */
-	if (ret == -EOPNOTSUPP && barrier_done) {
-		char b[BDEVNAME_SIZE];
+		ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
 
-		printk(KERN_WARNING
-			"JBD: barrier-based sync failed on %s - "
-			"disabling barriers\n",
-			bdevname(journal->j_dev, b));
-		spin_lock(&journal->j_state_lock);
-		journal->j_flags &= ~JFS_BARRIER;
-		spin_unlock(&journal->j_state_lock);
+		/*
+		 * Is it possible for another commit to fail at roughly
+		 * the same time as this one?  If so, we don't want to
+		 * trust the barrier flag in the super, but instead want
+		 * to remember if we sent a barrier request
+		 */
+		if (ret == -EOPNOTSUPP) {
+			char b[BDEVNAME_SIZE];
 
-		/* And try again, without the barrier */
-		set_buffer_uptodate(bh);
-		set_buffer_dirty(bh);
+			printk(KERN_WARNING
+				"JBD: barrier-based sync failed on %s - "
+				"disabling barriers\n",
+				bdevname(journal->j_dev, b));
+			spin_lock(&journal->j_state_lock);
+			journal->j_flags &= ~JFS_BARRIER;
+			spin_unlock(&journal->j_state_lock);
+
+			/* And try again, without the barrier */
+			set_buffer_uptodate(bh);
+			set_buffer_dirty(bh);
+			ret = sync_dirty_buffer(bh);
+		}
+	} else {
 		ret = sync_dirty_buffer(bh);
 	}
+
 	put_bh(bh);		/* One for getblk() */
 	journal_put_journal_head(descriptor);
 
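The restructured jbd branch makes the fallback explicit instead of tracking it with a barrier_done flag: attempt one barrier write, and only if the device returns -EOPNOTSUPP clear JFS_BARRIER for good and redo the write as a plain synchronous one. Stripped of the locking and logging, the caller-side shape is:

	ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
	if (ret == -EOPNOTSUPP) {
		/* no barrier support: re-dirty the buffer and retry without it */
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}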
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94693d8..2c4b1f109da9 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
 	if (wait)
 		sync_dirty_buffer(bh);
 	else
-		ll_rw_block(SWRITE, 1, &bh);
+		write_dirty_buffer(bh, WRITE);
 
 out:
 	/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
 	set_buffer_jwrite(bh);
 	BUFFER_TRACE(bh, "write");
 	set_buffer_dirty(bh);
-	ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+	write_dirty_buffer(bh, write_op);
 }
 #endif
 
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f4e8a3..5247e7ffdcb4 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
 {
 	int i;
 
-	ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
+	for (i = 0; i < *batch_count; i++)
+		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
+
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
 		clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 101 | struct commit_header *tmp; | 101 | struct commit_header *tmp; |
| 102 | struct buffer_head *bh; | 102 | struct buffer_head *bh; |
| 103 | int ret; | 103 | int ret; |
| 104 | int barrier_done = 0; | ||
| 105 | struct timespec now = current_kernel_time(); | 104 | struct timespec now = current_kernel_time(); |
| 106 | 105 | ||
| 107 | if (is_journal_aborted(journal)) | 106 | if (is_journal_aborted(journal)) |
| @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 136 | if (journal->j_flags & JBD2_BARRIER && | 135 | if (journal->j_flags & JBD2_BARRIER && |
| 137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| 139 | set_buffer_ordered(bh); | 138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); |
| 140 | barrier_done = 1; | 139 | if (ret == -EOPNOTSUPP) { |
| 141 | } | 140 | printk(KERN_WARNING |
| 142 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 141 | "JBD2: Disabling barriers on %s, " |
| 143 | if (barrier_done) | 142 | "not supported by device\n", journal->j_devname); |
| 144 | clear_buffer_ordered(bh); | 143 | write_lock(&journal->j_state_lock); |
| 145 | 144 | journal->j_flags &= ~JBD2_BARRIER; | |
| 146 | /* is it possible for another commit to fail at roughly | 145 | write_unlock(&journal->j_state_lock); |
| 147 | * the same time as this one? If so, we don't want to | ||
| 148 | * trust the barrier flag in the super, but instead want | ||
| 149 | * to remember if we sent a barrier request | ||
| 150 | */ | ||
| 151 | if (ret == -EOPNOTSUPP && barrier_done) { | ||
| 152 | printk(KERN_WARNING | ||
| 153 | "JBD2: Disabling barriers on %s, " | ||
| 154 | "not supported by device\n", journal->j_devname); | ||
| 155 | write_lock(&journal->j_state_lock); | ||
| 156 | journal->j_flags &= ~JBD2_BARRIER; | ||
| 157 | write_unlock(&journal->j_state_lock); | ||
| 158 | 146 | ||
| 159 | /* And try again, without the barrier */ | 147 | /* And try again, without the barrier */ |
| 160 | lock_buffer(bh); | 148 | lock_buffer(bh); |
| 161 | set_buffer_uptodate(bh); | 149 | set_buffer_uptodate(bh); |
| 162 | clear_buffer_dirty(bh); | 150 | clear_buffer_dirty(bh); |
| 151 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
| 152 | } | ||
| 153 | } else { | ||
| 163 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 154 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
| 164 | } | 155 | } |
| 165 | *cbh = bh; | 156 | *cbh = bh; |
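The jbd2 version of the same restructuring, consolidated from the new-side column of the hunk above. Unlike the JBD path it submits asynchronously with submit_bh(), passing WRITE_BARRIER as an explicit request flag now that the BH_Ordered buffer flag (set_buffer_ordered()/clear_buffer_ordered() and the barrier_done bookkeeping) is gone:

	if (journal->j_flags & JBD2_BARRIER &&
	    !JBD2_HAS_INCOMPAT_FEATURE(journal,
				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
		ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
		if (ret == -EOPNOTSUPP) {
			printk(KERN_WARNING
			       "JBD2: Disabling barriers on %s, "
			       "not supported by device\n", journal->j_devname);
			write_lock(&journal->j_state_lock);
			journal->j_flags &= ~JBD2_BARRIER;
			write_unlock(&journal->j_state_lock);

			/* And try again, without the barrier */
			lock_buffer(bh);
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			ret = submit_bh(WRITE_SYNC_PLUG, bh);
		}
	} else {
		ret = submit_bh(WRITE_SYNC_PLUG, bh);
	}
	*cbh = bh;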
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
| 1124 | set_buffer_uptodate(bh); | 1124 | set_buffer_uptodate(bh); |
| 1125 | } | 1125 | } |
| 1126 | } else | 1126 | } else |
| 1127 | ll_rw_block(SWRITE, 1, &bh); | 1127 | write_dirty_buffer(bh, WRITE); |
| 1128 | 1128 | ||
| 1129 | out: | 1129 | out: |
| 1130 | /* If we have just flushed the log (by marking s_start==0), then | 1130 | /* If we have just flushed the log (by marking s_start==0), then |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
| @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, | |||
| 625 | set_buffer_jwrite(bh); | 625 | set_buffer_jwrite(bh); |
| 626 | BUFFER_TRACE(bh, "write"); | 626 | BUFFER_TRACE(bh, "write"); |
| 627 | set_buffer_dirty(bh); | 627 | set_buffer_dirty(bh); |
| 628 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); | 628 | write_dirty_buffer(bh, write_op); |
| 629 | } | 629 | } |
| 630 | #endif | 630 | #endif |
| 631 | 631 | ||
diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cdfd15b..93444747237b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -80,6 +80,7 @@ struct mb_cache { | |||
| 80 | struct list_head c_cache_list; | 80 | struct list_head c_cache_list; |
| 81 | const char *c_name; | 81 | const char *c_name; |
| 82 | atomic_t c_entry_count; | 82 | atomic_t c_entry_count; |
| 83 | int c_max_entries; | ||
| 83 | int c_bucket_bits; | 84 | int c_bucket_bits; |
| 84 | struct kmem_cache *c_entry_cache; | 85 | struct kmem_cache *c_entry_cache; |
| 85 | struct list_head *c_block_hash; | 86 | struct list_head *c_block_hash; |
| @@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) | |||
| 243 | if (!cache->c_entry_cache) | 244 | if (!cache->c_entry_cache) |
| 244 | goto fail2; | 245 | goto fail2; |
| 245 | 246 | ||
| 247 | /* | ||
| 248 | * Set an upper limit on the number of cache entries so that the hash | ||
| 249 | * chains won't grow too long. | ||
| 250 | */ | ||
| 251 | cache->c_max_entries = bucket_count << 4; | ||
| 252 | |||
| 246 | spin_lock(&mb_cache_spinlock); | 253 | spin_lock(&mb_cache_spinlock); |
| 247 | list_add(&cache->c_cache_list, &mb_cache_list); | 254 | list_add(&cache->c_cache_list, &mb_cache_list); |
| 248 | spin_unlock(&mb_cache_spinlock); | 255 | spin_unlock(&mb_cache_spinlock); |
| @@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 333 | kfree(cache); | 340 | kfree(cache); |
| 334 | } | 341 | } |
| 335 | 342 | ||
| 336 | |||
| 337 | /* | 343 | /* |
| 338 | * mb_cache_entry_alloc() | 344 | * mb_cache_entry_alloc() |
| 339 | * | 345 | * |
| @@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 345 | struct mb_cache_entry * | 351 | struct mb_cache_entry * |
| 346 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 352 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
| 347 | { | 353 | { |
| 348 | struct mb_cache_entry *ce; | 354 | struct mb_cache_entry *ce = NULL; |
| 349 | 355 | ||
| 350 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 356 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
| 351 | if (ce) { | 357 | spin_lock(&mb_cache_spinlock); |
| 358 | if (!list_empty(&mb_cache_lru_list)) { | ||
| 359 | ce = list_entry(mb_cache_lru_list.next, | ||
| 360 | struct mb_cache_entry, e_lru_list); | ||
| 361 | list_del_init(&ce->e_lru_list); | ||
| 362 | __mb_cache_entry_unhash(ce); | ||
| 363 | } | ||
| 364 | spin_unlock(&mb_cache_spinlock); | ||
| 365 | } | ||
| 366 | if (!ce) { | ||
| 367 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | ||
| 368 | if (!ce) | ||
| 369 | return NULL; | ||
| 352 | atomic_inc(&cache->c_entry_count); | 370 | atomic_inc(&cache->c_entry_count); |
| 353 | INIT_LIST_HEAD(&ce->e_lru_list); | 371 | INIT_LIST_HEAD(&ce->e_lru_list); |
| 354 | INIT_LIST_HEAD(&ce->e_block_list); | 372 | INIT_LIST_HEAD(&ce->e_block_list); |
| 355 | ce->e_cache = cache; | 373 | ce->e_cache = cache; |
| 356 | ce->e_used = 1 + MB_CACHE_WRITER; | ||
| 357 | ce->e_queued = 0; | 374 | ce->e_queued = 0; |
| 358 | } | 375 | } |
| 376 | ce->e_used = 1 + MB_CACHE_WRITER; | ||
| 359 | return ce; | 377 | return ce; |
| 360 | } | 378 | } |
| 361 | 379 | ||
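The reworked mb_cache_entry_alloc(), consolidated from the hunk above: with the new c_max_entries cap (sixteen entries per hash bucket, per the comment added to mb_cache_create()), a full cache recycles the coldest entry from the global LRU instead of growing, and only falls back to the slab allocator when the LRU is empty:

struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
	struct mb_cache_entry *ce = NULL;

	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
		spin_lock(&mb_cache_spinlock);
		if (!list_empty(&mb_cache_lru_list)) {
			/* Steal the oldest unused entry off the LRU. */
			ce = list_entry(mb_cache_lru_list.next,
					struct mb_cache_entry, e_lru_list);
			list_del_init(&ce->e_lru_list);
			__mb_cache_entry_unhash(ce);
		}
		spin_unlock(&mb_cache_spinlock);
	}
	if (!ce) {
		ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
		if (!ce)
			return NULL;
		atomic_inc(&cache->c_entry_count);
		INIT_LIST_HEAD(&ce->e_lru_list);
		INIT_LIST_HEAD(&ce->e_block_list);
		ce->e_cache = cache;
		ce->e_queued = 0;
	}
	ce->e_used = 1 + MB_CACHE_WRITER;
	return ce;
}

Note that the `ce->e_used = 1 + MB_CACHE_WRITER;` assignment moves out of the allocation branch so that recycled LRU entries are marked busy for the caller as well.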
diff --git a/fs/namei.c b/fs/namei.c index 17ea76bf2fbe..24896e833565 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -595,15 +595,16 @@ int follow_up(struct path *path) | |||
| 595 | { | 595 | { |
| 596 | struct vfsmount *parent; | 596 | struct vfsmount *parent; |
| 597 | struct dentry *mountpoint; | 597 | struct dentry *mountpoint; |
| 598 | spin_lock(&vfsmount_lock); | 598 | |
| 599 | br_read_lock(vfsmount_lock); | ||
| 599 | parent = path->mnt->mnt_parent; | 600 | parent = path->mnt->mnt_parent; |
| 600 | if (parent == path->mnt) { | 601 | if (parent == path->mnt) { |
| 601 | spin_unlock(&vfsmount_lock); | 602 | br_read_unlock(vfsmount_lock); |
| 602 | return 0; | 603 | return 0; |
| 603 | } | 604 | } |
| 604 | mntget(parent); | 605 | mntget(parent); |
| 605 | mountpoint = dget(path->mnt->mnt_mountpoint); | 606 | mountpoint = dget(path->mnt->mnt_mountpoint); |
| 606 | spin_unlock(&vfsmount_lock); | 607 | br_read_unlock(vfsmount_lock); |
| 607 | dput(path->dentry); | 608 | dput(path->dentry); |
| 608 | path->dentry = mountpoint; | 609 | path->dentry = mountpoint; |
| 609 | mntput(path->mnt); | 610 | mntput(path->mnt); |
| @@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) | |||
| 686 | } | 687 | } |
| 687 | 688 | ||
| 688 | /* | 689 | /* |
| 690 | * Allocate a dentry with name and parent, and perform a parent | ||
| 691 | * directory ->lookup on it. Returns the new dentry, or ERR_PTR | ||
| 692 | * on error. parent->d_inode->i_mutex must be held. d_lookup must | ||
| 693 | * have verified that no child exists while under i_mutex. | ||
| 694 | */ | ||
| 695 | static struct dentry *d_alloc_and_lookup(struct dentry *parent, | ||
| 696 | struct qstr *name, struct nameidata *nd) | ||
| 697 | { | ||
| 698 | struct inode *inode = parent->d_inode; | ||
| 699 | struct dentry *dentry; | ||
| 700 | struct dentry *old; | ||
| 701 | |||
| 702 | /* Don't create child dentry for a dead directory. */ | ||
| 703 | if (unlikely(IS_DEADDIR(inode))) | ||
| 704 | return ERR_PTR(-ENOENT); | ||
| 705 | |||
| 706 | dentry = d_alloc(parent, name); | ||
| 707 | if (unlikely(!dentry)) | ||
| 708 | return ERR_PTR(-ENOMEM); | ||
| 709 | |||
| 710 | old = inode->i_op->lookup(inode, dentry, nd); | ||
| 711 | if (unlikely(old)) { | ||
| 712 | dput(dentry); | ||
| 713 | dentry = old; | ||
| 714 | } | ||
| 715 | return dentry; | ||
| 716 | } | ||
| 717 | |||
| 718 | /* | ||
| 689 | * It's more convoluted than I'd like it to be, but... it's still fairly | 719 | * It's more convoluted than I'd like it to be, but... it's still fairly |
| 690 | * small and for now I'd prefer to have fast path as straight as possible. | 720 | * small and for now I'd prefer to have fast path as straight as possible. |
| 691 | * It _is_ time-critical. | 721 | * It _is_ time-critical. |
| @@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
| 706 | return err; | 736 | return err; |
| 707 | } | 737 | } |
| 708 | 738 | ||
| 739 | /* | ||
| 740 | * Rename seqlock is not required here because in the off chance | ||
| 741 | * of a false negative due to a concurrent rename, we're going to | ||
| 742 | * do the non-racy lookup, below. | ||
| 743 | */ | ||
| 709 | dentry = __d_lookup(nd->path.dentry, name); | 744 | dentry = __d_lookup(nd->path.dentry, name); |
| 710 | if (!dentry) | 745 | if (!dentry) |
| 711 | goto need_lookup; | 746 | goto need_lookup; |
| 747 | found: | ||
| 712 | if (dentry->d_op && dentry->d_op->d_revalidate) | 748 | if (dentry->d_op && dentry->d_op->d_revalidate) |
| 713 | goto need_revalidate; | 749 | goto need_revalidate; |
| 714 | done: | 750 | done: |
| @@ -724,56 +760,28 @@ need_lookup: | |||
| 724 | mutex_lock(&dir->i_mutex); | 760 | mutex_lock(&dir->i_mutex); |
| 725 | /* | 761 | /* |
| 726 | * First re-do the cached lookup just in case it was created | 762 | * First re-do the cached lookup just in case it was created |
| 727 | * while we waited for the directory semaphore.. | 763 | * while we waited for the directory semaphore, or the first |
| 764 | * lookup failed due to an unrelated rename. | ||
| 728 | * | 765 | * |
| 729 | * FIXME! This could use version numbering or similar to | 766 | * This could use version numbering or similar to avoid unnecessary |
| 730 | * avoid unnecessary cache lookups. | 767 | * cache lookups, but then we'd have to do the first lookup in the |
| 731 | * | 768 | * non-racy way. However in the common case here, everything should |
| 732 | * The "dcache_lock" is purely to protect the RCU list walker | 769 | * be hot in cache, so would it be a big win? |
| 733 | * from concurrent renames at this point (we mustn't get false | ||
| 734 | * negatives from the RCU list walk here, unlike the optimistic | ||
| 735 | * fast walk). | ||
| 736 | * | ||
| 737 | * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup | ||
| 738 | */ | 770 | */ |
| 739 | dentry = d_lookup(parent, name); | 771 | dentry = d_lookup(parent, name); |
| 740 | if (!dentry) { | 772 | if (likely(!dentry)) { |
| 741 | struct dentry *new; | 773 | dentry = d_alloc_and_lookup(parent, name, nd); |
| 742 | |||
| 743 | /* Don't create child dentry for a dead directory. */ | ||
| 744 | dentry = ERR_PTR(-ENOENT); | ||
| 745 | if (IS_DEADDIR(dir)) | ||
| 746 | goto out_unlock; | ||
| 747 | |||
| 748 | new = d_alloc(parent, name); | ||
| 749 | dentry = ERR_PTR(-ENOMEM); | ||
| 750 | if (new) { | ||
| 751 | dentry = dir->i_op->lookup(dir, new, nd); | ||
| 752 | if (dentry) | ||
| 753 | dput(new); | ||
| 754 | else | ||
| 755 | dentry = new; | ||
| 756 | } | ||
| 757 | out_unlock: | ||
| 758 | mutex_unlock(&dir->i_mutex); | 774 | mutex_unlock(&dir->i_mutex); |
| 759 | if (IS_ERR(dentry)) | 775 | if (IS_ERR(dentry)) |
| 760 | goto fail; | 776 | goto fail; |
| 761 | goto done; | 777 | goto done; |
| 762 | } | 778 | } |
| 763 | |||
| 764 | /* | 779 | /* |
| 765 | * Uhhuh! Nasty case: the cache was re-populated while | 780 | * Uhhuh! Nasty case: the cache was re-populated while |
| 766 | * we waited on the semaphore. Need to revalidate. | 781 | * we waited on the semaphore. Need to revalidate. |
| 767 | */ | 782 | */ |
| 768 | mutex_unlock(&dir->i_mutex); | 783 | mutex_unlock(&dir->i_mutex); |
| 769 | if (dentry->d_op && dentry->d_op->d_revalidate) { | 784 | goto found; |
| 770 | dentry = do_revalidate(dentry, nd); | ||
| 771 | if (!dentry) | ||
| 772 | dentry = ERR_PTR(-ENOENT); | ||
| 773 | } | ||
| 774 | if (IS_ERR(dentry)) | ||
| 775 | goto fail; | ||
| 776 | goto done; | ||
| 777 | 785 | ||
| 778 | need_revalidate: | 786 | need_revalidate: |
| 779 | dentry = do_revalidate(dentry, nd); | 787 | dentry = do_revalidate(dentry, nd); |
| @@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
| 1130 | goto out; | 1138 | goto out; |
| 1131 | } | 1139 | } |
| 1132 | 1140 | ||
| 1133 | dentry = __d_lookup(base, name); | 1141 | /* |
| 1134 | 1142 | * Don't bother with __d_lookup: callers are for creat as | |
| 1135 | /* lockess __d_lookup may fail due to concurrent d_move() | 1143 | * well as unlink, so a lot of the time it would cost |
| 1136 | * in some unrelated directory, so try with d_lookup | 1144 | * a double lookup. |
| 1137 | */ | 1145 | */ |
| 1138 | if (!dentry) | 1146 | dentry = d_lookup(base, name); |
| 1139 | dentry = d_lookup(base, name); | ||
| 1140 | 1147 | ||
| 1141 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) | 1148 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) |
| 1142 | dentry = do_revalidate(dentry, nd); | 1149 | dentry = do_revalidate(dentry, nd); |
| 1143 | 1150 | ||
| 1144 | if (!dentry) { | 1151 | if (!dentry) |
| 1145 | struct dentry *new; | 1152 | dentry = d_alloc_and_lookup(base, name, nd); |
| 1146 | |||
| 1147 | /* Don't create child dentry for a dead directory. */ | ||
| 1148 | dentry = ERR_PTR(-ENOENT); | ||
| 1149 | if (IS_DEADDIR(inode)) | ||
| 1150 | goto out; | ||
| 1151 | |||
| 1152 | new = d_alloc(base, name); | ||
| 1153 | dentry = ERR_PTR(-ENOMEM); | ||
| 1154 | if (!new) | ||
| 1155 | goto out; | ||
| 1156 | dentry = inode->i_op->lookup(inode, new, nd); | ||
| 1157 | if (!dentry) | ||
| 1158 | dentry = new; | ||
| 1159 | else | ||
| 1160 | dput(new); | ||
| 1161 | } | ||
| 1162 | out: | 1153 | out: |
| 1163 | return dentry; | 1154 | return dentry; |
| 1164 | } | 1155 | } |
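__lookup_hash() gets the same treatment; consolidated from the hunk above, the lockless-then-locked double lookup is dropped and the open-coded allocation is replaced by the shared helper:

	/*
	 * Don't bother with __d_lookup: callers are for creat as
	 * well as unlink, so a lot of the time it would cost
	 * a double lookup.
	 */
	dentry = d_lookup(base, name);

	if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
		dentry = do_revalidate(dentry, nd);

	if (!dentry)
		dentry = d_alloc_and_lookup(base, name, nd);
out:
	return dentry;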
diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb19c5b0..de402eb6eafb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #include <linux/syscalls.h> | 11 | #include <linux/syscalls.h> |
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
| 14 | #include <linux/spinlock.h> | ||
| 15 | #include <linux/percpu.h> | ||
| 14 | #include <linux/smp_lock.h> | 16 | #include <linux/smp_lock.h> |
| 15 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 16 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
| @@ -38,12 +40,10 @@ | |||
| 38 | #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) | 40 | #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) |
| 39 | #define HASH_SIZE (1UL << HASH_SHIFT) | 41 | #define HASH_SIZE (1UL << HASH_SHIFT) |
| 40 | 42 | ||
| 41 | /* spinlock for vfsmount related operations, inplace of dcache_lock */ | ||
| 42 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | ||
| 43 | |||
| 44 | static int event; | 43 | static int event; |
| 45 | static DEFINE_IDA(mnt_id_ida); | 44 | static DEFINE_IDA(mnt_id_ida); |
| 46 | static DEFINE_IDA(mnt_group_ida); | 45 | static DEFINE_IDA(mnt_group_ida); |
| 46 | static DEFINE_SPINLOCK(mnt_id_lock); | ||
| 47 | static int mnt_id_start = 0; | 47 | static int mnt_id_start = 0; |
| 48 | static int mnt_group_start = 1; | 48 | static int mnt_group_start = 1; |
| 49 | 49 | ||
| @@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; | |||
| 55 | struct kobject *fs_kobj; | 55 | struct kobject *fs_kobj; |
| 56 | EXPORT_SYMBOL_GPL(fs_kobj); | 56 | EXPORT_SYMBOL_GPL(fs_kobj); |
| 57 | 57 | ||
| 58 | /* | ||
| 59 | * vfsmount lock may be taken for read to prevent changes to the | ||
| 60 | * vfsmount hash, ie. during mountpoint lookups or walking back | ||
| 61 | * up the tree. | ||
| 62 | * | ||
| 63 | * It should be taken for write in all cases where the vfsmount | ||
| 64 | * tree or hash is modified or when a vfsmount structure is modified. | ||
| 65 | */ | ||
| 66 | DEFINE_BRLOCK(vfsmount_lock); | ||
| 67 | |||
| 58 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 68 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
| 59 | { | 69 | { |
| 60 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); | 70 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); |
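vfsmount_lock changes here from a global spinlock to a brlock. As of this series, DEFINE_BRLOCK() (in include/linux/lglock.h, if memory serves; treat the location as an assumption) builds a set of per-CPU spinlocks: readers take only their own CPU's lock, so lookup-heavy paths scale across CPUs, while writers must take every CPU's lock and are correspondingly slower. The calling pattern used throughout the rest of this file:

/* Read side: mount-hash lookups and walking back up the tree.
 * Cheap and CPU-local. */
br_read_lock(vfsmount_lock);
/* ... inspect the mount hash or a vfsmount ... */
br_read_unlock(vfsmount_lock);

/* Write side: any modification of the mount hash, the mount tree,
 * or a vfsmount structure. Takes all per-CPU locks. */
br_write_lock(vfsmount_lock);
/* ... modify ... */
br_write_unlock(vfsmount_lock);

This asymmetry is why the patch adds "must be held for write" annotations to the tree-modifying helpers below, and why mnt_alloc_id()/mnt_free_id() move to their own plain spinlock (mnt_id_lock): ID allocation has no readers to scale and does not need the expensive write side.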
| @@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | |||
| 65 | 75 | ||
| 66 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) | 76 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) |
| 67 | 77 | ||
| 68 | /* allocation is serialized by namespace_sem */ | 78 | /* |
| 79 | * allocation is serialized by namespace_sem, but we need the spinlock to | ||
| 80 | * serialize with freeing. | ||
| 81 | */ | ||
| 69 | static int mnt_alloc_id(struct vfsmount *mnt) | 82 | static int mnt_alloc_id(struct vfsmount *mnt) |
| 70 | { | 83 | { |
| 71 | int res; | 84 | int res; |
| 72 | 85 | ||
| 73 | retry: | 86 | retry: |
| 74 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); | 87 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); |
| 75 | spin_lock(&vfsmount_lock); | 88 | spin_lock(&mnt_id_lock); |
| 76 | res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); | 89 | res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); |
| 77 | if (!res) | 90 | if (!res) |
| 78 | mnt_id_start = mnt->mnt_id + 1; | 91 | mnt_id_start = mnt->mnt_id + 1; |
| 79 | spin_unlock(&vfsmount_lock); | 92 | spin_unlock(&mnt_id_lock); |
| 80 | if (res == -EAGAIN) | 93 | if (res == -EAGAIN) |
| 81 | goto retry; | 94 | goto retry; |
| 82 | 95 | ||
| @@ -86,11 +99,11 @@ retry: | |||
| 86 | static void mnt_free_id(struct vfsmount *mnt) | 99 | static void mnt_free_id(struct vfsmount *mnt) |
| 87 | { | 100 | { |
| 88 | int id = mnt->mnt_id; | 101 | int id = mnt->mnt_id; |
| 89 | spin_lock(&vfsmount_lock); | 102 | spin_lock(&mnt_id_lock); |
| 90 | ida_remove(&mnt_id_ida, id); | 103 | ida_remove(&mnt_id_ida, id); |
| 91 | if (mnt_id_start > id) | 104 | if (mnt_id_start > id) |
| 92 | mnt_id_start = id; | 105 | mnt_id_start = id; |
| 93 | spin_unlock(&vfsmount_lock); | 106 | spin_unlock(&mnt_id_lock); |
| 94 | } | 107 | } |
| 95 | 108 | ||
| 96 | /* | 109 | /* |
| @@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
| 348 | { | 361 | { |
| 349 | int ret = 0; | 362 | int ret = 0; |
| 350 | 363 | ||
| 351 | spin_lock(&vfsmount_lock); | 364 | br_write_lock(vfsmount_lock); |
| 352 | mnt->mnt_flags |= MNT_WRITE_HOLD; | 365 | mnt->mnt_flags |= MNT_WRITE_HOLD; |
| 353 | /* | 366 | /* |
| 354 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store | 367 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store |
| @@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
| 382 | */ | 395 | */ |
| 383 | smp_wmb(); | 396 | smp_wmb(); |
| 384 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; | 397 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; |
| 385 | spin_unlock(&vfsmount_lock); | 398 | br_write_unlock(vfsmount_lock); |
| 386 | return ret; | 399 | return ret; |
| 387 | } | 400 | } |
| 388 | 401 | ||
| 389 | static void __mnt_unmake_readonly(struct vfsmount *mnt) | 402 | static void __mnt_unmake_readonly(struct vfsmount *mnt) |
| 390 | { | 403 | { |
| 391 | spin_lock(&vfsmount_lock); | 404 | br_write_lock(vfsmount_lock); |
| 392 | mnt->mnt_flags &= ~MNT_READONLY; | 405 | mnt->mnt_flags &= ~MNT_READONLY; |
| 393 | spin_unlock(&vfsmount_lock); | 406 | br_write_unlock(vfsmount_lock); |
| 394 | } | 407 | } |
| 395 | 408 | ||
| 396 | void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) | 409 | void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) |
| @@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) | |||
| 414 | /* | 427 | /* |
| 415 | * find the first or last mount at @dentry on vfsmount @mnt depending on | 428 | * find the first or last mount at @dentry on vfsmount @mnt depending on |
| 416 | * @dir. If @dir is set return the first mount else return the last mount. | 429 | * @dir. If @dir is set return the first mount else return the last mount. |
| 430 | * vfsmount_lock must be held for read or write. | ||
| 417 | */ | 431 | */ |
| 418 | struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | 432 | struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, |
| 419 | int dir) | 433 | int dir) |
| @@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | |||
| 443 | struct vfsmount *lookup_mnt(struct path *path) | 457 | struct vfsmount *lookup_mnt(struct path *path) |
| 444 | { | 458 | { |
| 445 | struct vfsmount *child_mnt; | 459 | struct vfsmount *child_mnt; |
| 446 | spin_lock(&vfsmount_lock); | 460 | |
| 461 | br_read_lock(vfsmount_lock); | ||
| 447 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) | 462 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) |
| 448 | mntget(child_mnt); | 463 | mntget(child_mnt); |
| 449 | spin_unlock(&vfsmount_lock); | 464 | br_read_unlock(vfsmount_lock); |
| 450 | return child_mnt; | 465 | return child_mnt; |
| 451 | } | 466 | } |
| 452 | 467 | ||
| @@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) | |||
| 455 | return mnt->mnt_ns == current->nsproxy->mnt_ns; | 470 | return mnt->mnt_ns == current->nsproxy->mnt_ns; |
| 456 | } | 471 | } |
| 457 | 472 | ||
| 473 | /* | ||
| 474 | * vfsmount lock must be held for write | ||
| 475 | */ | ||
| 458 | static void touch_mnt_namespace(struct mnt_namespace *ns) | 476 | static void touch_mnt_namespace(struct mnt_namespace *ns) |
| 459 | { | 477 | { |
| 460 | if (ns) { | 478 | if (ns) { |
| @@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) | |||
| 463 | } | 481 | } |
| 464 | } | 482 | } |
| 465 | 483 | ||
| 484 | /* | ||
| 485 | * vfsmount lock must be held for write | ||
| 486 | */ | ||
| 466 | static void __touch_mnt_namespace(struct mnt_namespace *ns) | 487 | static void __touch_mnt_namespace(struct mnt_namespace *ns) |
| 467 | { | 488 | { |
| 468 | if (ns && ns->event != event) { | 489 | if (ns && ns->event != event) { |
| @@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
| 471 | } | 492 | } |
| 472 | } | 493 | } |
| 473 | 494 | ||
| 495 | /* | ||
| 496 | * vfsmount lock must be held for write | ||
| 497 | */ | ||
| 474 | static void detach_mnt(struct vfsmount *mnt, struct path *old_path) | 498 | static void detach_mnt(struct vfsmount *mnt, struct path *old_path) |
| 475 | { | 499 | { |
| 476 | old_path->dentry = mnt->mnt_mountpoint; | 500 | old_path->dentry = mnt->mnt_mountpoint; |
| @@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) | |||
| 482 | old_path->dentry->d_mounted--; | 506 | old_path->dentry->d_mounted--; |
| 483 | } | 507 | } |
| 484 | 508 | ||
| 509 | /* | ||
| 510 | * vfsmount lock must be held for write | ||
| 511 | */ | ||
| 485 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | 512 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, |
| 486 | struct vfsmount *child_mnt) | 513 | struct vfsmount *child_mnt) |
| 487 | { | 514 | { |
| @@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | |||
| 490 | dentry->d_mounted++; | 517 | dentry->d_mounted++; |
| 491 | } | 518 | } |
| 492 | 519 | ||
| 520 | /* | ||
| 521 | * vfsmount lock must be held for write | ||
| 522 | */ | ||
| 493 | static void attach_mnt(struct vfsmount *mnt, struct path *path) | 523 | static void attach_mnt(struct vfsmount *mnt, struct path *path) |
| 494 | { | 524 | { |
| 495 | mnt_set_mountpoint(path->mnt, path->dentry, mnt); | 525 | mnt_set_mountpoint(path->mnt, path->dentry, mnt); |
| @@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) | |||
| 499 | } | 529 | } |
| 500 | 530 | ||
| 501 | /* | 531 | /* |
| 502 | * the caller must hold vfsmount_lock | 532 | * vfsmount lock must be held for write |
| 503 | */ | 533 | */ |
| 504 | static void commit_tree(struct vfsmount *mnt) | 534 | static void commit_tree(struct vfsmount *mnt) |
| 505 | { | 535 | { |
| @@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) | |||
| 623 | void mntput_no_expire(struct vfsmount *mnt) | 653 | void mntput_no_expire(struct vfsmount *mnt) |
| 624 | { | 654 | { |
| 625 | repeat: | 655 | repeat: |
| 626 | if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { | 656 | if (atomic_add_unless(&mnt->mnt_count, -1, 1)) |
| 627 | if (likely(!mnt->mnt_pinned)) { | 657 | return; |
| 628 | spin_unlock(&vfsmount_lock); | 658 | br_write_lock(vfsmount_lock); |
| 629 | __mntput(mnt); | 659 | if (!atomic_dec_and_test(&mnt->mnt_count)) { |
| 630 | return; | 660 | br_write_unlock(vfsmount_lock); |
| 631 | } | 661 | return; |
| 632 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | 662 | } |
| 633 | mnt->mnt_pinned = 0; | 663 | if (likely(!mnt->mnt_pinned)) { |
| 634 | spin_unlock(&vfsmount_lock); | 664 | br_write_unlock(vfsmount_lock); |
| 635 | acct_auto_close_mnt(mnt); | 665 | __mntput(mnt); |
| 636 | goto repeat; | 666 | return; |
| 637 | } | 667 | } |
| 668 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | ||
| 669 | mnt->mnt_pinned = 0; | ||
| 670 | br_write_unlock(vfsmount_lock); | ||
| 671 | acct_auto_close_mnt(mnt); | ||
| 672 | goto repeat; | ||
| 638 | } | 673 | } |
| 639 | |||
| 640 | EXPORT_SYMBOL(mntput_no_expire); | 674 | EXPORT_SYMBOL(mntput_no_expire); |
| 641 | 675 | ||
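mntput_no_expire() after the hunk above, consolidated. The interesting part is the new lock-free fast path: atomic_add_unless() drops the reference without touching vfsmount_lock whenever the count stays at or above one, and the brlock write side is taken only when this put might be the final one:

repeat:
	if (atomic_add_unless(&mnt->mnt_count, -1, 1))
		return;		/* fast path: not the last reference */
	br_write_lock(vfsmount_lock);
	if (!atomic_dec_and_test(&mnt->mnt_count)) {
		br_write_unlock(vfsmount_lock);
		return;
	}
	if (likely(!mnt->mnt_pinned)) {
		br_write_unlock(vfsmount_lock);
		__mntput(mnt);
		return;
	}
	atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
	mnt->mnt_pinned = 0;
	br_write_unlock(vfsmount_lock);
	acct_auto_close_mnt(mnt);
	goto repeat;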
| 642 | void mnt_pin(struct vfsmount *mnt) | 676 | void mnt_pin(struct vfsmount *mnt) |
| 643 | { | 677 | { |
| 644 | spin_lock(&vfsmount_lock); | 678 | br_write_lock(vfsmount_lock); |
| 645 | mnt->mnt_pinned++; | 679 | mnt->mnt_pinned++; |
| 646 | spin_unlock(&vfsmount_lock); | 680 | br_write_unlock(vfsmount_lock); |
| 647 | } | 681 | } |
| 648 | 682 | ||
| 649 | EXPORT_SYMBOL(mnt_pin); | 683 | EXPORT_SYMBOL(mnt_pin); |
| 650 | 684 | ||
| 651 | void mnt_unpin(struct vfsmount *mnt) | 685 | void mnt_unpin(struct vfsmount *mnt) |
| 652 | { | 686 | { |
| 653 | spin_lock(&vfsmount_lock); | 687 | br_write_lock(vfsmount_lock); |
| 654 | if (mnt->mnt_pinned) { | 688 | if (mnt->mnt_pinned) { |
| 655 | atomic_inc(&mnt->mnt_count); | 689 | atomic_inc(&mnt->mnt_count); |
| 656 | mnt->mnt_pinned--; | 690 | mnt->mnt_pinned--; |
| 657 | } | 691 | } |
| 658 | spin_unlock(&vfsmount_lock); | 692 | br_write_unlock(vfsmount_lock); |
| 659 | } | 693 | } |
| 660 | 694 | ||
| 661 | EXPORT_SYMBOL(mnt_unpin); | 695 | EXPORT_SYMBOL(mnt_unpin); |
| @@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) | |||
| 746 | struct mnt_namespace *ns = p->ns; | 780 | struct mnt_namespace *ns = p->ns; |
| 747 | int res = 0; | 781 | int res = 0; |
| 748 | 782 | ||
| 749 | spin_lock(&vfsmount_lock); | 783 | br_read_lock(vfsmount_lock); |
| 750 | if (p->event != ns->event) { | 784 | if (p->event != ns->event) { |
| 751 | p->event = ns->event; | 785 | p->event = ns->event; |
| 752 | res = 1; | 786 | res = 1; |
| 753 | } | 787 | } |
| 754 | spin_unlock(&vfsmount_lock); | 788 | br_read_unlock(vfsmount_lock); |
| 755 | 789 | ||
| 756 | return res; | 790 | return res; |
| 757 | } | 791 | } |
| @@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) | |||
| 952 | int minimum_refs = 0; | 986 | int minimum_refs = 0; |
| 953 | struct vfsmount *p; | 987 | struct vfsmount *p; |
| 954 | 988 | ||
| 955 | spin_lock(&vfsmount_lock); | 989 | br_read_lock(vfsmount_lock); |
| 956 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 990 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
| 957 | actual_refs += atomic_read(&p->mnt_count); | 991 | actual_refs += atomic_read(&p->mnt_count); |
| 958 | minimum_refs += 2; | 992 | minimum_refs += 2; |
| 959 | } | 993 | } |
| 960 | spin_unlock(&vfsmount_lock); | 994 | br_read_unlock(vfsmount_lock); |
| 961 | 995 | ||
| 962 | if (actual_refs > minimum_refs) | 996 | if (actual_refs > minimum_refs) |
| 963 | return 0; | 997 | return 0; |
| @@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) | |||
| 984 | { | 1018 | { |
| 985 | int ret = 1; | 1019 | int ret = 1; |
| 986 | down_read(&namespace_sem); | 1020 | down_read(&namespace_sem); |
| 987 | spin_lock(&vfsmount_lock); | 1021 | br_read_lock(vfsmount_lock); |
| 988 | if (propagate_mount_busy(mnt, 2)) | 1022 | if (propagate_mount_busy(mnt, 2)) |
| 989 | ret = 0; | 1023 | ret = 0; |
| 990 | spin_unlock(&vfsmount_lock); | 1024 | br_read_unlock(vfsmount_lock); |
| 991 | up_read(&namespace_sem); | 1025 | up_read(&namespace_sem); |
| 992 | return ret; | 1026 | return ret; |
| 993 | } | 1027 | } |
| @@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) | |||
| 1003 | if (mnt->mnt_parent != mnt) { | 1037 | if (mnt->mnt_parent != mnt) { |
| 1004 | struct dentry *dentry; | 1038 | struct dentry *dentry; |
| 1005 | struct vfsmount *m; | 1039 | struct vfsmount *m; |
| 1006 | spin_lock(&vfsmount_lock); | 1040 | |
| 1041 | br_write_lock(vfsmount_lock); | ||
| 1007 | dentry = mnt->mnt_mountpoint; | 1042 | dentry = mnt->mnt_mountpoint; |
| 1008 | m = mnt->mnt_parent; | 1043 | m = mnt->mnt_parent; |
| 1009 | mnt->mnt_mountpoint = mnt->mnt_root; | 1044 | mnt->mnt_mountpoint = mnt->mnt_root; |
| 1010 | mnt->mnt_parent = mnt; | 1045 | mnt->mnt_parent = mnt; |
| 1011 | m->mnt_ghosts--; | 1046 | m->mnt_ghosts--; |
| 1012 | spin_unlock(&vfsmount_lock); | 1047 | br_write_unlock(vfsmount_lock); |
| 1013 | dput(dentry); | 1048 | dput(dentry); |
| 1014 | mntput(m); | 1049 | mntput(m); |
| 1015 | } | 1050 | } |
| @@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) | |||
| 1017 | } | 1052 | } |
| 1018 | } | 1053 | } |
| 1019 | 1054 | ||
| 1055 | /* | ||
| 1056 | * vfsmount lock must be held for write | ||
| 1057 | * namespace_sem must be held for write | ||
| 1058 | */ | ||
| 1020 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) | 1059 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) |
| 1021 | { | 1060 | { |
| 1022 | struct vfsmount *p; | 1061 | struct vfsmount *p; |
| @@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
| 1107 | } | 1146 | } |
| 1108 | 1147 | ||
| 1109 | down_write(&namespace_sem); | 1148 | down_write(&namespace_sem); |
| 1110 | spin_lock(&vfsmount_lock); | 1149 | br_write_lock(vfsmount_lock); |
| 1111 | event++; | 1150 | event++; |
| 1112 | 1151 | ||
| 1113 | if (!(flags & MNT_DETACH)) | 1152 | if (!(flags & MNT_DETACH)) |
| @@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
| 1119 | umount_tree(mnt, 1, &umount_list); | 1158 | umount_tree(mnt, 1, &umount_list); |
| 1120 | retval = 0; | 1159 | retval = 0; |
| 1121 | } | 1160 | } |
| 1122 | spin_unlock(&vfsmount_lock); | 1161 | br_write_unlock(vfsmount_lock); |
| 1123 | up_write(&namespace_sem); | 1162 | up_write(&namespace_sem); |
| 1124 | release_mounts(&umount_list); | 1163 | release_mounts(&umount_list); |
| 1125 | return retval; | 1164 | return retval; |
| @@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |||
| 1231 | q = clone_mnt(p, p->mnt_root, flag); | 1270 | q = clone_mnt(p, p->mnt_root, flag); |
| 1232 | if (!q) | 1271 | if (!q) |
| 1233 | goto Enomem; | 1272 | goto Enomem; |
| 1234 | spin_lock(&vfsmount_lock); | 1273 | br_write_lock(vfsmount_lock); |
| 1235 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1274 | list_add_tail(&q->mnt_list, &res->mnt_list); |
| 1236 | attach_mnt(q, &path); | 1275 | attach_mnt(q, &path); |
| 1237 | spin_unlock(&vfsmount_lock); | 1276 | br_write_unlock(vfsmount_lock); |
| 1238 | } | 1277 | } |
| 1239 | } | 1278 | } |
| 1240 | return res; | 1279 | return res; |
| 1241 | Enomem: | 1280 | Enomem: |
| 1242 | if (res) { | 1281 | if (res) { |
| 1243 | LIST_HEAD(umount_list); | 1282 | LIST_HEAD(umount_list); |
| 1244 | spin_lock(&vfsmount_lock); | 1283 | br_write_lock(vfsmount_lock); |
| 1245 | umount_tree(res, 0, &umount_list); | 1284 | umount_tree(res, 0, &umount_list); |
| 1246 | spin_unlock(&vfsmount_lock); | 1285 | br_write_unlock(vfsmount_lock); |
| 1247 | release_mounts(&umount_list); | 1286 | release_mounts(&umount_list); |
| 1248 | } | 1287 | } |
| 1249 | return NULL; | 1288 | return NULL; |
| @@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
| 1262 | { | 1301 | { |
| 1263 | LIST_HEAD(umount_list); | 1302 | LIST_HEAD(umount_list); |
| 1264 | down_write(&namespace_sem); | 1303 | down_write(&namespace_sem); |
| 1265 | spin_lock(&vfsmount_lock); | 1304 | br_write_lock(vfsmount_lock); |
| 1266 | umount_tree(mnt, 0, &umount_list); | 1305 | umount_tree(mnt, 0, &umount_list); |
| 1267 | spin_unlock(&vfsmount_lock); | 1306 | br_write_unlock(vfsmount_lock); |
| 1268 | up_write(&namespace_sem); | 1307 | up_write(&namespace_sem); |
| 1269 | release_mounts(&umount_list); | 1308 | release_mounts(&umount_list); |
| 1270 | } | 1309 | } |
| @@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
| 1392 | if (err) | 1431 | if (err) |
| 1393 | goto out_cleanup_ids; | 1432 | goto out_cleanup_ids; |
| 1394 | 1433 | ||
| 1395 | spin_lock(&vfsmount_lock); | 1434 | br_write_lock(vfsmount_lock); |
| 1396 | 1435 | ||
| 1397 | if (IS_MNT_SHARED(dest_mnt)) { | 1436 | if (IS_MNT_SHARED(dest_mnt)) { |
| 1398 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | 1437 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
| @@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
| 1411 | list_del_init(&child->mnt_hash); | 1450 | list_del_init(&child->mnt_hash); |
| 1412 | commit_tree(child); | 1451 | commit_tree(child); |
| 1413 | } | 1452 | } |
| 1414 | spin_unlock(&vfsmount_lock); | 1453 | br_write_unlock(vfsmount_lock); |
| 1454 | |||
| 1415 | return 0; | 1455 | return 0; |
| 1416 | 1456 | ||
| 1417 | out_cleanup_ids: | 1457 | out_cleanup_ids: |
| @@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) | |||
| 1466 | goto out_unlock; | 1506 | goto out_unlock; |
| 1467 | } | 1507 | } |
| 1468 | 1508 | ||
| 1469 | spin_lock(&vfsmount_lock); | 1509 | br_write_lock(vfsmount_lock); |
| 1470 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) | 1510 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) |
| 1471 | change_mnt_propagation(m, type); | 1511 | change_mnt_propagation(m, type); |
| 1472 | spin_unlock(&vfsmount_lock); | 1512 | br_write_unlock(vfsmount_lock); |
| 1473 | 1513 | ||
| 1474 | out_unlock: | 1514 | out_unlock: |
| 1475 | up_write(&namespace_sem); | 1515 | up_write(&namespace_sem); |
| @@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, | |||
| 1513 | err = graft_tree(mnt, path); | 1553 | err = graft_tree(mnt, path); |
| 1514 | if (err) { | 1554 | if (err) { |
| 1515 | LIST_HEAD(umount_list); | 1555 | LIST_HEAD(umount_list); |
| 1516 | spin_lock(&vfsmount_lock); | 1556 | |
| 1557 | br_write_lock(vfsmount_lock); | ||
| 1517 | umount_tree(mnt, 0, &umount_list); | 1558 | umount_tree(mnt, 0, &umount_list); |
| 1518 | spin_unlock(&vfsmount_lock); | 1559 | br_write_unlock(vfsmount_lock); |
| 1519 | release_mounts(&umount_list); | 1560 | release_mounts(&umount_list); |
| 1520 | } | 1561 | } |
| 1521 | 1562 | ||
| @@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, | |||
| 1568 | else | 1609 | else |
| 1569 | err = do_remount_sb(sb, flags, data, 0); | 1610 | err = do_remount_sb(sb, flags, data, 0); |
| 1570 | if (!err) { | 1611 | if (!err) { |
| 1571 | spin_lock(&vfsmount_lock); | 1612 | br_write_lock(vfsmount_lock); |
| 1572 | mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; | 1613 | mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; |
| 1573 | path->mnt->mnt_flags = mnt_flags; | 1614 | path->mnt->mnt_flags = mnt_flags; |
| 1574 | spin_unlock(&vfsmount_lock); | 1615 | br_write_unlock(vfsmount_lock); |
| 1575 | } | 1616 | } |
| 1576 | up_write(&sb->s_umount); | 1617 | up_write(&sb->s_umount); |
| 1577 | if (!err) { | 1618 | if (!err) { |
| 1578 | spin_lock(&vfsmount_lock); | 1619 | br_write_lock(vfsmount_lock); |
| 1579 | touch_mnt_namespace(path->mnt->mnt_ns); | 1620 | touch_mnt_namespace(path->mnt->mnt_ns); |
| 1580 | spin_unlock(&vfsmount_lock); | 1621 | br_write_unlock(vfsmount_lock); |
| 1581 | } | 1622 | } |
| 1582 | return err; | 1623 | return err; |
| 1583 | } | 1624 | } |
| @@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
| 1754 | return; | 1795 | return; |
| 1755 | 1796 | ||
| 1756 | down_write(&namespace_sem); | 1797 | down_write(&namespace_sem); |
| 1757 | spin_lock(&vfsmount_lock); | 1798 | br_write_lock(vfsmount_lock); |
| 1758 | 1799 | ||
| 1759 | /* extract from the expiration list every vfsmount that matches the | 1800 | /* extract from the expiration list every vfsmount that matches the |
| 1760 | * following criteria: | 1801 | * following criteria: |
| @@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
| 1773 | touch_mnt_namespace(mnt->mnt_ns); | 1814 | touch_mnt_namespace(mnt->mnt_ns); |
| 1774 | umount_tree(mnt, 1, &umounts); | 1815 | umount_tree(mnt, 1, &umounts); |
| 1775 | } | 1816 | } |
| 1776 | spin_unlock(&vfsmount_lock); | 1817 | br_write_unlock(vfsmount_lock); |
| 1777 | up_write(&namespace_sem); | 1818 | up_write(&namespace_sem); |
| 1778 | 1819 | ||
| 1779 | release_mounts(&umounts); | 1820 | release_mounts(&umounts); |
| @@ -1830,6 +1871,8 @@ resume: | |||
| 1830 | /* | 1871 | /* |
| 1831 | * process a list of expirable mountpoints with the intent of discarding any | 1872 | * process a list of expirable mountpoints with the intent of discarding any |
| 1832 | * submounts of a specific parent mountpoint | 1873 | * submounts of a specific parent mountpoint |
| 1874 | * | ||
| 1875 | * vfsmount_lock must be held for write | ||
| 1833 | */ | 1876 | */ |
| 1834 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) | 1877 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) |
| 1835 | { | 1878 | { |
| @@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
| 2048 | kfree(new_ns); | 2091 | kfree(new_ns); |
| 2049 | return ERR_PTR(-ENOMEM); | 2092 | return ERR_PTR(-ENOMEM); |
| 2050 | } | 2093 | } |
| 2051 | spin_lock(&vfsmount_lock); | 2094 | br_write_lock(vfsmount_lock); |
| 2052 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); | 2095 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); |
| 2053 | spin_unlock(&vfsmount_lock); | 2096 | br_write_unlock(vfsmount_lock); |
| 2054 | 2097 | ||
| 2055 | /* | 2098 | /* |
| 2056 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts | 2099 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts |
| @@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
| 2244 | goto out2; /* not attached */ | 2287 | goto out2; /* not attached */ |
| 2245 | /* make sure we can reach put_old from new_root */ | 2288 | /* make sure we can reach put_old from new_root */ |
| 2246 | tmp = old.mnt; | 2289 | tmp = old.mnt; |
| 2247 | spin_lock(&vfsmount_lock); | 2290 | br_write_lock(vfsmount_lock); |
| 2248 | if (tmp != new.mnt) { | 2291 | if (tmp != new.mnt) { |
| 2249 | for (;;) { | 2292 | for (;;) { |
| 2250 | if (tmp->mnt_parent == tmp) | 2293 | if (tmp->mnt_parent == tmp) |
| @@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
| 2264 | /* mount new_root on / */ | 2307 | /* mount new_root on / */ |
| 2265 | attach_mnt(new.mnt, &root_parent); | 2308 | attach_mnt(new.mnt, &root_parent); |
| 2266 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2309 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
| 2267 | spin_unlock(&vfsmount_lock); | 2310 | br_write_unlock(vfsmount_lock); |
| 2268 | chroot_fs_refs(&root, &new); | 2311 | chroot_fs_refs(&root, &new); |
| 2269 | error = 0; | 2312 | error = 0; |
| 2270 | path_put(&root_parent); | 2313 | path_put(&root_parent); |
| @@ -2279,7 +2322,7 @@ out1: | |||
| 2279 | out0: | 2322 | out0: |
| 2280 | return error; | 2323 | return error; |
| 2281 | out3: | 2324 | out3: |
| 2282 | spin_unlock(&vfsmount_lock); | 2325 | br_write_unlock(vfsmount_lock); |
| 2283 | goto out2; | 2326 | goto out2; |
| 2284 | } | 2327 | } |
| 2285 | 2328 | ||
| @@ -2326,6 +2369,8 @@ void __init mnt_init(void) | |||
| 2326 | for (u = 0; u < HASH_SIZE; u++) | 2369 | for (u = 0; u < HASH_SIZE; u++) |
| 2327 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2370 | INIT_LIST_HEAD(&mount_hashtable[u]); |
| 2328 | 2371 | ||
| 2372 | br_lock_init(vfsmount_lock); | ||
| 2373 | |||
| 2329 | err = sysfs_init(); | 2374 | err = sysfs_init(); |
| 2330 | if (err) | 2375 | if (err) |
| 2331 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 2376 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
| @@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns) | |||
| 2344 | if (!atomic_dec_and_test(&ns->count)) | 2389 | if (!atomic_dec_and_test(&ns->count)) |
| 2345 | return; | 2390 | return; |
| 2346 | down_write(&namespace_sem); | 2391 | down_write(&namespace_sem); |
| 2347 | spin_lock(&vfsmount_lock); | 2392 | br_write_lock(vfsmount_lock); |
| 2348 | umount_tree(ns->root, 0, &umount_list); | 2393 | umount_tree(ns->root, 0, &umount_list); |
| 2349 | spin_unlock(&vfsmount_lock); | 2394 | br_write_unlock(vfsmount_lock); |
| 2350 | up_write(&namespace_sem); | 2395 | up_write(&namespace_sem); |
| 2351 | release_mounts(&umount_list); | 2396 | release_mounts(&umount_list); |
| 2352 | kfree(ns); | 2397 | kfree(ns); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index bee60c04109a..922263393c76 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
| @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) | |||
| 175 | { | 175 | { |
| 176 | struct the_nilfs *nilfs = sbi->s_nilfs; | 176 | struct the_nilfs *nilfs = sbi->s_nilfs; |
| 177 | int err; | 177 | int err; |
| 178 | int barrier_done = 0; | ||
| 179 | 178 | ||
| 180 | if (nilfs_test_opt(sbi, BARRIER)) { | ||
| 181 | set_buffer_ordered(nilfs->ns_sbh[0]); | ||
| 182 | barrier_done = 1; | ||
| 183 | } | ||
| 184 | retry: | 179 | retry: |
| 185 | set_buffer_dirty(nilfs->ns_sbh[0]); | 180 | set_buffer_dirty(nilfs->ns_sbh[0]); |
| 186 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | 181 | |
| 187 | if (err == -EOPNOTSUPP && barrier_done) { | 182 | if (nilfs_test_opt(sbi, BARRIER)) { |
| 188 | nilfs_warning(sbi->s_super, __func__, | 183 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], |
| 189 | "barrier-based sync failed. " | 184 | WRITE_SYNC | WRITE_BARRIER); |
| 190 | "disabling barriers\n"); | 185 | if (err == -EOPNOTSUPP) { |
| 191 | nilfs_clear_opt(sbi, BARRIER); | 186 | nilfs_warning(sbi->s_super, __func__, |
| 192 | barrier_done = 0; | 187 | "barrier-based sync failed. " |
| 193 | clear_buffer_ordered(nilfs->ns_sbh[0]); | 188 | "disabling barriers\n"); |
| 194 | goto retry; | 189 | nilfs_clear_opt(sbi, BARRIER); |
| 190 | goto retry; | ||
| 191 | } | ||
| 192 | } else { | ||
| 193 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | ||
| 195 | } | 194 | } |
| 195 | |||
| 196 | if (unlikely(err)) { | 196 | if (unlikely(err)) { |
| 197 | printk(KERN_ERR | 197 | printk(KERN_ERR |
| 198 | "NILFS: unable to write superblock (err=%d)\n", err); | 198 | "NILFS: unable to write superblock (err=%d)\n", err); |
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
| @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
| 675 | f->f_path.mnt = mnt; | 675 | f->f_path.mnt = mnt; |
| 676 | f->f_pos = 0; | 676 | f->f_pos = 0; |
| 677 | f->f_op = fops_get(inode->i_fop); | 677 | f->f_op = fops_get(inode->i_fop); |
| 678 | file_move(f, &inode->i_sb->s_files); | 678 | file_sb_list_add(f, inode->i_sb); |
| 679 | 679 | ||
| 680 | error = security_dentry_open(f, cred); | 680 | error = security_dentry_open(f, cred); |
| 681 | if (error) | 681 | if (error) |
| @@ -721,7 +721,7 @@ cleanup_all: | |||
| 721 | mnt_drop_write(mnt); | 721 | mnt_drop_write(mnt); |
| 722 | } | 722 | } |
| 723 | } | 723 | } |
| 724 | file_kill(f); | 724 | file_sb_list_del(f); |
| 725 | f->f_path.dentry = NULL; | 725 | f->f_path.dentry = NULL; |
| 726 | f->f_path.mnt = NULL; | 726 | f->f_path.mnt = NULL; |
| 727 | cleanup_file: | 727 | cleanup_file: |
diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a83149..8066b8dd748f 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
| @@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) | |||
| 126 | return 0; | 126 | return 0; |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | /* | ||
| 130 | * vfsmount lock must be held for write | ||
| 131 | */ | ||
| 129 | void change_mnt_propagation(struct vfsmount *mnt, int type) | 132 | void change_mnt_propagation(struct vfsmount *mnt, int type) |
| 130 | { | 133 | { |
| 131 | if (type == MS_SHARED) { | 134 | if (type == MS_SHARED) { |
| @@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, | |||
| 270 | prev_src_mnt = child; | 273 | prev_src_mnt = child; |
| 271 | } | 274 | } |
| 272 | out: | 275 | out: |
| 273 | spin_lock(&vfsmount_lock); | 276 | br_write_lock(vfsmount_lock); |
| 274 | while (!list_empty(&tmp_list)) { | 277 | while (!list_empty(&tmp_list)) { |
| 275 | child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); | 278 | child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); |
| 276 | umount_tree(child, 0, &umount_list); | 279 | umount_tree(child, 0, &umount_list); |
| 277 | } | 280 | } |
| 278 | spin_unlock(&vfsmount_lock); | 281 | br_write_unlock(vfsmount_lock); |
| 279 | release_mounts(&umount_list); | 282 | release_mounts(&umount_list); |
| 280 | return ret; | 283 | return ret; |
| 281 | } | 284 | } |
| @@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) | |||
| 296 | * other mounts its parent propagates to. | 299 | * other mounts its parent propagates to. |
| 297 | * Check if any of these mounts that **do not have submounts** | 300 | * Check if any of these mounts that **do not have submounts** |
| 298 | * have more references than 'refcnt'. If so return busy. | 301 | * have more references than 'refcnt'. If so return busy. |
| 302 | * | ||
| 303 | * vfsmount lock must be held for read or write | ||
| 299 | */ | 304 | */ |
| 300 | int propagate_mount_busy(struct vfsmount *mnt, int refcnt) | 305 | int propagate_mount_busy(struct vfsmount *mnt, int refcnt) |
| 301 | { | 306 | { |
| @@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) | |||
| 353 | * collect all mounts that receive propagation from the mount in @list, | 358 | * collect all mounts that receive propagation from the mount in @list, |
| 354 | * and return these additional mounts in the same list. | 359 | * and return these additional mounts in the same list. |
| 355 | * @list: the list of mounts to be unmounted. | 360 | * @list: the list of mounts to be unmounted. |
| 361 | * | ||
| 362 | * vfsmount lock must be held for write | ||
| 356 | */ | 363 | */ |
| 357 | int propagate_umount(struct list_head *list) | 364 | int propagate_umount(struct list_head *list) |
| 358 | { | 365 | { |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413dcbe1..caa758377d66 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) | |||
| 83 | dquot_drop(inode); | 83 | dquot_drop(inode); |
| 84 | inode->i_blocks = 0; | 84 | inode->i_blocks = 0; |
| 85 | reiserfs_write_unlock_once(inode->i_sb, depth); | 85 | reiserfs_write_unlock_once(inode->i_sb, depth); |
| 86 | return; | ||
| 86 | 87 | ||
| 87 | no_delete: | 88 | no_delete: |
| 88 | end_writeback(inode); | 89 | end_writeback(inode); |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
| @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, | |||
| 2311 | /* flush out the real blocks */ | 2311 | /* flush out the real blocks */ |
| 2312 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2312 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
| 2313 | set_buffer_dirty(real_blocks[i]); | 2313 | set_buffer_dirty(real_blocks[i]); |
| 2314 | ll_rw_block(SWRITE, 1, real_blocks + i); | 2314 | write_dirty_buffer(real_blocks[i], WRITE); |
| 2315 | } | 2315 | } |
| 2316 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2316 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
| 2317 | wait_on_buffer(real_blocks[i]); | 2317 | wait_on_buffer(real_blocks[i]); |
diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
| 54 | s = NULL; | 54 | s = NULL; |
| 55 | goto out; | 55 | goto out; |
| 56 | } | 56 | } |
| 57 | #ifdef CONFIG_SMP | ||
| 58 | s->s_files = alloc_percpu(struct list_head); | ||
| 59 | if (!s->s_files) { | ||
| 60 | security_sb_free(s); | ||
| 61 | kfree(s); | ||
| 62 | s = NULL; | ||
| 63 | goto out; | ||
| 64 | } else { | ||
| 65 | int i; | ||
| 66 | |||
| 67 | for_each_possible_cpu(i) | ||
| 68 | INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); | ||
| 69 | } | ||
| 70 | #else | ||
| 57 | INIT_LIST_HEAD(&s->s_files); | 71 | INIT_LIST_HEAD(&s->s_files); |
| 72 | #endif | ||
| 58 | INIT_LIST_HEAD(&s->s_instances); | 73 | INIT_LIST_HEAD(&s->s_instances); |
| 59 | INIT_HLIST_HEAD(&s->s_anon); | 74 | INIT_HLIST_HEAD(&s->s_anon); |
| 60 | INIT_LIST_HEAD(&s->s_inodes); | 75 | INIT_LIST_HEAD(&s->s_inodes); |
| @@ -108,6 +123,9 @@ out: | |||
| 108 | */ | 123 | */ |
| 109 | static inline void destroy_super(struct super_block *s) | 124 | static inline void destroy_super(struct super_block *s) |
| 110 | { | 125 | { |
| 126 | #ifdef CONFIG_SMP | ||
| 127 | free_percpu(s->s_files); | ||
| 128 | #endif | ||
| 111 | security_sb_free(s); | 129 | security_sb_free(s); |
| 112 | kfree(s->s_subtype); | 130 | kfree(s->s_subtype); |
| 113 | kfree(s->s_options); | 131 | kfree(s->s_options); |
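The alloc_super() change above, consolidated: on SMP the superblock's open-file list becomes one list head per possible CPU, which is what lets the fs/open.c hunks earlier in this diff replace file_move()/file_kill() with file_sb_list_add()/file_sb_list_del(), presumably so that new opens stay CPU-local instead of contending on one global files lock (the helper bodies are outside this excerpt):

#ifdef CONFIG_SMP
	s->s_files = alloc_percpu(struct list_head);
	if (!s->s_files) {
		security_sb_free(s);
		kfree(s);
		s = NULL;
		goto out;
	} else {
		int i;

		for_each_possible_cpu(i)
			INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
	}
#else
	INIT_LIST_HEAD(&s->s_files);
#endif

The matching destroy_super() hunk frees the per-CPU allocation with free_percpu(s->s_files) under the same #ifdef.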
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
| @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
| 114 | 114 | ||
| 115 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 115 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 116 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 116 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 117 | if (sb->s_flags & MS_SYNCHRONOUS) { | 117 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 118 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 118 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 119 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 120 | } | ||
| 121 | sb->s_dirt = 1; | 119 | sb->s_dirt = 1; |
| 122 | 120 | ||
| 123 | unlock_super (sb); | 121 | unlock_super (sb); |
| @@ -207,10 +205,8 @@ do_more: | |||
| 207 | 205 | ||
| 208 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 206 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 209 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 207 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 210 | if (sb->s_flags & MS_SYNCHRONOUS) { | 208 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 211 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 209 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 212 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 213 | } | ||
| 214 | 210 | ||
| 215 | if (overflow) { | 211 | if (overflow) { |
| 216 | fragment += count; | 212 | fragment += count; |
| @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
| 558 | 554 | ||
| 559 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 555 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 560 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 556 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 561 | if (sb->s_flags & MS_SYNCHRONOUS) { | 557 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 562 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 558 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 563 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 564 | } | ||
| 565 | sb->s_dirt = 1; | 559 | sb->s_dirt = 1; |
| 566 | 560 | ||
| 567 | UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); | 561 | UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); |
| @@ -680,10 +674,8 @@ cg_found: | |||
| 680 | succed: | 674 | succed: |
| 681 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 675 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 682 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 676 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 683 | if (sb->s_flags & MS_SYNCHRONOUS) { | 677 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 684 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 678 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 685 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 686 | } | ||
| 687 | sb->s_dirt = 1; | 679 | sb->s_dirt = 1; |
| 688 | 680 | ||
| 689 | result += cgno * uspi->s_fpg; | 681 | result += cgno * uspi->s_fpg; |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
| @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) | |||
| 113 | 113 | ||
| 114 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 114 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 115 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 115 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 116 | if (sb->s_flags & MS_SYNCHRONOUS) { | 116 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 117 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 117 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 118 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 119 | } | ||
| 120 | 118 | ||
| 121 | sb->s_dirt = 1; | 119 | sb->s_dirt = 1; |
| 122 | unlock_super (sb); | 120 | unlock_super (sb); |
| @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, | |||
| 156 | 154 | ||
| 157 | fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); | 155 | fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); |
| 158 | ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); | 156 | ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); |
| 159 | if (sb->s_flags & MS_SYNCHRONOUS) { | 157 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 160 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 158 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 161 | ubh_wait_on_buffer(UCPI_UBH(ucpi)); | ||
| 162 | } | ||
| 163 | 159 | ||
| 164 | UFSD("EXIT\n"); | 160 | UFSD("EXIT\n"); |
| 165 | } | 161 | } |
| @@ -290,10 +286,8 @@ cg_found: | |||
| 290 | } | 286 | } |
| 291 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 287 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
| 292 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 288 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
| 293 | if (sb->s_flags & MS_SYNCHRONOUS) { | 289 | if (sb->s_flags & MS_SYNCHRONOUS) |
| 294 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 290 | ubh_sync_block(UCPI_UBH(ucpi)); |
| 295 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
| 296 | } | ||
| 297 | sb->s_dirt = 1; | 291 | sb->s_dirt = 1; |
| 298 | 292 | ||
| 299 | inode->i_ino = cg * uspi->s_ipg + bit; | 293 | inode->i_ino = cg * uspi->s_ipg + bit; |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
| @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) | |||
| 243 | ubh_bforget(ind_ubh); | 243 | ubh_bforget(ind_ubh); |
| 244 | ind_ubh = NULL; | 244 | ind_ubh = NULL; |
| 245 | } | 245 | } |
| 246 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { | 246 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) |
| 247 | ubh_ll_rw_block(SWRITE, ind_ubh); | 247 | ubh_sync_block(ind_ubh); |
| 248 | ubh_wait_on_buffer (ind_ubh); | ||
| 249 | } | ||
| 250 | ubh_brelse (ind_ubh); | 248 | ubh_brelse (ind_ubh); |
| 251 | 249 | ||
| 252 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 250 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
| @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) | |||
| 307 | ubh_bforget(dind_bh); | 305 | ubh_bforget(dind_bh); |
| 308 | dind_bh = NULL; | 306 | dind_bh = NULL; |
| 309 | } | 307 | } |
| 310 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { | 308 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) |
| 311 | ubh_ll_rw_block(SWRITE, dind_bh); | 309 | ubh_sync_block(dind_bh); |
| 312 | ubh_wait_on_buffer (dind_bh); | ||
| 313 | } | ||
| 314 | ubh_brelse (dind_bh); | 310 | ubh_brelse (dind_bh); |
| 315 | 311 | ||
| 316 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 312 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
| @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) | |||
| 367 | ubh_bforget(tind_bh); | 363 | ubh_bforget(tind_bh); |
| 368 | tind_bh = NULL; | 364 | tind_bh = NULL; |
| 369 | } | 365 | } |
| 370 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { | 366 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) |
| 371 | ubh_ll_rw_block(SWRITE, tind_bh); | 367 | ubh_sync_block(tind_bh); |
| 372 | ubh_wait_on_buffer (tind_bh); | ||
| 373 | } | ||
| 374 | ubh_brelse (tind_bh); | 368 | ubh_brelse (tind_bh); |
| 375 | 369 | ||
| 376 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 370 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c | |||
| @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) | |||
| 113 | } | 113 | } |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) | 116 | void ubh_sync_block(struct ufs_buffer_head *ubh) |
| 117 | { | 117 | { |
| 118 | if (!ubh) | 118 | if (ubh) { |
| 119 | return; | 119 | unsigned i; |
| 120 | 120 | ||
| 121 | ll_rw_block(rw, ubh->count, ubh->bh); | 121 | for (i = 0; i < ubh->count; i++) |
| 122 | } | 122 | write_dirty_buffer(ubh->bh[i], WRITE); |
| 123 | 123 | ||
| 124 | void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) | 124 | for (i = 0; i < ubh->count; i++) |
| 125 | { | 125 | wait_on_buffer(ubh->bh[i]); |
| 126 | unsigned i; | 126 | } |
| 127 | if (!ubh) | ||
| 128 | return; | ||
| 129 | for ( i = 0; i < ubh->count; i++ ) | ||
| 130 | wait_on_buffer (ubh->bh[i]); | ||
| 131 | } | 127 | } |
| 132 | 128 | ||
| 133 | void ubh_bforget (struct ufs_buffer_head * ubh) | 129 | void ubh_bforget (struct ufs_buffer_head * ubh) |
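[Editor's note] The rewritten helper above folds the old submit-then-wait pair (ubh_ll_rw_block plus ubh_wait_on_buffer) into a single two-pass loop: queue every dirty buffer first, then wait for all of them, so the writes can proceed in parallel rather than one at a time. A minimal sketch of the same pattern over a plain buffer_head array, assuming only the write_dirty_buffer()/wait_on_buffer() API this series introduces (sync_bh_array is a hypothetical name, not part of the patch):

	#include <linux/buffer_head.h>

	static void sync_bh_array(struct buffer_head **bhs, unsigned count)
	{
		unsigned i;

		for (i = 0; i < count; i++)	/* pass 1: submit every dirty buffer */
			write_dirty_buffer(bhs[i], WRITE);

		for (i = 0; i < count; i++)	/* pass 2: wait for all of them */
			wait_on_buffer(bhs[i]);
	}

This is exactly what ubh_sync_block() does over ubh->bh[], which is why the SWRITE request type and the separate wait helper can both go away.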
diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h | |||
| @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); | |||
| 269 | extern void ubh_brelse_uspi (struct ufs_sb_private_info *); | 269 | extern void ubh_brelse_uspi (struct ufs_sb_private_info *); |
| 270 | extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); | 270 | extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); |
| 271 | extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); | 271 | extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); |
| 272 | extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); | 272 | extern void ubh_sync_block(struct ufs_buffer_head *); |
| 273 | extern void ubh_wait_on_buffer (struct ufs_buffer_head *); | ||
| 274 | extern void ubh_bforget (struct ufs_buffer_head *); | 273 | extern void ubh_bforget (struct ufs_buffer_head *); |
| 275 | extern int ubh_buffer_dirty (struct ufs_buffer_head *); | 274 | extern int ubh_buffer_dirty (struct ufs_buffer_head *); |
| 276 | #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) | 275 | #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) |
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h | |||
| @@ -32,7 +32,6 @@ enum bh_state_bits { | |||
| 32 | BH_Delay, /* Buffer is not yet allocated on disk */ | 32 | BH_Delay, /* Buffer is not yet allocated on disk */ |
| 33 | BH_Boundary, /* Block is followed by a discontiguity */ | 33 | BH_Boundary, /* Block is followed by a discontiguity */ |
| 34 | BH_Write_EIO, /* I/O error on write */ | 34 | BH_Write_EIO, /* I/O error on write */ |
| 35 | BH_Ordered, /* ordered write */ | ||

| 36 | BH_Eopnotsupp, /* operation not supported (barrier) */ | 35 | BH_Eopnotsupp, /* operation not supported (barrier) */ |
| 37 | BH_Unwritten, /* Buffer is allocated on disk but not written */ | 36 | BH_Unwritten, /* Buffer is allocated on disk but not written */ |
| 38 | BH_Quiet, /* Buffer Error printks to be quiet */ | 37 | BH_Quiet, /* Buffer Error printks to be quiet */ |
| @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) | |||
| 125 | BUFFER_FNS(Delay, delay) | 124 | BUFFER_FNS(Delay, delay) |
| 126 | BUFFER_FNS(Boundary, boundary) | 125 | BUFFER_FNS(Boundary, boundary) |
| 127 | BUFFER_FNS(Write_EIO, write_io_error) | 126 | BUFFER_FNS(Write_EIO, write_io_error) |
| 128 | BUFFER_FNS(Ordered, ordered) | ||
| 129 | BUFFER_FNS(Eopnotsupp, eopnotsupp) | 127 | BUFFER_FNS(Eopnotsupp, eopnotsupp) |
| 130 | BUFFER_FNS(Unwritten, unwritten) | 128 | BUFFER_FNS(Unwritten, unwritten) |
| 131 | 129 | ||
| @@ -183,6 +181,8 @@ void unlock_buffer(struct buffer_head *bh); | |||
| 183 | void __lock_buffer(struct buffer_head *bh); | 181 | void __lock_buffer(struct buffer_head *bh); |
| 184 | void ll_rw_block(int, int, struct buffer_head * bh[]); | 182 | void ll_rw_block(int, int, struct buffer_head * bh[]); |
| 185 | int sync_dirty_buffer(struct buffer_head *bh); | 183 | int sync_dirty_buffer(struct buffer_head *bh); |
| 184 | int __sync_dirty_buffer(struct buffer_head *bh, int rw); | ||
| 185 | void write_dirty_buffer(struct buffer_head *bh, int rw); | ||
| 186 | int submit_bh(int, struct buffer_head *); | 186 | int submit_bh(int, struct buffer_head *); |
| 187 | void write_boundary_block(struct block_device *bdev, | 187 | void write_boundary_block(struct block_device *bdev, |
| 188 | sector_t bblock, unsigned blocksize); | 188 | sector_t bblock, unsigned blocksize); |
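[Editor's note] write_dirty_buffer() subsumes what SWRITE used to ask of ll_rw_block(): lock the buffer, write it only if it is still dirty, and otherwise just drop the lock. The body lives in fs/buffer.c, which is not part of this excerpt, so take the following as a hedged sketch of that contract rather than the verbatim hunk (end_buffer_write_sync is the long-standing synchronous-write completion handler):

	void write_dirty_buffer(struct buffer_head *bh, int rw)
	{
		lock_buffer(bh);
		if (!test_clear_buffer_dirty(bh)) {
			/* someone else already wrote it out */
			unlock_buffer(bh);
			return;
		}
		bh->b_end_io = end_buffer_write_sync;
		get_bh(bh);		/* reference dropped on I/O completion */
		submit_bh(rw, bh);
	}

Callers that need to wait then call wait_on_buffer() afterwards, as ubh_sync_block() does above; __sync_dirty_buffer() is the wait-and-return-status variant.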
diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d83fc1..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -125,9 +125,6 @@ struct inodes_stat_t { | |||
| 125 | * block layer could (in theory) choose to ignore this | 125 | * block layer could (in theory) choose to ignore this |
| 126 | * request if it runs into resource problems. | 126 | * request if it runs into resource problems. |
| 127 | * WRITE A normal async write. Device will be plugged. | 127 | * WRITE A normal async write. Device will be plugged. |
| 128 | * SWRITE Like WRITE, but a special case for ll_rw_block() that | ||
| 129 | * tells it to lock the buffer first. Normally a buffer | ||
| 130 | * must be locked before doing IO. | ||
| 131 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down | 128 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down |
| 132 | * the hint that someone will be waiting on this IO | 129 | * the hint that someone will be waiting on this IO |
| 133 | * shortly. The device must still be unplugged explicitly, | 130 | * shortly. The device must still be unplugged explicitly, |
| @@ -138,9 +135,6 @@ struct inodes_stat_t { | |||
| 138 | * immediately after submission. The write equivalent | 135 | * immediately after submission. The write equivalent |
| 139 | * of READ_SYNC. | 136 | * of READ_SYNC. |
| 140 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. | 137 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. |
| 141 | * SWRITE_SYNC | ||
| 142 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. | ||
| 143 | * See SWRITE. | ||
| 144 | * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all | 138 | * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all |
| 145 | * previously submitted writes must be safely on storage | 139 | * previously submitted writes must be safely on storage |
| 146 | * before this one is started. Also guarantees that when | 140 | * before this one is started. Also guarantees that when |
| @@ -155,7 +149,6 @@ struct inodes_stat_t { | |||
| 155 | #define READ 0 | 149 | #define READ 0 |
| 156 | #define WRITE RW_MASK | 150 | #define WRITE RW_MASK |
| 157 | #define READA RWA_MASK | 151 | #define READA RWA_MASK |
| 158 | #define SWRITE (WRITE | READA) | ||
| 159 | 152 | ||
| 160 | #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) | 153 | #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) |
| 161 | #define READ_META (READ | REQ_META) | 154 | #define READ_META (READ | REQ_META) |
| @@ -165,8 +158,6 @@ struct inodes_stat_t { | |||
| 165 | #define WRITE_META (WRITE | REQ_META) | 158 | #define WRITE_META (WRITE | REQ_META) |
| 166 | #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 159 | #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
| 167 | REQ_HARDBARRIER) | 160 | REQ_HARDBARRIER) |
| 168 | #define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) | ||
| 169 | #define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) | ||
| 170 | 161 | ||
| 171 | /* | 162 | /* |
| 172 | * These aren't really reads or writes, they pass down information about | 163 | * These aren't really reads or writes, they pass down information about |
| @@ -929,6 +920,9 @@ struct file { | |||
| 929 | #define f_vfsmnt f_path.mnt | 920 | #define f_vfsmnt f_path.mnt |
| 930 | const struct file_operations *f_op; | 921 | const struct file_operations *f_op; |
| 931 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ | 922 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ |
| 923 | #ifdef CONFIG_SMP | ||
| 924 | int f_sb_list_cpu; | ||
| 925 | #endif | ||
| 932 | atomic_long_t f_count; | 926 | atomic_long_t f_count; |
| 933 | unsigned int f_flags; | 927 | unsigned int f_flags; |
| 934 | fmode_t f_mode; | 928 | fmode_t f_mode; |
| @@ -953,9 +947,6 @@ struct file { | |||
| 953 | unsigned long f_mnt_write_state; | 947 | unsigned long f_mnt_write_state; |
| 954 | #endif | 948 | #endif |
| 955 | }; | 949 | }; |
| 956 | extern spinlock_t files_lock; | ||
| 957 | #define file_list_lock() spin_lock(&files_lock); | ||
| 958 | #define file_list_unlock() spin_unlock(&files_lock); | ||
| 959 | 950 | ||
| 960 | #define get_file(x) atomic_long_inc(&(x)->f_count) | 951 | #define get_file(x) atomic_long_inc(&(x)->f_count) |
| 961 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) | 952 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) |
| @@ -1346,7 +1337,11 @@ struct super_block { | |||
| 1346 | 1337 | ||
| 1347 | struct list_head s_inodes; /* all inodes */ | 1338 | struct list_head s_inodes; /* all inodes */ |
| 1348 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1339 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ |
| 1340 | #ifdef CONFIG_SMP | ||
| 1341 | struct list_head __percpu *s_files; | ||
| 1342 | #else | ||
| 1349 | struct list_head s_files; | 1343 | struct list_head s_files; |
| 1344 | #endif | ||
| 1350 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ | 1345 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ |
| 1351 | struct list_head s_dentry_lru; /* unused dentry lru */ | 1346 | struct list_head s_dentry_lru; /* unused dentry lru */ |
| 1352 | int s_nr_dentry_unused; /* # of dentry on lru */ | 1347 | int s_nr_dentry_unused; /* # of dentry on lru */ |
| @@ -2197,8 +2192,6 @@ static inline void insert_inode_hash(struct inode *inode) { | |||
| 2197 | __insert_inode_hash(inode, inode->i_ino); | 2192 | __insert_inode_hash(inode, inode->i_ino); |
| 2198 | } | 2193 | } |
| 2199 | 2194 | ||
| 2200 | extern void file_move(struct file *f, struct list_head *list); | ||
| 2201 | extern void file_kill(struct file *f); | ||
| 2202 | #ifdef CONFIG_BLOCK | 2195 | #ifdef CONFIG_BLOCK |
| 2203 | extern void submit_bio(int, struct bio *); | 2196 | extern void submit_bio(int, struct bio *); |
| 2204 | extern int bdev_read_only(struct block_device *); | 2197 | extern int bdev_read_only(struct block_device *); |
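[Editor's note] The per-cpu s_files list and the new f_sb_list_cpu field work together: a file is added to the list of whichever CPU opened it, and that CPU number is recorded so removal can find the right list later. The companion fs/file_table.c changes are not in this excerpt, so the following is only a sketch under stated assumptions (files_lglock being the lglock that serializes the per-cpu lists; the caller holds its local lock, so preemption is already disabled):

	static inline void __file_sb_list_add(struct file *file,
					      struct super_block *sb)
	{
	#ifdef CONFIG_SMP
		int cpu = smp_processor_id();

		file->f_sb_list_cpu = cpu;	/* remembered for later removal */
		list_add(&file->f_u.fu_list, per_cpu_ptr(sb->s_files, cpu));
	#else
		list_add(&file->f_u.fu_list, &sb->s_files);
	#endif
	}

This is what lets file_move()/file_kill() and the global files_lock disappear from the header below.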
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | struct fs_struct { | 6 | struct fs_struct { |
| 7 | int users; | 7 | int users; |
| 8 | rwlock_t lock; | 8 | spinlock_t lock; |
| 9 | int umask; | 9 | int umask; |
| 10 | int in_exec; | 10 | int in_exec; |
| 11 | struct path root, pwd; | 11 | struct path root, pwd; |
| @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); | |||
| 23 | 23 | ||
| 24 | static inline void get_fs_root(struct fs_struct *fs, struct path *root) | 24 | static inline void get_fs_root(struct fs_struct *fs, struct path *root) |
| 25 | { | 25 | { |
| 26 | read_lock(&fs->lock); | 26 | spin_lock(&fs->lock); |
| 27 | *root = fs->root; | 27 | *root = fs->root; |
| 28 | path_get(root); | 28 | path_get(root); |
| 29 | read_unlock(&fs->lock); | 29 | spin_unlock(&fs->lock); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) | 32 | static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) |
| 33 | { | 33 | { |
| 34 | read_lock(&fs->lock); | 34 | spin_lock(&fs->lock); |
| 35 | *pwd = fs->pwd; | 35 | *pwd = fs->pwd; |
| 36 | path_get(pwd); | 36 | path_get(pwd); |
| 37 | read_unlock(&fs->lock); | 37 | spin_unlock(&fs->lock); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, | 40 | static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, |
| 41 | struct path *pwd) | 41 | struct path *pwd) |
| 42 | { | 42 | { |
| 43 | read_lock(&fs->lock); | 43 | spin_lock(&fs->lock); |
| 44 | *root = fs->root; | 44 | *root = fs->root; |
| 45 | path_get(root); | 45 | path_get(root); |
| 46 | *pwd = fs->pwd; | 46 | *pwd = fs->pwd; |
| 47 | path_get(pwd); | 47 | path_get(pwd); |
| 48 | read_unlock(&fs->lock); | 48 | spin_unlock(&fs->lock); |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | #endif /* _LINUX_FS_STRUCT_H */ | 51 | #endif /* _LINUX_FS_STRUCT_H */ |
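[Editor's note] With fs->lock now a plain spinlock, these helpers are the sanctioned way to snapshot root or pwd: they take a path reference inside the lock, and the caller drops it when done. A minimal caller sketch; the apparmor hunk later in this patch is exactly this conversion:

	struct path root;

	get_fs_root(current->fs, &root);	/* reference taken under fs->lock */
	/* ... resolve names relative to root ... */
	path_put(&root);			/* drop the reference taken above */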
diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h | |||
| @@ -0,0 +1,172 @@ | |||
| 1 | /* | ||
| 2 | * Specialised local-global spinlock. Can only be declared as global variables | ||
| 3 | * to avoid overhead and keep things simple (and we don't want to start using | ||
| 4 | * these inside dynamically allocated structures). | ||
| 5 | * | ||
| 6 | * "local/global locks" (lglocks) can be used to: | ||
| 7 | * | ||
| 8 | * - Provide fast exclusive access to per-CPU data, with exclusive access to | ||
| 9 | * another CPU's data allowed but possibly subject to contention, and to | ||
| 10 | * provide very slow exclusive access to all per-CPU data. | ||
| 11 | * - Or to provide very fast and scalable read serialisation, and to provide | ||
| 12 | * very slow exclusive serialisation of data (not necessarily per-CPU data). | ||
| 13 | * | ||
| 14 | * Brlocks are also implemented as a short-hand notation for the latter use | ||
| 15 | * case. | ||
| 16 | * | ||
| 17 | * Copyright 2009, 2010, Nick Piggin, Novell Inc. | ||
| 18 | */ | ||
| 19 | #ifndef __LINUX_LGLOCK_H | ||
| 20 | #define __LINUX_LGLOCK_H | ||
| 21 | |||
| 22 | #include <linux/spinlock.h> | ||
| 23 | #include <linux/lockdep.h> | ||
| 24 | #include <linux/percpu.h> | ||
| 25 | |||
| 26 | /* can make br locks by using local lock for read side, global lock for write */ | ||
| 27 | #define br_lock_init(name) name##_lock_init() | ||
| 28 | #define br_read_lock(name) name##_local_lock() | ||
| 29 | #define br_read_unlock(name) name##_local_unlock() | ||
| 30 | #define br_write_lock(name) name##_global_lock_online() | ||
| 31 | #define br_write_unlock(name) name##_global_unlock_online() | ||
| 32 | |||
| 33 | #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) | ||
| 34 | #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) | ||
| 35 | |||
| 36 | |||
| 37 | #define lg_lock_init(name) name##_lock_init() | ||
| 38 | #define lg_local_lock(name) name##_local_lock() | ||
| 39 | #define lg_local_unlock(name) name##_local_unlock() | ||
| 40 | #define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) | ||
| 41 | #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) | ||
| 42 | #define lg_global_lock(name) name##_global_lock() | ||
| 43 | #define lg_global_unlock(name) name##_global_unlock() | ||
| 44 | #define lg_global_lock_online(name) name##_global_lock_online() | ||
| 45 | #define lg_global_unlock_online(name) name##_global_unlock_online() | ||
| 46 | |||
| 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 48 | #define LOCKDEP_INIT_MAP lockdep_init_map | ||
| 49 | |||
| 50 | #define DEFINE_LGLOCK_LOCKDEP(name) \ | ||
| 51 | struct lock_class_key name##_lock_key; \ | ||
| 52 | struct lockdep_map name##_lock_dep_map; \ | ||
| 53 | EXPORT_SYMBOL(name##_lock_dep_map) | ||
| 54 | |||
| 55 | #else | ||
| 56 | #define LOCKDEP_INIT_MAP(a, b, c, d) | ||
| 57 | |||
| 58 | #define DEFINE_LGLOCK_LOCKDEP(name) | ||
| 59 | #endif | ||
| 60 | |||
| 61 | |||
| 62 | #define DECLARE_LGLOCK(name) \ | ||
| 63 | extern void name##_lock_init(void); \ | ||
| 64 | extern void name##_local_lock(void); \ | ||
| 65 | extern void name##_local_unlock(void); \ | ||
| 66 | extern void name##_local_lock_cpu(int cpu); \ | ||
| 67 | extern void name##_local_unlock_cpu(int cpu); \ | ||
| 68 | extern void name##_global_lock(void); \ | ||
| 69 | extern void name##_global_unlock(void); \ | ||
| 70 | extern void name##_global_lock_online(void); \ | ||
| 71 | extern void name##_global_unlock_online(void); \ | ||
| 72 | |||
| 73 | #define DEFINE_LGLOCK(name) \ | ||
| 74 | \ | ||
| 75 | DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ | ||
| 76 | DEFINE_LGLOCK_LOCKDEP(name); \ | ||
| 77 | \ | ||
| 78 | void name##_lock_init(void) { \ | ||
| 79 | int i; \ | ||
| 80 | LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ | ||
| 81 | for_each_possible_cpu(i) { \ | ||
| 82 | arch_spinlock_t *lock; \ | ||
| 83 | lock = &per_cpu(name##_lock, i); \ | ||
| 84 | *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ | ||
| 85 | } \ | ||
| 86 | } \ | ||
| 87 | EXPORT_SYMBOL(name##_lock_init); \ | ||
| 88 | \ | ||
| 89 | void name##_local_lock(void) { \ | ||
| 90 | arch_spinlock_t *lock; \ | ||
| 91 | preempt_disable(); \ | ||
| 92 | rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ | ||
| 93 | lock = &__get_cpu_var(name##_lock); \ | ||
| 94 | arch_spin_lock(lock); \ | ||
| 95 | } \ | ||
| 96 | EXPORT_SYMBOL(name##_local_lock); \ | ||
| 97 | \ | ||
| 98 | void name##_local_unlock(void) { \ | ||
| 99 | arch_spinlock_t *lock; \ | ||
| 100 | rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ | ||
| 101 | lock = &__get_cpu_var(name##_lock); \ | ||
| 102 | arch_spin_unlock(lock); \ | ||
| 103 | preempt_enable(); \ | ||
| 104 | } \ | ||
| 105 | EXPORT_SYMBOL(name##_local_unlock); \ | ||
| 106 | \ | ||
| 107 | void name##_local_lock_cpu(int cpu) { \ | ||
| 108 | arch_spinlock_t *lock; \ | ||
| 109 | preempt_disable(); \ | ||
| 110 | rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ | ||
| 111 | lock = &per_cpu(name##_lock, cpu); \ | ||
| 112 | arch_spin_lock(lock); \ | ||
| 113 | } \ | ||
| 114 | EXPORT_SYMBOL(name##_local_lock_cpu); \ | ||
| 115 | \ | ||
| 116 | void name##_local_unlock_cpu(int cpu) { \ | ||
| 117 | arch_spinlock_t *lock; \ | ||
| 118 | rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ | ||
| 119 | lock = &per_cpu(name##_lock, cpu); \ | ||
| 120 | arch_spin_unlock(lock); \ | ||
| 121 | preempt_enable(); \ | ||
| 122 | } \ | ||
| 123 | EXPORT_SYMBOL(name##_local_unlock_cpu); \ | ||
| 124 | \ | ||
| 125 | void name##_global_lock_online(void) { \ | ||
| 126 | int i; \ | ||
| 127 | preempt_disable(); \ | ||
| 128 | rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ | ||
| 129 | for_each_online_cpu(i) { \ | ||
| 130 | arch_spinlock_t *lock; \ | ||
| 131 | lock = &per_cpu(name##_lock, i); \ | ||
| 132 | arch_spin_lock(lock); \ | ||
| 133 | } \ | ||
| 134 | } \ | ||
| 135 | EXPORT_SYMBOL(name##_global_lock_online); \ | ||
| 136 | \ | ||
| 137 | void name##_global_unlock_online(void) { \ | ||
| 138 | int i; \ | ||
| 139 | rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ | ||
| 140 | for_each_online_cpu(i) { \ | ||
| 141 | arch_spinlock_t *lock; \ | ||
| 142 | lock = &per_cpu(name##_lock, i); \ | ||
| 143 | arch_spin_unlock(lock); \ | ||
| 144 | } \ | ||
| 145 | preempt_enable(); \ | ||
| 146 | } \ | ||
| 147 | EXPORT_SYMBOL(name##_global_unlock_online); \ | ||
| 148 | \ | ||
| 149 | void name##_global_lock(void) { \ | ||
| 150 | int i; \ | ||
| 151 | preempt_disable(); \ | ||
| 152 | rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ | ||
| 153 | for_each_online_cpu(i) { \ | ||
| 154 | arch_spinlock_t *lock; \ | ||
| 155 | lock = &per_cpu(name##_lock, i); \ | ||
| 156 | arch_spin_lock(lock); \ | ||
| 157 | } \ | ||
| 158 | } \ | ||
| 159 | EXPORT_SYMBOL(name##_global_lock); \ | ||
| 160 | \ | ||
| 161 | void name##_global_unlock(void) { \ | ||
| 162 | int i; \ | ||
| 163 | rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ | ||
| 164 | for_each_online_cpu(i) { \ | ||
| 165 | arch_spinlock_t *lock; \ | ||
| 166 | lock = &per_cpu(name##_lock, i); \ | ||
| 167 | arch_spin_unlock(lock); \ | ||
| 168 | } \ | ||
| 169 | preempt_enable(); \ | ||
| 170 | } \ | ||
| 171 | EXPORT_SYMBOL(name##_global_unlock); | ||
| 172 | #endif | ||
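[Editor's note] As a usage sketch of the brlock shorthand (demo_lock and the functions around it are hypothetical names, not from this patch): readers take only their own CPU's spinlock, while a writer must sweep the locks of every online CPU, which is why the read side is fast and the write side is deliberately slow:

	DECLARE_BRLOCK(demo_lock);
	DEFINE_BRLOCK(demo_lock);

	static int __init demo_init(void)
	{
		br_lock_init(demo_lock);	/* initialise every per-CPU lock */
		return 0;
	}

	static void demo_reader(void)
	{
		br_read_lock(demo_lock);	/* fast path: this CPU's lock only */
		/* read-side critical section */
		br_read_unlock(demo_lock);
	}

	static void demo_writer(void)
	{
		br_write_lock(demo_lock);	/* slow path: lock all online CPUs */
		/* write-side critical section */
		br_write_unlock(demo_lock);
	}

Note that br_write_lock maps to the _online variants, so it serialises against readers on online CPUs only; the plain lg_global_lock form exists for the fully global case.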
diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h | |||
| @@ -329,6 +329,13 @@ struct tty_struct { | |||
| 329 | struct tty_port *port; | 329 | struct tty_port *port; |
| 330 | }; | 330 | }; |
| 331 | 331 | ||
| 332 | /* Each of a tty's open files has private_data pointing to tty_file_private */ | ||
| 333 | struct tty_file_private { | ||
| 334 | struct tty_struct *tty; | ||
| 335 | struct file *file; | ||
| 336 | struct list_head list; | ||
| 337 | }; | ||
| 338 | |||
| 332 | /* tty magic number */ | 339 | /* tty magic number */ |
| 333 | #define TTY_MAGIC 0x5401 | 340 | #define TTY_MAGIC 0x5401 |
| 334 | 341 | ||
| @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); | |||
| 458 | extern struct tty_struct *get_current_tty(void); | 465 | extern struct tty_struct *get_current_tty(void); |
| 459 | extern void tty_default_fops(struct file_operations *fops); | 466 | extern void tty_default_fops(struct file_operations *fops); |
| 460 | extern struct tty_struct *alloc_tty_struct(void); | 467 | extern struct tty_struct *alloc_tty_struct(void); |
| 468 | extern void tty_add_file(struct tty_struct *tty, struct file *file); | ||
| 461 | extern void free_tty_struct(struct tty_struct *tty); | 469 | extern void free_tty_struct(struct tty_struct *tty); |
| 462 | extern void initialize_tty_struct(struct tty_struct *tty, | 470 | extern void initialize_tty_struct(struct tty_struct *tty, |
| 463 | struct tty_driver *driver, int idx); | 471 | struct tty_driver *driver, int idx); |
| @@ -470,6 +478,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); | |||
| 470 | extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); | 478 | extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); |
| 471 | 479 | ||
| 472 | extern struct mutex tty_mutex; | 480 | extern struct mutex tty_mutex; |
| 481 | extern spinlock_t tty_files_lock; | ||
| 473 | 482 | ||
| 474 | extern void tty_write_unlock(struct tty_struct *tty); | 483 | extern void tty_write_unlock(struct tty_struct *tty); |
| 475 | extern int tty_write_lock(struct tty_struct *tty, int ndelay); | 484 | extern int tty_write_lock(struct tty_struct *tty, int ndelay); |
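[Editor's note] Since file->private_data now points at a tty_file_private rather than at the tty itself, code that walks a tty's open files goes through the new indirection, under tty_files_lock instead of the old global files_lock. A sketch of the walk (the loop body is illustrative only):

	struct tty_file_private *priv;

	spin_lock(&tty_files_lock);
	list_for_each_entry(priv, &tty->tty_files, list) {
		struct file *filp = priv->file;

		/* inspect filp; its tty is also reachable as priv->tty */
	}
	spin_unlock(&tty_files_lock);

The selinux hunk at the end of this patch is the list_first_entry() form of the same access pattern.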
diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) | |||
| 752 | struct fs_struct *fs = current->fs; | 752 | struct fs_struct *fs = current->fs; |
| 753 | if (clone_flags & CLONE_FS) { | 753 | if (clone_flags & CLONE_FS) { |
| 754 | /* tsk->fs is already what we want */ | 754 | /* tsk->fs is already what we want */ |
| 755 | write_lock(&fs->lock); | 755 | spin_lock(&fs->lock); |
| 756 | if (fs->in_exec) { | 756 | if (fs->in_exec) { |
| 757 | write_unlock(&fs->lock); | 757 | spin_unlock(&fs->lock); |
| 758 | return -EAGAIN; | 758 | return -EAGAIN; |
| 759 | } | 759 | } |
| 760 | fs->users++; | 760 | fs->users++; |
| 761 | write_unlock(&fs->lock); | 761 | spin_unlock(&fs->lock); |
| 762 | return 0; | 762 | return 0; |
| 763 | } | 763 | } |
| 764 | tsk->fs = copy_fs_struct(fs); | 764 | tsk->fs = copy_fs_struct(fs); |
| @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
| 1676 | 1676 | ||
| 1677 | if (new_fs) { | 1677 | if (new_fs) { |
| 1678 | fs = current->fs; | 1678 | fs = current->fs; |
| 1679 | write_lock(&fs->lock); | 1679 | spin_lock(&fs->lock); |
| 1680 | current->fs = new_fs; | 1680 | current->fs = new_fs; |
| 1681 | if (--fs->users) | 1681 | if (--fs->users) |
| 1682 | new_fs = NULL; | 1682 | new_fs = NULL; |
| 1683 | else | 1683 | else |
| 1684 | new_fs = fs; | 1684 | new_fs = fs; |
| 1685 | write_unlock(&fs->lock); | 1685 | spin_unlock(&fs->lock); |
| 1686 | } | 1686 | } |
| 1687 | 1687 | ||
| 1688 | if (new_mm) { | 1688 | if (new_mm) { |
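[Editor's note] The matching release side keeps the same shape: drop the reference under the spinlock and free the structure only when the count hits zero. A sketch of the exit path under the new locking, mirroring the fs/fs_struct.c counterpart that is not shown in this excerpt (free_fs_struct() is the existing destructor):

	void exit_fs(struct task_struct *tsk)
	{
		struct fs_struct *fs = tsk->fs;

		if (fs) {
			int kill;

			task_lock(tsk);
			spin_lock(&fs->lock);
			tsk->fs = NULL;
			kill = !--fs->users;	/* last user frees the struct */
			spin_unlock(&fs->lock);
			task_unlock(tsk);
			if (kill)
				free_fs_struct(fs);
		}
	}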
diff --git a/security/apparmor/path.c b/security/apparmor/path.c index 96bab9469d48..19358dc14605 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c | |||
| @@ -62,19 +62,14 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, | |||
| 62 | int deleted, connected; | 62 | int deleted, connected; |
| 63 | int error = 0; | 63 | int error = 0; |
| 64 | 64 | ||
| 65 | /* Get the root we want to resolve to */ | 65 | /* Get the root we want to resolve to, released below */ |
| 66 | if (flags & PATH_CHROOT_REL) { | 66 | if (flags & PATH_CHROOT_REL) { |
| 67 | /* resolve paths relative to chroot */ | 67 | /* resolve paths relative to chroot */ |
| 68 | read_lock(¤t->fs->lock); | 68 | get_fs_root(current->fs, &root); |
| 69 | root = current->fs->root; | ||
| 70 | /* released below */ | ||
| 71 | path_get(&root); | ||
| 72 | read_unlock(¤t->fs->lock); | ||
| 73 | } else { | 69 | } else { |
| 74 | /* resolve paths relative to namespace */ | 70 | /* resolve paths relative to namespace */ |
| 75 | root.mnt = current->nsproxy->mnt_ns->root; | 71 | root.mnt = current->nsproxy->mnt_ns->root; |
| 76 | root.dentry = root.mnt->mnt_root; | 72 | root.dentry = root.mnt->mnt_root; |
| 77 | /* released below */ | ||
| 78 | path_get(&root); | 73 | path_get(&root); |
| 79 | } | 74 | } |
| 80 | 75 | ||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -2170,8 +2170,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, | |||
| 2170 | 2170 | ||
| 2171 | tty = get_current_tty(); | 2171 | tty = get_current_tty(); |
| 2172 | if (tty) { | 2172 | if (tty) { |
| 2173 | file_list_lock(); | 2173 | spin_lock(&tty_files_lock); |
| 2174 | if (!list_empty(&tty->tty_files)) { | 2174 | if (!list_empty(&tty->tty_files)) { |
| 2175 | struct tty_file_private *file_priv; | ||
| 2175 | struct inode *inode; | 2176 | struct inode *inode; |
| 2176 | 2177 | ||
| 2177 | /* Revalidate access to controlling tty. | 2178 | /* Revalidate access to controlling tty. |
| @@ -2179,14 +2180,16 @@ static inline void flush_unauthorized_files(const struct cred *cred, | |||
| 2179 | than using file_has_perm, as this particular open | 2180 | than using file_has_perm, as this particular open |
| 2180 | file may belong to another process and we are only | 2181 | file may belong to another process and we are only |
| 2181 | interested in the inode-based check here. */ | 2182 | interested in the inode-based check here. */ |
| 2182 | file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); | 2183 | file_priv = list_first_entry(&tty->tty_files, |
| 2184 | struct tty_file_private, list); | ||
| 2185 | file = file_priv->file; | ||
| 2183 | inode = file->f_path.dentry->d_inode; | 2186 | inode = file->f_path.dentry->d_inode; |
| 2184 | if (inode_has_perm(cred, inode, | 2187 | if (inode_has_perm(cred, inode, |
| 2185 | FILE__READ | FILE__WRITE, NULL)) { | 2188 | FILE__READ | FILE__WRITE, NULL)) { |
| 2186 | drop_tty = 1; | 2189 | drop_tty = 1; |
| 2187 | } | 2190 | } |
| 2188 | } | 2191 | } |
| 2189 | file_list_unlock(); | 2192 | spin_unlock(&tty_files_lock); |
| 2190 | tty_kref_put(tty); | 2193 | tty_kref_put(tty); |
| 2191 | } | 2194 | } |
| 2192 | /* Reset controlling tty. */ | 2195 | /* Reset controlling tty. */ |
