diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-14 05:55:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-04-21 19:19:29 -0400 |
commit | 989a2979205dd34269382b357e6d4b4b6956b889 (patch) | |
tree | 2f504e9f4d8d418dd8fb2d042b076c1318232360 | |
parent | e5700aff144fbbba46be40049f0c55fb57283777 (diff) |
fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, candidate for RCU conversion, to
avoid cache line ping pongs on SMP.
fasync_remove_entry() and fasync_add_entry() can disable IRQS on a short
section instead during whole list scan.
Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesnt need its own implementation and can use fasync_helper(), to
reduce code size and complexity.
We can remove __kill_fasync() direct use in net/socket.c, and rename it
to kill_fasync_rcu().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | fs/fcntl.c | 66 | ||||
-rw-r--r-- | include/linux/fs.h | 12 | ||||
-rw-r--r-- | net/socket.c | 73 |
3 files changed, 59 insertions, 92 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c index 452d02f9075e..0a140741b39e 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown) | |||
614 | return ret; | 614 | return ret; |
615 | } | 615 | } |
616 | 616 | ||
617 | static DEFINE_RWLOCK(fasync_lock); | 617 | static DEFINE_SPINLOCK(fasync_lock); |
618 | static struct kmem_cache *fasync_cache __read_mostly; | 618 | static struct kmem_cache *fasync_cache __read_mostly; |
619 | 619 | ||
620 | static void fasync_free_rcu(struct rcu_head *head) | ||
621 | { | ||
622 | kmem_cache_free(fasync_cache, | ||
623 | container_of(head, struct fasync_struct, fa_rcu)); | ||
624 | } | ||
625 | |||
620 | /* | 626 | /* |
621 | * Remove a fasync entry. If successfully removed, return | 627 | * Remove a fasync entry. If successfully removed, return |
622 | * positive and clear the FASYNC flag. If no entry exists, | 628 | * positive and clear the FASYNC flag. If no entry exists, |
@@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly; | |||
625 | * NOTE! It is very important that the FASYNC flag always | 631 | * NOTE! It is very important that the FASYNC flag always |
626 | * match the state "is the filp on a fasync list". | 632 | * match the state "is the filp on a fasync list". |
627 | * | 633 | * |
628 | * We always take the 'filp->f_lock', in since fasync_lock | ||
629 | * needs to be irq-safe. | ||
630 | */ | 634 | */ |
631 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | 635 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) |
632 | { | 636 | { |
@@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | |||
634 | int result = 0; | 638 | int result = 0; |
635 | 639 | ||
636 | spin_lock(&filp->f_lock); | 640 | spin_lock(&filp->f_lock); |
637 | write_lock_irq(&fasync_lock); | 641 | spin_lock(&fasync_lock); |
638 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 642 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
639 | if (fa->fa_file != filp) | 643 | if (fa->fa_file != filp) |
640 | continue; | 644 | continue; |
645 | |||
646 | spin_lock_irq(&fa->fa_lock); | ||
647 | fa->fa_file = NULL; | ||
648 | spin_unlock_irq(&fa->fa_lock); | ||
649 | |||
641 | *fp = fa->fa_next; | 650 | *fp = fa->fa_next; |
642 | kmem_cache_free(fasync_cache, fa); | 651 | call_rcu(&fa->fa_rcu, fasync_free_rcu); |
643 | filp->f_flags &= ~FASYNC; | 652 | filp->f_flags &= ~FASYNC; |
644 | result = 1; | 653 | result = 1; |
645 | break; | 654 | break; |
646 | } | 655 | } |
647 | write_unlock_irq(&fasync_lock); | 656 | spin_unlock(&fasync_lock); |
648 | spin_unlock(&filp->f_lock); | 657 | spin_unlock(&filp->f_lock); |
649 | return result; | 658 | return result; |
650 | } | 659 | } |
@@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa | |||
666 | return -ENOMEM; | 675 | return -ENOMEM; |
667 | 676 | ||
668 | spin_lock(&filp->f_lock); | 677 | spin_lock(&filp->f_lock); |
669 | write_lock_irq(&fasync_lock); | 678 | spin_lock(&fasync_lock); |
670 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 679 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
671 | if (fa->fa_file != filp) | 680 | if (fa->fa_file != filp) |
672 | continue; | 681 | continue; |
682 | |||
683 | spin_lock_irq(&fa->fa_lock); | ||
673 | fa->fa_fd = fd; | 684 | fa->fa_fd = fd; |
685 | spin_unlock_irq(&fa->fa_lock); | ||
686 | |||
674 | kmem_cache_free(fasync_cache, new); | 687 | kmem_cache_free(fasync_cache, new); |
675 | goto out; | 688 | goto out; |
676 | } | 689 | } |
677 | 690 | ||
691 | spin_lock_init(&new->fa_lock); | ||
678 | new->magic = FASYNC_MAGIC; | 692 | new->magic = FASYNC_MAGIC; |
679 | new->fa_file = filp; | 693 | new->fa_file = filp; |
680 | new->fa_fd = fd; | 694 | new->fa_fd = fd; |
681 | new->fa_next = *fapp; | 695 | new->fa_next = *fapp; |
682 | *fapp = new; | 696 | rcu_assign_pointer(*fapp, new); |
683 | result = 1; | 697 | result = 1; |
684 | filp->f_flags |= FASYNC; | 698 | filp->f_flags |= FASYNC; |
685 | 699 | ||
686 | out: | 700 | out: |
687 | write_unlock_irq(&fasync_lock); | 701 | spin_unlock(&fasync_lock); |
688 | spin_unlock(&filp->f_lock); | 702 | spin_unlock(&filp->f_lock); |
689 | return result; | 703 | return result; |
690 | } | 704 | } |
@@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap | |||
704 | 718 | ||
705 | EXPORT_SYMBOL(fasync_helper); | 719 | EXPORT_SYMBOL(fasync_helper); |
706 | 720 | ||
707 | void __kill_fasync(struct fasync_struct *fa, int sig, int band) | 721 | /* |
722 | * rcu_read_lock() is held | ||
723 | */ | ||
724 | static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) | ||
708 | { | 725 | { |
709 | while (fa) { | 726 | while (fa) { |
710 | struct fown_struct * fown; | 727 | struct fown_struct *fown; |
711 | if (fa->magic != FASYNC_MAGIC) { | 728 | if (fa->magic != FASYNC_MAGIC) { |
712 | printk(KERN_ERR "kill_fasync: bad magic number in " | 729 | printk(KERN_ERR "kill_fasync: bad magic number in " |
713 | "fasync_struct!\n"); | 730 | "fasync_struct!\n"); |
714 | return; | 731 | return; |
715 | } | 732 | } |
716 | fown = &fa->fa_file->f_owner; | 733 | spin_lock(&fa->fa_lock); |
717 | /* Don't send SIGURG to processes which have not set a | 734 | if (fa->fa_file) { |
718 | queued signum: SIGURG has its own default signalling | 735 | fown = &fa->fa_file->f_owner; |
719 | mechanism. */ | 736 | /* Don't send SIGURG to processes which have not set a |
720 | if (!(sig == SIGURG && fown->signum == 0)) | 737 | queued signum: SIGURG has its own default signalling |
721 | send_sigio(fown, fa->fa_fd, band); | 738 | mechanism. */ |
722 | fa = fa->fa_next; | 739 | if (!(sig == SIGURG && fown->signum == 0)) |
740 | send_sigio(fown, fa->fa_fd, band); | ||
741 | } | ||
742 | spin_unlock(&fa->fa_lock); | ||
743 | fa = rcu_dereference(fa->fa_next); | ||
723 | } | 744 | } |
724 | } | 745 | } |
725 | 746 | ||
726 | EXPORT_SYMBOL(__kill_fasync); | ||
727 | |||
728 | void kill_fasync(struct fasync_struct **fp, int sig, int band) | 747 | void kill_fasync(struct fasync_struct **fp, int sig, int band) |
729 | { | 748 | { |
730 | /* First a quick test without locking: usually | 749 | /* First a quick test without locking: usually |
731 | * the list is empty. | 750 | * the list is empty. |
732 | */ | 751 | */ |
733 | if (*fp) { | 752 | if (*fp) { |
734 | read_lock(&fasync_lock); | 753 | rcu_read_lock(); |
735 | /* reread *fp after obtaining the lock */ | 754 | kill_fasync_rcu(rcu_dereference(*fp), sig, band); |
736 | __kill_fasync(*fp, sig, band); | 755 | rcu_read_unlock(); |
737 | read_unlock(&fasync_lock); | ||
738 | } | 756 | } |
739 | } | 757 | } |
740 | EXPORT_SYMBOL(kill_fasync); | 758 | EXPORT_SYMBOL(kill_fasync); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 39d57bc6cc71..018d382f6f92 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start, | |||
1280 | 1280 | ||
1281 | 1281 | ||
1282 | struct fasync_struct { | 1282 | struct fasync_struct { |
1283 | int magic; | 1283 | spinlock_t fa_lock; |
1284 | int fa_fd; | 1284 | int magic; |
1285 | struct fasync_struct *fa_next; /* singly linked list */ | 1285 | int fa_fd; |
1286 | struct file *fa_file; | 1286 | struct fasync_struct *fa_next; /* singly linked list */ |
1287 | struct file *fa_file; | ||
1288 | struct rcu_head fa_rcu; | ||
1287 | }; | 1289 | }; |
1288 | 1290 | ||
1289 | #define FASYNC_MAGIC 0x4601 | 1291 | #define FASYNC_MAGIC 0x4601 |
@@ -1292,8 +1294,6 @@ struct fasync_struct { | |||
1292 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); | 1294 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1293 | /* can be called from interrupts */ | 1295 | /* can be called from interrupts */ |
1294 | extern void kill_fasync(struct fasync_struct **, int, int); | 1296 | extern void kill_fasync(struct fasync_struct **, int, int); |
1295 | /* only for net: no internal synchronization */ | ||
1296 | extern void __kill_fasync(struct fasync_struct *, int, int); | ||
1297 | 1297 | ||
1298 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); | 1298 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1299 | extern int f_setown(struct file *filp, unsigned long arg, int force); | 1299 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
diff --git a/net/socket.c b/net/socket.c index 35bc198bbf68..9822081eab38 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp) | |||
1067 | * 1. fasync_list is modified only under process context socket lock | 1067 | * 1. fasync_list is modified only under process context socket lock |
1068 | * i.e. under semaphore. | 1068 | * i.e. under semaphore. |
1069 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) | 1069 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) |
1070 | * or under socket lock. | 1070 | * or under socket lock |
1071 | * 3. fasync_list can be used from softirq context, so that | ||
1072 | * modification under socket lock have to be enhanced with | ||
1073 | * write_lock_bh(&sk->sk_callback_lock). | ||
1074 | * --ANK (990710) | ||
1075 | */ | 1071 | */ |
1076 | 1072 | ||
1077 | static int sock_fasync(int fd, struct file *filp, int on) | 1073 | static int sock_fasync(int fd, struct file *filp, int on) |
1078 | { | 1074 | { |
1079 | struct fasync_struct *fa, *fna = NULL, **prev; | 1075 | struct socket *sock = filp->private_data; |
1080 | struct socket *sock; | 1076 | struct sock *sk = sock->sk; |
1081 | struct sock *sk; | ||
1082 | |||
1083 | if (on) { | ||
1084 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); | ||
1085 | if (fna == NULL) | ||
1086 | return -ENOMEM; | ||
1087 | } | ||
1088 | |||
1089 | sock = filp->private_data; | ||
1090 | 1077 | ||
1091 | sk = sock->sk; | 1078 | if (sk == NULL) |
1092 | if (sk == NULL) { | ||
1093 | kfree(fna); | ||
1094 | return -EINVAL; | 1079 | return -EINVAL; |
1095 | } | ||
1096 | 1080 | ||
1097 | lock_sock(sk); | 1081 | lock_sock(sk); |
1098 | 1082 | ||
1099 | spin_lock(&filp->f_lock); | 1083 | fasync_helper(fd, filp, on, &sock->fasync_list); |
1100 | if (on) | ||
1101 | filp->f_flags |= FASYNC; | ||
1102 | else | ||
1103 | filp->f_flags &= ~FASYNC; | ||
1104 | spin_unlock(&filp->f_lock); | ||
1105 | |||
1106 | prev = &(sock->fasync_list); | ||
1107 | 1084 | ||
1108 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) | 1085 | if (!sock->fasync_list) |
1109 | if (fa->fa_file == filp) | 1086 | sock_reset_flag(sk, SOCK_FASYNC); |
1110 | break; | 1087 | else |
1111 | |||
1112 | if (on) { | ||
1113 | if (fa != NULL) { | ||
1114 | write_lock_bh(&sk->sk_callback_lock); | ||
1115 | fa->fa_fd = fd; | ||
1116 | write_unlock_bh(&sk->sk_callback_lock); | ||
1117 | |||
1118 | kfree(fna); | ||
1119 | goto out; | ||
1120 | } | ||
1121 | fna->fa_file = filp; | ||
1122 | fna->fa_fd = fd; | ||
1123 | fna->magic = FASYNC_MAGIC; | ||
1124 | fna->fa_next = sock->fasync_list; | ||
1125 | write_lock_bh(&sk->sk_callback_lock); | ||
1126 | sock->fasync_list = fna; | ||
1127 | sock_set_flag(sk, SOCK_FASYNC); | 1088 | sock_set_flag(sk, SOCK_FASYNC); |
1128 | write_unlock_bh(&sk->sk_callback_lock); | ||
1129 | } else { | ||
1130 | if (fa != NULL) { | ||
1131 | write_lock_bh(&sk->sk_callback_lock); | ||
1132 | *prev = fa->fa_next; | ||
1133 | if (!sock->fasync_list) | ||
1134 | sock_reset_flag(sk, SOCK_FASYNC); | ||
1135 | write_unlock_bh(&sk->sk_callback_lock); | ||
1136 | kfree(fa); | ||
1137 | } | ||
1138 | } | ||
1139 | 1089 | ||
1140 | out: | 1090 | release_sock(sk); |
1141 | release_sock(sock->sk); | ||
1142 | return 0; | 1091 | return 0; |
1143 | } | 1092 | } |
1144 | 1093 | ||
@@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band) | |||
1159 | /* fall through */ | 1108 | /* fall through */ |
1160 | case SOCK_WAKE_IO: | 1109 | case SOCK_WAKE_IO: |
1161 | call_kill: | 1110 | call_kill: |
1162 | __kill_fasync(sock->fasync_list, SIGIO, band); | 1111 | kill_fasync(&sock->fasync_list, SIGIO, band); |
1163 | break; | 1112 | break; |
1164 | case SOCK_WAKE_URG: | 1113 | case SOCK_WAKE_URG: |
1165 | __kill_fasync(sock->fasync_list, SIGURG, band); | 1114 | kill_fasync(&sock->fasync_list, SIGURG, band); |
1166 | } | 1115 | } |
1167 | return 0; | 1116 | return 0; |
1168 | } | 1117 | } |