aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-04-14 05:55:35 -0400
committer David S. Miller <davem@davemloft.net> 2010-04-21 19:19:29 -0400
commit 989a2979205dd34269382b357e6d4b4b6956b889 (patch)
tree 2f504e9f4d8d418dd8fb2d042b076c1318232360
parent e5700aff144fbbba46be40049f0c55fb57283777 (diff)
fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, which makes it a candidate for RCU conversion to avoid cache line ping-pong on SMP. fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short section only, instead of during the whole list scan. Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesn't need its own implementation and can use fasync_helper() to reduce code size and complexity. We can remove the direct use of __kill_fasync() in net/socket.c, and rename it to kill_fasync_rcu(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- fs/fcntl.c 66
-rw-r--r-- include/linux/fs.h 12
-rw-r--r-- net/socket.c 73
3 files changed, 59 insertions, 92 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 452d02f9075e..0a140741b39e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
614 return ret; 614 return ret;
615} 615}
616 616
617static DEFINE_RWLOCK(fasync_lock); 617static DEFINE_SPINLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620static void fasync_free_rcu(struct rcu_head *head)
621{
622 kmem_cache_free(fasync_cache,
623 container_of(head, struct fasync_struct, fa_rcu));
624}
625
620/* 626/*
621 * Remove a fasync entry. If successfully removed, return 627 * Remove a fasync entry. If successfully removed, return
622 * positive and clear the FASYNC flag. If no entry exists, 628 * positive and clear the FASYNC flag. If no entry exists,
@@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
625 * NOTE! It is very important that the FASYNC flag always 631 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list". 632 * match the state "is the filp on a fasync list".
627 * 633 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
630 */ 634 */
631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 635static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
632{ 636{
@@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
634 int result = 0; 638 int result = 0;
635 639
636 spin_lock(&filp->f_lock); 640 spin_lock(&filp->f_lock);
637 write_lock_irq(&fasync_lock); 641 spin_lock(&fasync_lock);
638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 642 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
639 if (fa->fa_file != filp) 643 if (fa->fa_file != filp)
640 continue; 644 continue;
645
646 spin_lock_irq(&fa->fa_lock);
647 fa->fa_file = NULL;
648 spin_unlock_irq(&fa->fa_lock);
649
641 *fp = fa->fa_next; 650 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa); 651 call_rcu(&fa->fa_rcu, fasync_free_rcu);
643 filp->f_flags &= ~FASYNC; 652 filp->f_flags &= ~FASYNC;
644 result = 1; 653 result = 1;
645 break; 654 break;
646 } 655 }
647 write_unlock_irq(&fasync_lock); 656 spin_unlock(&fasync_lock);
648 spin_unlock(&filp->f_lock); 657 spin_unlock(&filp->f_lock);
649 return result; 658 return result;
650} 659}
@@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
666 return -ENOMEM; 675 return -ENOMEM;
667 676
668 spin_lock(&filp->f_lock); 677 spin_lock(&filp->f_lock);
669 write_lock_irq(&fasync_lock); 678 spin_lock(&fasync_lock);
670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 679 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
671 if (fa->fa_file != filp) 680 if (fa->fa_file != filp)
672 continue; 681 continue;
682
683 spin_lock_irq(&fa->fa_lock);
673 fa->fa_fd = fd; 684 fa->fa_fd = fd;
685 spin_unlock_irq(&fa->fa_lock);
686
674 kmem_cache_free(fasync_cache, new); 687 kmem_cache_free(fasync_cache, new);
675 goto out; 688 goto out;
676 } 689 }
677 690
691 spin_lock_init(&new->fa_lock);
678 new->magic = FASYNC_MAGIC; 692 new->magic = FASYNC_MAGIC;
679 new->fa_file = filp; 693 new->fa_file = filp;
680 new->fa_fd = fd; 694 new->fa_fd = fd;
681 new->fa_next = *fapp; 695 new->fa_next = *fapp;
682 *fapp = new; 696 rcu_assign_pointer(*fapp, new);
683 result = 1; 697 result = 1;
684 filp->f_flags |= FASYNC; 698 filp->f_flags |= FASYNC;
685 699
686out: 700out:
687 write_unlock_irq(&fasync_lock); 701 spin_unlock(&fasync_lock);
688 spin_unlock(&filp->f_lock); 702 spin_unlock(&filp->f_lock);
689 return result; 703 return result;
690} 704}
@@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
704 718
705EXPORT_SYMBOL(fasync_helper); 719EXPORT_SYMBOL(fasync_helper);
706 720
707void __kill_fasync(struct fasync_struct *fa, int sig, int band) 721/*
722 * rcu_read_lock() is held
723 */
724static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
708{ 725{
709 while (fa) { 726 while (fa) {
710 struct fown_struct * fown; 727 struct fown_struct *fown;
711 if (fa->magic != FASYNC_MAGIC) { 728 if (fa->magic != FASYNC_MAGIC) {
712 printk(KERN_ERR "kill_fasync: bad magic number in " 729 printk(KERN_ERR "kill_fasync: bad magic number in "
713 "fasync_struct!\n"); 730 "fasync_struct!\n");
714 return; 731 return;
715 } 732 }
716 fown = &fa->fa_file->f_owner; 733 spin_lock(&fa->fa_lock);
717 /* Don't send SIGURG to processes which have not set a 734 if (fa->fa_file) {
718 queued signum: SIGURG has its own default signalling 735 fown = &fa->fa_file->f_owner;
719 mechanism. */ 736 /* Don't send SIGURG to processes which have not set a
720 if (!(sig == SIGURG && fown->signum == 0)) 737 queued signum: SIGURG has its own default signalling
721 send_sigio(fown, fa->fa_fd, band); 738 mechanism. */
722 fa = fa->fa_next; 739 if (!(sig == SIGURG && fown->signum == 0))
740 send_sigio(fown, fa->fa_fd, band);
741 }
742 spin_unlock(&fa->fa_lock);
743 fa = rcu_dereference(fa->fa_next);
723 } 744 }
724} 745}
725 746
726EXPORT_SYMBOL(__kill_fasync);
727
728void kill_fasync(struct fasync_struct **fp, int sig, int band) 747void kill_fasync(struct fasync_struct **fp, int sig, int band)
729{ 748{
730 /* First a quick test without locking: usually 749 /* First a quick test without locking: usually
731 * the list is empty. 750 * the list is empty.
732 */ 751 */
733 if (*fp) { 752 if (*fp) {
734 read_lock(&fasync_lock); 753 rcu_read_lock();
735 /* reread *fp after obtaining the lock */ 754 kill_fasync_rcu(rcu_dereference(*fp), sig, band);
736 __kill_fasync(*fp, sig, band); 755 rcu_read_unlock();
737 read_unlock(&fasync_lock);
738 } 756 }
739} 757}
740EXPORT_SYMBOL(kill_fasync); 758EXPORT_SYMBOL(kill_fasync);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc6cc71..018d382f6f92 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
1280 1280
1281 1281
1282struct fasync_struct { 1282struct fasync_struct {
1283 int magic; 1283 spinlock_t fa_lock;
1284 int fa_fd; 1284 int magic;
1285 struct fasync_struct *fa_next; /* singly linked list */ 1285 int fa_fd;
1286 struct file *fa_file; 1286 struct fasync_struct *fa_next; /* singly linked list */
1287 struct file *fa_file;
1288 struct rcu_head fa_rcu;
1287}; 1289};
1288 1290
1289#define FASYNC_MAGIC 0x4601 1291#define FASYNC_MAGIC 0x4601
@@ -1292,8 +1294,6 @@ struct fasync_struct {
1292extern int fasync_helper(int, struct file *, int, struct fasync_struct **); 1294extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1293/* can be called from interrupts */ 1295/* can be called from interrupts */
1294extern void kill_fasync(struct fasync_struct **, int, int); 1296extern void kill_fasync(struct fasync_struct **, int, int);
1295/* only for net: no internal synchronization */
1296extern void __kill_fasync(struct fasync_struct *, int, int);
1297 1297
1298extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); 1298extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1299extern int f_setown(struct file *filp, unsigned long arg, int force); 1299extern int f_setown(struct file *filp, unsigned long arg, int force);
diff --git a/net/socket.c b/net/socket.c
index 35bc198bbf68..9822081eab38 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
1067 * 1. fasync_list is modified only under process context socket lock 1067 * 1. fasync_list is modified only under process context socket lock
1068 * i.e. under semaphore. 1068 * i.e. under semaphore.
1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1070 * or under socket lock. 1070 * or under socket lock
1071 * 3. fasync_list can be used from softirq context, so that
1072 * modification under socket lock have to be enhanced with
1073 * write_lock_bh(&sk->sk_callback_lock).
1074 * --ANK (990710)
1075 */ 1071 */
1076 1072
1077static int sock_fasync(int fd, struct file *filp, int on) 1073static int sock_fasync(int fd, struct file *filp, int on)
1078{ 1074{
1079 struct fasync_struct *fa, *fna = NULL, **prev; 1075 struct socket *sock = filp->private_data;
1080 struct socket *sock; 1076 struct sock *sk = sock->sk;
1081 struct sock *sk;
1082
1083 if (on) {
1084 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1085 if (fna == NULL)
1086 return -ENOMEM;
1087 }
1088
1089 sock = filp->private_data;
1090 1077
1091 sk = sock->sk; 1078 if (sk == NULL)
1092 if (sk == NULL) {
1093 kfree(fna);
1094 return -EINVAL; 1079 return -EINVAL;
1095 }
1096 1080
1097 lock_sock(sk); 1081 lock_sock(sk);
1098 1082
1099 spin_lock(&filp->f_lock); 1083 fasync_helper(fd, filp, on, &sock->fasync_list);
1100 if (on)
1101 filp->f_flags |= FASYNC;
1102 else
1103 filp->f_flags &= ~FASYNC;
1104 spin_unlock(&filp->f_lock);
1105
1106 prev = &(sock->fasync_list);
1107 1084
1108 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1085 if (!sock->fasync_list)
1109 if (fa->fa_file == filp) 1086 sock_reset_flag(sk, SOCK_FASYNC);
1110 break; 1087 else
1111
1112 if (on) {
1113 if (fa != NULL) {
1114 write_lock_bh(&sk->sk_callback_lock);
1115 fa->fa_fd = fd;
1116 write_unlock_bh(&sk->sk_callback_lock);
1117
1118 kfree(fna);
1119 goto out;
1120 }
1121 fna->fa_file = filp;
1122 fna->fa_fd = fd;
1123 fna->magic = FASYNC_MAGIC;
1124 fna->fa_next = sock->fasync_list;
1125 write_lock_bh(&sk->sk_callback_lock);
1126 sock->fasync_list = fna;
1127 sock_set_flag(sk, SOCK_FASYNC); 1088 sock_set_flag(sk, SOCK_FASYNC);
1128 write_unlock_bh(&sk->sk_callback_lock);
1129 } else {
1130 if (fa != NULL) {
1131 write_lock_bh(&sk->sk_callback_lock);
1132 *prev = fa->fa_next;
1133 if (!sock->fasync_list)
1134 sock_reset_flag(sk, SOCK_FASYNC);
1135 write_unlock_bh(&sk->sk_callback_lock);
1136 kfree(fa);
1137 }
1138 }
1139 1089
1140out: 1090 release_sock(sk);
1141 release_sock(sock->sk);
1142 return 0; 1091 return 0;
1143} 1092}
1144 1093
@@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
1159 /* fall through */ 1108 /* fall through */
1160 case SOCK_WAKE_IO: 1109 case SOCK_WAKE_IO:
1161call_kill: 1110call_kill:
1162 __kill_fasync(sock->fasync_list, SIGIO, band); 1111 kill_fasync(&sock->fasync_list, SIGIO, band);
1163 break; 1112 break;
1164 case SOCK_WAKE_URG: 1113 case SOCK_WAKE_URG:
1165 __kill_fasync(sock->fasync_list, SIGURG, band); 1114 kill_fasync(&sock->fasync_list, SIGURG, band);
1166 } 1115 }
1167 return 0; 1116 return 0;
1168} 1117}