diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2006-09-01 03:23:39 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-09-22 17:54:19 -0400 |
commit | 55737fda0bc73cb20f702301d8b52938a5a43630 (patch) | |
tree | a605776f772aa189a655da9e5854db5d95e844b7 | |
parent | 757dbb494be3309fe41ce4c62f8057d8b41d8897 (diff) |
[NET]: socket family using RCU
Replace the gross custom locking done in socket code for net_family[]
with simple RCU usage. Some reordering necessary to avoid sleep issues
with sock_alloc.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/socket.c | 174 |
1 files changed, 76 insertions, 98 deletions
diff --git a/net/socket.c b/net/socket.c
index 156f2efa4e4a..b5a3fcb9ed6d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -59,11 +59,11 @@ | |||
59 | */ | 59 | */ |
60 | 60 | ||
61 | #include <linux/mm.h> | 61 | #include <linux/mm.h> |
62 | #include <linux/smp_lock.h> | ||
63 | #include <linux/socket.h> | 62 | #include <linux/socket.h> |
64 | #include <linux/file.h> | 63 | #include <linux/file.h> |
65 | #include <linux/net.h> | 64 | #include <linux/net.h> |
66 | #include <linux/interrupt.h> | 65 | #include <linux/interrupt.h> |
66 | #include <linux/rcupdate.h> | ||
67 | #include <linux/netdevice.h> | 67 | #include <linux/netdevice.h> |
68 | #include <linux/proc_fs.h> | 68 | #include <linux/proc_fs.h> |
69 | #include <linux/seq_file.h> | 69 | #include <linux/seq_file.h> |
@@ -146,51 +146,8 @@ static struct file_operations socket_file_ops = { | |||
146 | * The protocol list. Each protocol is registered in here. | 146 | * The protocol list. Each protocol is registered in here. |
147 | */ | 147 | */ |
148 | 148 | ||
149 | static struct net_proto_family *net_families[NPROTO]; | ||
150 | |||
151 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
152 | static atomic_t net_family_lockct = ATOMIC_INIT(0); | ||
153 | static DEFINE_SPINLOCK(net_family_lock); | 149 | static DEFINE_SPINLOCK(net_family_lock); |
154 | 150 | static const struct net_proto_family *net_families[NPROTO]; | |
155 | /* The strategy is: modifications net_family vector are short, do not | ||
156 | sleep and veeery rare, but read access should be free of any exclusive | ||
157 | locks. | ||
158 | */ | ||
159 | |||
160 | static void net_family_write_lock(void) | ||
161 | { | ||
162 | spin_lock(&net_family_lock); | ||
163 | while (atomic_read(&net_family_lockct) != 0) { | ||
164 | spin_unlock(&net_family_lock); | ||
165 | |||
166 | yield(); | ||
167 | |||
168 | spin_lock(&net_family_lock); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static __inline__ void net_family_write_unlock(void) | ||
173 | { | ||
174 | spin_unlock(&net_family_lock); | ||
175 | } | ||
176 | |||
177 | static __inline__ void net_family_read_lock(void) | ||
178 | { | ||
179 | atomic_inc(&net_family_lockct); | ||
180 | spin_unlock_wait(&net_family_lock); | ||
181 | } | ||
182 | |||
183 | static __inline__ void net_family_read_unlock(void) | ||
184 | { | ||
185 | atomic_dec(&net_family_lockct); | ||
186 | } | ||
187 | |||
188 | #else | ||
189 | #define net_family_write_lock() do { } while(0) | ||
190 | #define net_family_write_unlock() do { } while(0) | ||
191 | #define net_family_read_lock() do { } while(0) | ||
192 | #define net_family_read_unlock() do { } while(0) | ||
193 | #endif | ||
194 | 151 | ||
195 | /* | 152 | /* |
196 | * Statistics counters of the socket lists | 153 | * Statistics counters of the socket lists |
@@ -1138,6 +1095,7 @@ static int __sock_create(int family, int type, int protocol, | |||
1138 | { | 1095 | { |
1139 | int err; | 1096 | int err; |
1140 | struct socket *sock; | 1097 | struct socket *sock; |
1098 | const struct net_proto_family *pf; | ||
1141 | 1099 | ||
1142 | /* | 1100 | /* |
1143 | * Check protocol is in range | 1101 | * Check protocol is in range |
@@ -1166,6 +1124,21 @@ static int __sock_create(int family, int type, int protocol, | |||
1166 | if (err) | 1124 | if (err) |
1167 | return err; | 1125 | return err; |
1168 | 1126 | ||
1127 | /* | ||
1128 | * Allocate the socket and allow the family to set things up. if | ||
1129 | * the protocol is 0, the family is instructed to select an appropriate | ||
1130 | * default. | ||
1131 | */ | ||
1132 | sock = sock_alloc(); | ||
1133 | if (!sock) { | ||
1134 | if (net_ratelimit()) | ||
1135 | printk(KERN_WARNING "socket: no more sockets\n"); | ||
1136 | return -ENFILE; /* Not exactly a match, but its the | ||
1137 | closest posix thing */ | ||
1138 | } | ||
1139 | |||
1140 | sock->type = type; | ||
1141 | |||
1169 | #if defined(CONFIG_KMOD) | 1142 | #if defined(CONFIG_KMOD) |
1170 | /* Attempt to load a protocol module if the find failed. | 1143 | /* Attempt to load a protocol module if the find failed. |
1171 | * | 1144 | * |
@@ -1173,72 +1146,61 @@ static int __sock_create(int family, int type, int protocol, | |||
1173 | * requested real, full-featured networking support upon configuration. | 1146 | * requested real, full-featured networking support upon configuration. |
1174 | * Otherwise module support will break! | 1147 | * Otherwise module support will break! |
1175 | */ | 1148 | */ |
1176 | if (net_families[family] == NULL) { | 1149 | if (net_families[family] == NULL) |
1177 | request_module("net-pf-%d", family); | 1150 | request_module("net-pf-%d", family); |
1178 | } | ||
1179 | #endif | 1151 | #endif |
1180 | 1152 | ||
1181 | net_family_read_lock(); | 1153 | rcu_read_lock(); |
1182 | if (net_families[family] == NULL) { | 1154 | pf = rcu_dereference(net_families[family]); |
1183 | err = -EAFNOSUPPORT; | 1155 | err = -EAFNOSUPPORT; |
1184 | goto out; | 1156 | if (!pf) |
1185 | } | 1157 | goto out_release; |
1186 | |||
1187 | /* | ||
1188 | * Allocate the socket and allow the family to set things up. if | ||
1189 | * the protocol is 0, the family is instructed to select an appropriate | ||
1190 | * default. | ||
1191 | */ | ||
1192 | |||
1193 | if (!(sock = sock_alloc())) { | ||
1194 | if (net_ratelimit()) | ||
1195 | printk(KERN_WARNING "socket: no more sockets\n"); | ||
1196 | err = -ENFILE; /* Not exactly a match, but its the | ||
1197 | closest posix thing */ | ||
1198 | goto out; | ||
1199 | } | ||
1200 | |||
1201 | sock->type = type; | ||
1202 | 1158 | ||
1203 | /* | 1159 | /* |
1204 | * We will call the ->create function, that possibly is in a loadable | 1160 | * We will call the ->create function, that possibly is in a loadable |
1205 | * module, so we have to bump that loadable module refcnt first. | 1161 | * module, so we have to bump that loadable module refcnt first. |
1206 | */ | 1162 | */ |
1207 | err = -EAFNOSUPPORT; | 1163 | if (!try_module_get(pf->owner)) |
1208 | if (!try_module_get(net_families[family]->owner)) | ||
1209 | goto out_release; | 1164 | goto out_release; |
1210 | 1165 | ||
1211 | if ((err = net_families[family]->create(sock, protocol)) < 0) { | 1166 | /* Now protected by module ref count */ |
1212 | sock->ops = NULL; | 1167 | rcu_read_unlock(); |
1168 | |||
1169 | err = pf->create(sock, protocol); | ||
1170 | if (err < 0) | ||
1213 | goto out_module_put; | 1171 | goto out_module_put; |
1214 | } | ||
1215 | 1172 | ||
1216 | /* | 1173 | /* |
1217 | * Now to bump the refcnt of the [loadable] module that owns this | 1174 | * Now to bump the refcnt of the [loadable] module that owns this |
1218 | * socket at sock_release time we decrement its refcnt. | 1175 | * socket at sock_release time we decrement its refcnt. |
1219 | */ | 1176 | */ |
1220 | if (!try_module_get(sock->ops->owner)) { | 1177 | if (!try_module_get(sock->ops->owner)) |
1221 | sock->ops = NULL; | 1178 | goto out_module_busy; |
1222 | goto out_module_put; | 1179 | |
1223 | } | ||
1224 | /* | 1180 | /* |
1225 | * Now that we're done with the ->create function, the [loadable] | 1181 | * Now that we're done with the ->create function, the [loadable] |
1226 | * module can have its refcnt decremented | 1182 | * module can have its refcnt decremented |
1227 | */ | 1183 | */ |
1228 | module_put(net_families[family]->owner); | 1184 | module_put(pf->owner); |
1229 | *res = sock; | ||
1230 | err = security_socket_post_create(sock, family, type, protocol, kern); | 1185 | err = security_socket_post_create(sock, family, type, protocol, kern); |
1231 | if (err) | 1186 | if (err) |
1232 | goto out_release; | 1187 | goto out_release; |
1188 | *res = sock; | ||
1233 | 1189 | ||
1234 | out: | 1190 | return 0; |
1235 | net_family_read_unlock(); | 1191 | |
1236 | return err; | 1192 | out_module_busy: |
1193 | err = -EAFNOSUPPORT; | ||
1237 | out_module_put: | 1194 | out_module_put: |
1238 | module_put(net_families[family]->owner); | 1195 | sock->ops = NULL; |
1239 | out_release: | 1196 | module_put(pf->owner); |
1197 | out_sock_release: | ||
1240 | sock_release(sock); | 1198 | sock_release(sock); |
1241 | goto out; | 1199 | return err; |
1200 | |||
1201 | out_release: | ||
1202 | rcu_read_unlock(); | ||
1203 | goto out_sock_release; | ||
1242 | } | 1204 | } |
1243 | 1205 | ||
1244 | int sock_create(int family, int type, int protocol, struct socket **res) | 1206 | int sock_create(int family, int type, int protocol, struct socket **res) |
@@ -2109,12 +2071,15 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) | |||
2109 | 2071 | ||
2110 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ | 2072 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ |
2111 | 2073 | ||
2112 | /* | 2074 | /** |
2075 | * sock_register - add a socket protocol handler | ||
2076 | * @ops: description of protocol | ||
2077 | * | ||
2113 | * This function is called by a protocol handler that wants to | 2078 | * This function is called by a protocol handler that wants to |
2114 | * advertise its address family, and have it linked into the | 2079 | * advertise its address family, and have it linked into the |
2115 | * SOCKET module. | 2080 | * socket interface. The value ops->family coresponds to the |
2081 | * socket system call protocol family. | ||
2116 | */ | 2082 | */ |
2117 | |||
2118 | int sock_register(struct net_proto_family *ops) | 2083 | int sock_register(struct net_proto_family *ops) |
2119 | { | 2084 | { |
2120 | int err; | 2085 | int err; |
@@ -2124,31 +2089,44 @@ int sock_register(struct net_proto_family *ops) | |||
2124 | NPROTO); | 2089 | NPROTO); |
2125 | return -ENOBUFS; | 2090 | return -ENOBUFS; |
2126 | } | 2091 | } |
2127 | net_family_write_lock(); | 2092 | |
2128 | err = -EEXIST; | 2093 | spin_lock(&net_family_lock); |
2129 | if (net_families[ops->family] == NULL) { | 2094 | if (net_families[ops->family]) |
2095 | err = -EEXIST; | ||
2096 | else { | ||
2130 | net_families[ops->family] = ops; | 2097 | net_families[ops->family] = ops; |
2131 | err = 0; | 2098 | err = 0; |
2132 | } | 2099 | } |
2133 | net_family_write_unlock(); | 2100 | spin_unlock(&net_family_lock); |
2101 | |||
2134 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); | 2102 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); |
2135 | return err; | 2103 | return err; |
2136 | } | 2104 | } |
2137 | 2105 | ||
2138 | /* | 2106 | /** |
2107 | * sock_unregister - remove a protocol handler | ||
2108 | * @family: protocol family to remove | ||
2109 | * | ||
2139 | * This function is called by a protocol handler that wants to | 2110 | * This function is called by a protocol handler that wants to |
2140 | * remove its address family, and have it unlinked from the | 2111 | * remove its address family, and have it unlinked from the |
2141 | * SOCKET module. | 2112 | * new socket creation. |
2113 | * | ||
2114 | * If protocol handler is a module, then it can use module reference | ||
2115 | * counts to protect against new references. If protocol handler is not | ||
2116 | * a module then it needs to provide its own protection in | ||
2117 | * the ops->create routine. | ||
2142 | */ | 2118 | */ |
2143 | |||
2144 | int sock_unregister(int family) | 2119 | int sock_unregister(int family) |
2145 | { | 2120 | { |
2146 | if (family < 0 || family >= NPROTO) | 2121 | if (family < 0 || family >= NPROTO) |
2147 | return -1; | 2122 | return -EINVAL; |
2148 | 2123 | ||
2149 | net_family_write_lock(); | 2124 | spin_lock(&net_family_lock); |
2150 | net_families[family] = NULL; | 2125 | net_families[family] = NULL; |
2151 | net_family_write_unlock(); | 2126 | spin_unlock(&net_family_lock); |
2127 | |||
2128 | synchronize_rcu(); | ||
2129 | |||
2152 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); | 2130 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); |
2153 | return 0; | 2131 | return 0; |
2154 | } | 2132 | } |