Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r--	net/sunrpc/svcsock.c	| 138
1 file changed, 63 insertions(+), 75 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e6bb1b0563ec..db589d187170 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -66,8 +66,8 @@
  * after a clear, the socket must be read/accepted
  * if this succeeds, it must be set again.
  * SK_CLOSE can set at any time. It is never cleared.
- * sk_inuse contains a bias of '1' until SK_DEAD is set.
- * so when sk_inuse hits zero, we know the socket is dead
+ * xpt_ref contains a bias of '1' until SK_DEAD is set.
+ * so when xprt_ref hits zero, we know the transport is dead
  * and no-one is using it.
  * SK_DEAD can only be set while SK_BUSY is held which ensures
  * no other thread will be using the socket or will try to
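The bias described in this comment is the invariant the conversion must preserve: the counter starts at one when the transport is created, every active user takes and drops an extra reference, and the initial '1' is dropped exactly once, when the transport is marked dead. A minimal userspace model of the scheme (hypothetical names; C11 atomics standing in for the kernel's kref) behaves like this:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* Toy model of the bias-of-'1' scheme described above.  The names
     * are made up for illustration; the kernel embeds a struct kref
     * (xpt_ref) in struct svc_xprt instead. */
    struct xprt_model {
            atomic_int ref;         /* stands in for xpt_ref */
            int dead;               /* stands in for the SK_DEAD bit */
    };

    static void xprt_get(struct xprt_model *x)
    {
            atomic_fetch_add(&x->ref, 1);
    }

    static void xprt_put(struct xprt_model *x)
    {
            if (atomic_fetch_sub(&x->ref, 1) == 1) {
                    /* counter hit zero: only legal once the bias is gone */
                    assert(x->dead);
                    printf("transport freed\n");
            }
    }

    int main(void)
    {
            struct xprt_model x = { .ref = 1, .dead = 0 }; /* creation bias */

            xprt_get(&x);   /* a server thread starts using the transport */
            x.dead = 1;     /* delete path: mark dead ... */
            xprt_put(&x);   /* ... and drop the creation bias */
            xprt_put(&x);   /* the last user's put is what frees it */
            return 0;
    }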
@@ -285,7 +285,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 			"svc_sock_enqueue: server %p, rq_sock=%p!\n",
 			rqstp, rqstp->rq_sock);
 		rqstp->rq_sock = svsk;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
 		BUG_ON(svsk->sk_pool != pool);
@@ -316,7 +316,7 @@ svc_sock_dequeue(struct svc_pool *pool)
 	list_del_init(&svsk->sk_ready);
 
 	dprintk("svc: socket %p dequeued, inuse=%d\n",
-		svsk->sk_sk, atomic_read(&svsk->sk_inuse));
+		svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
 
 	return svsk;
 }
@@ -359,19 +359,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 	}
 }
 
-/*
- * Release a socket after use.
- */
-static inline void
-svc_sock_put(struct svc_sock *svsk)
-{
-	if (atomic_dec_and_test(&svsk->sk_inuse)) {
-		BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags));
-		module_put(svsk->sk_xprt.xpt_class->xcl_owner);
-		svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt);
-	}
-}
-
 static void
 svc_sock_release(struct svc_rqst *rqstp)
 {
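The helper removed here is not lost: its two cleanup steps move behind the generic transport interface. Judging from the xpt_ref kref used throughout this diff, svc_xprt_put presumably boils down to a kref_put with a release callback along these lines (a sketch only; the real code lives in the svc_xprt layer, not in this file):

    /* Sketch of the generic replacement, under the assumption that
     * xpt_ref is a struct kref embedded in struct svc_xprt. */
    static void svc_xprt_free(struct kref *kref)
    {
            struct svc_xprt *xprt =
                    container_of(kref, struct svc_xprt, xpt_ref);
            struct module *owner = xprt->xpt_class->xcl_owner;

            /* the same two steps the deleted svc_sock_put performed */
            xprt->xpt_ops->xpo_free(xprt);
            module_put(owner);
    }

    void svc_xprt_put(struct svc_xprt *xprt)
    {
            kref_put(&xprt->xpt_ref, svc_xprt_free);
    }

Note that the BUG_ON(!test_bit(SK_DEAD, ...)) sanity check has no direct generic equivalent, since SK_DEAD is a socket-level flag.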
@@ -398,7 +385,7 @@ svc_sock_release(struct svc_rqst *rqstp)
 	svc_reserve(rqstp, 0);
 	rqstp->rq_sock = NULL;
 
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 /*
@@ -1128,50 +1115,6 @@ failed:
 }
 
 /*
- * Make sure that we don't have too many active connections. If we
- * have, something must be dropped.
- *
- * There's no point in trying to do random drop here for DoS
- * prevention. The NFS clients does 1 reconnect in 15 seconds. An
- * attacker can easily beat that.
- *
- * The only somewhat efficient mechanism would be if drop old
- * connections from the same IP first. But right now we don't even
- * record the client IP in svc_sock.
- */
-static void svc_check_conn_limits(struct svc_serv *serv)
-{
-	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
-		struct svc_sock *svsk = NULL;
-		spin_lock_bh(&serv->sv_lock);
-		if (!list_empty(&serv->sv_tempsocks)) {
-			if (net_ratelimit()) {
-				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open TCP "
-				       "sockets, consider increasing the "
-				       "number of nfsd threads\n",
-				       serv->sv_name);
-			}
-			/*
-			 * Always select the oldest socket. It's not fair,
-			 * but so is life
-			 */
-			svsk = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_sock,
-					  sk_list);
-			set_bit(SK_CLOSE, &svsk->sk_flags);
-			atomic_inc(&svsk->sk_inuse);
-		}
-		spin_unlock_bh(&serv->sv_lock);
-
-		if (svsk) {
-			svc_sock_enqueue(svsk);
-			svc_sock_put(svsk);
-		}
-	}
-}
-
-/*
  * Receive data from a TCP socket.
  */
 static int
@@ -1497,6 +1440,50 @@ svc_sock_update_bufs(struct svc_serv *serv)
 }
 
 /*
+ * Make sure that we don't have too many active connections. If we
+ * have, something must be dropped.
+ *
+ * There's no point in trying to do random drop here for DoS
+ * prevention. The NFS clients does 1 reconnect in 15 seconds. An
+ * attacker can easily beat that.
+ *
+ * The only somewhat efficient mechanism would be if drop old
+ * connections from the same IP first. But right now we don't even
+ * record the client IP in svc_sock.
+ */
+static void svc_check_conn_limits(struct svc_serv *serv)
+{
+	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+		struct svc_sock *svsk = NULL;
+		spin_lock_bh(&serv->sv_lock);
+		if (!list_empty(&serv->sv_tempsocks)) {
+			if (net_ratelimit()) {
+				/* Try to help the admin */
+				printk(KERN_NOTICE "%s: too many open TCP "
+				       "sockets, consider increasing the "
+				       "number of nfsd threads\n",
+				       serv->sv_name);
+			}
+			/*
+			 * Always select the oldest socket. It's not fair,
+			 * but so is life
+			 */
+			svsk = list_entry(serv->sv_tempsocks.prev,
+					  struct svc_sock,
+					  sk_list);
+			set_bit(SK_CLOSE, &svsk->sk_flags);
+			svc_xprt_get(&svsk->sk_xprt);
+		}
+		spin_unlock_bh(&serv->sv_lock);
+
+		if (svsk) {
+			svc_sock_enqueue(svsk);
+			svc_xprt_put(&svsk->sk_xprt);
+		}
+	}
+}
+
+/*
  * Receive the next request on any socket. This code is carefully
  * organised not to touch any cachelines in the shared svc_serv
  * structure, only cachelines in the local svc_pool.
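Moving svc_check_conn_limits below svc_sock_update_bufs is pure code motion; the limit itself is unchanged. For a concrete feel for the threshold: with, say, 8 nfsd threads the server starts evicting its oldest temporary (TCP) connection once sv_tmpcnt exceeds (8 + 3) * 20 = 220, and even a single-threaded server tolerates (1 + 3) * 20 = 80 connections before the eviction path triggers.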
@@ -1556,7 +1543,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_lock_bh(&pool->sp_lock);
 	if ((svsk = svc_sock_dequeue(pool)) != NULL) {
 		rqstp->rq_sock = svsk;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
 	} else {
@@ -1605,7 +1592,8 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		svc_sock_received(svsk);
 	} else {
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
-			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
+			rqstp, pool->sp_id, svsk,
+			atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
 		len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
 		dprintk("svc: got len=%d\n", len);
 	}
@@ -1702,9 +1690,10 @@ svc_age_temp_sockets(unsigned long closure)
 
 		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
 			continue;
-		if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
+		if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
+		    || test_bit(SK_BUSY, &svsk->sk_flags))
 			continue;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		list_move(le, &to_be_aged);
 		set_bit(SK_CLOSE, &svsk->sk_flags);
 		set_bit(SK_DETACHED, &svsk->sk_flags);
@@ -1722,7 +1711,7 @@ svc_age_temp_sockets(unsigned long closure)
 
 		/* a thread will dequeue and close it soon */
 		svc_sock_enqueue(svsk);
-		svc_sock_put(svsk);
+		svc_xprt_put(&svsk->sk_xprt);
 	}
 
 	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
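The refcount test in the aging loop reads naturally under the bias rule: a transport whose xpt_ref is exactly 1 is held only by its creation bias, so nothing is actively using it and it is a candidate for aging out. The svc_xprt_get taken before list_move is the usual pin for handing an object to another thread; the matching svc_xprt_put after svc_sock_enqueue releases that pin once a server thread has been queued to perform the close.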
@@ -1767,7 +1756,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
 	svsk->sk_server = serv;
-	atomic_set(&svsk->sk_inuse, 1);
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1953,10 +1941,10 @@ svc_delete_socket(struct svc_sock *svsk)
 	 * is about to be destroyed (in svc_destroy).
 	 */
 	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
-		BUG_ON(atomic_read(&svsk->sk_inuse)<2);
-		atomic_dec(&svsk->sk_inuse);
+		BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
 		if (test_bit(SK_TEMP, &svsk->sk_flags))
 			serv->sv_tmpcnt--;
+		svc_xprt_put(&svsk->sk_xprt);
 	}
 
 	spin_unlock_bh(&serv->sv_lock);
@@ -1969,10 +1957,10 @@ static void svc_close_socket(struct svc_sock *svsk)
 		/* someone else will have to effect the close */
 		return;
 
-	atomic_inc(&svsk->sk_inuse);
+	svc_xprt_get(&svsk->sk_xprt);
 	svc_delete_socket(svsk);
 	clear_bit(SK_BUSY, &svsk->sk_flags);
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 void svc_force_close_socket(struct svc_sock *svsk)
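svc_close_socket is the temporary-reference idiom in miniature: svc_delete_socket drops the creation bias, so without the preceding get the final reference could vanish while this function still needs to clear SK_BUSY. Annotated (comments are editorial, the calls are the ones in the hunk above), the shape is:

    svc_xprt_get(&svsk->sk_xprt);        /* pin across teardown */
    svc_delete_socket(svsk);             /* drops the creation bias */
    clear_bit(SK_BUSY, &svsk->sk_flags); /* safe: we still hold a ref */
    svc_xprt_put(&svsk->sk_xprt);        /* unpin; may free svsk here */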
@@ -1998,7 +1986,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	struct svc_sock *svsk;
 
 	if (too_many) {
-		svc_sock_put(dr->svsk);
+		svc_xprt_put(&dr->svsk->sk_xprt);
 		kfree(dr);
 		return;
 	}
@@ -2010,7 +1998,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	spin_unlock(&svsk->sk_lock);
 	set_bit(SK_DEFERRED, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 static struct cache_deferred_req *
@@ -2040,7 +2028,7 @@ svc_defer(struct cache_req *req)
 		dr->argslen = rqstp->rq_arg.len >> 2;
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
 	}
-	atomic_inc(&rqstp->rq_sock->sk_inuse);
+	svc_xprt_get(rqstp->rq_xprt);
 	dr->svsk = rqstp->rq_sock;
 
 	dr->handle.revisit = svc_revisit;
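Deferred requests pin the transport the same way: svc_defer now takes its reference through the generic rq_xprt pointer when it stashes rq_sock in dr->svsk, and the matching svc_xprt_put appears on both exit paths of svc_revisit above (the too_many drop and the normal revisit), so a transport cannot be freed while a deferred request still points at it.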