Diffstat (limited to 'net/ipv4/route.c')
 net/ipv4/route.c | 288 ++++++++++++++++++++++++++++----------------------
 1 file changed, 152 insertions(+), 136 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cc9423de7311..c41ddba02e9d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -64,7 +64,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -105,6 +104,7 @@
 #include <net/icmp.h>
 #include <net/xfrm.h>
 #include <net/ip_mp_alg.h>
+#include <net/netevent.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
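The new <net/netevent.h> header is the netevent notifier chain this patch uses below to publish redirect and PMTU events. As a sketch of the consumer side (hedged: only register_netevent_notifier()/unregister_netevent_notifier(), call_netevent_notifiers() and the NETEVENT_* ids come from net/netevent.h of this era; the callback body is illustrative):

    #include <linux/notifier.h>
    #include <net/netevent.h>

    static int my_netevent_cb(struct notifier_block *nb,
                              unsigned long event, void *ctx)
    {
        switch (event) {
        case NETEVENT_REDIRECT:     /* ctx is a struct netevent_redirect * */
        case NETEVENT_PMTU_UPDATE:  /* ctx is a struct dst_entry * */
            break;
        }
        return NOTIFY_DONE;
    }

    static struct notifier_block my_netevent_nb = {
        .notifier_call = my_netevent_cb,
    };

    /* register_netevent_notifier(&my_netevent_nb) at module init,
     * unregister_netevent_notifier(&my_netevent_nb) at exit. */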
@@ -206,21 +206,27 @@ __u8 ip_tos2prio[16] = {
 struct rt_hash_bucket {
 	struct rtable	*chain;
 };
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
+	defined(CONFIG_PROVE_LOCKING)
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
+ * (on lockdep we have a quite big spinlock_t, so keep the size down there)
  */
-#if NR_CPUS >= 32
-#define RT_HASH_LOCK_SZ	4096
-#elif NR_CPUS >= 16
-#define RT_HASH_LOCK_SZ	2048
-#elif NR_CPUS >= 8
-#define RT_HASH_LOCK_SZ	1024
-#elif NR_CPUS >= 4
-#define RT_HASH_LOCK_SZ	512
+#ifdef CONFIG_LOCKDEP
+# define RT_HASH_LOCK_SZ	256
 #else
-#define RT_HASH_LOCK_SZ	256
+# if NR_CPUS >= 32
+#  define RT_HASH_LOCK_SZ	4096
+# elif NR_CPUS >= 16
+#  define RT_HASH_LOCK_SZ	2048
+# elif NR_CPUS >= 8
+#  define RT_HASH_LOCK_SZ	1024
+# elif NR_CPUS >= 4
+#  define RT_HASH_LOCK_SZ	512
+# else
+#  define RT_HASH_LOCK_SZ	256
+# endif
 #endif
 
 static spinlock_t	*rt_hash_locks;
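The lock table is consumed by masking the chain hash down to a lock index; roughly, the helper this sizing feeds looks like the following (not changed by this patch, shown for context):

    static spinlock_t *rt_hash_lock_addr(unsigned int slot)
    {
        /* RT_HASH_LOCK_SZ is a power of two, so the mask picks one lock */
        return &rt_hash_locks[slot & (RT_HASH_LOCK_SZ - 1)];
    }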
@@ -244,7 +250,7 @@ static unsigned int rt_hash_rnd;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
-	(per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
+	(__raw_get_cpu_var(rt_cache_stat).field++)
 
 static int rt_intern_hash(unsigned hash, struct rtable *rth,
 				struct rtable **res);
@@ -255,6 +261,10 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 		& rt_hash_mask);
 }
 
+#define rt_hash(daddr, saddr, idx) \
+	rt_hash_code((__force u32)(__be32)(daddr),\
+		     (__force u32)(__be32)(saddr) ^ ((idx) << 5))
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
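rt_hash() centralizes the saddr ^ (ifindex << 5) folding that every call site below used to open-code, and the (__force u32) casts let sparse accept __be32 addresses being fed to the host-order rt_hash_code(). A call such as rt_hash(daddr, saddr, iif) therefore expands to approximately:

    unsigned hash = rt_hash_code((__force u32)(__be32)daddr,
                                 (__force u32)(__be32)saddr ^ (iif << 5));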
@@ -1068,7 +1078,7 @@ static void ip_select_fb_ident(struct iphdr *iph)
 	u32 salt;
 
 	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
+	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
 	iph->id = htons(salt & 0xFFFF);
 	ip_fallback_id = salt;
 	spin_unlock_bh(&ip_fb_id_lock);
@@ -1112,14 +1122,15 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
-void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
-		    u32 saddr, struct net_device *dev)
+void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
+		    __be32 saddr, struct net_device *dev)
 {
 	int i, k;
 	struct in_device *in_dev = in_dev_get(dev);
 	struct rtable *rth, **rthp;
-	u32  skeys[2] = { saddr, 0 };
+	__be32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
+	struct netevent_redirect netevent;
 
 	if (!in_dev)
 		return;
@@ -1140,8 +1151,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 
 	for (i = 0; i < 2; i++) {
 		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash_code(daddr,
-						     skeys[i] ^ (ikeys[k] << 5));
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1211,6 +1221,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 				rt_drop(rt);
 				goto do_next;
 			}
+
+			netevent.old = &rth->u.dst;
+			netevent.new = &rt->u.dst;
+			call_netevent_notifiers(NETEVENT_REDIRECT,
+						&netevent);
 
 			rt_del(hash, rth);
 			if (!rt_intern_hash(hash, rt, &rt))
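The notifier fires with pointers to both the dst entry being evicted and its replacement. A minimal sketch of a callback consuming this event (the field names match the struct netevent_redirect usage above; everything else is illustrative):

    static int redirect_event(struct notifier_block *nb,
                              unsigned long event, void *ctx)
    {
        if (event == NETEVENT_REDIRECT) {
            struct netevent_redirect *r = ctx;
            /* r->old: the dst being replaced, r->new: its successor */
        }
        return NOTIFY_DONE;
    }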
@@ -1248,9 +1263,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 			   rt->u.dst.expires) {
-			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
-						     rt->fl.fl4_src ^
-							(rt->fl.oif << 5));
+			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+						rt->fl.oif);
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ip_rt_advice: redirect to "
 					  "%u.%u.%u.%u/%02x dropped\n",
@@ -1385,15 +1399,15 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
 	int i;
 	unsigned short old_mtu = ntohs(iph->tot_len);
 	struct rtable *rth;
-	u32  skeys[2] = { iph->saddr, 0, };
-	u32  daddr = iph->daddr;
+	__be32  skeys[2] = { iph->saddr, 0, };
+	__be32  daddr = iph->daddr;
 	unsigned short est_mtu = 0;
 
 	if (ipv4_config.no_pmtu_disc)
 		return 0;
 
 	for (i = 0; i < 2; i++) {
-		unsigned hash = rt_hash_code(daddr, skeys[i]);
+		unsigned hash = rt_hash(daddr, skeys[i], 0);
 
 		rcu_read_lock();
 		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1447,6 +1461,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 		}
 		dst->metrics[RTAX_MTU-1] = mtu;
 		dst_set_expires(dst, ip_rt_mtu_expires);
+		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
 }
 
@@ -1517,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb)
 
 void ip_rt_get_source(u8 *addr, struct rtable *rt)
 {
-	u32 src;
+	__be32 src;
 	struct fib_result res;
 
 	if (rt->fl.iif == 0)
@@ -1583,12 +1598,12 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	rt->rt_type = res->type;
 }
 
-static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			     u8 tos, struct net_device *dev, int our)
 {
 	unsigned hash;
 	struct rtable *rth;
-	u32 spec_dst;
+	__be32 spec_dst;
 	struct in_device *in_dev = in_dev_get(dev);
 	u32 itag = 0;
 
@@ -1652,7 +1667,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5));
+	hash = rt_hash(daddr, saddr, dev->ifindex);
 	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
 
 e_nobufs:
@@ -1668,8 +1683,8 @@ e_inval:
 static void ip_handle_martian_source(struct net_device *dev,
 				     struct in_device *in_dev,
 				     struct sk_buff *skb,
-				     u32 daddr,
-				     u32 saddr)
+				     __be32 daddr,
+				     __be32 saddr)
 {
 	RT_CACHE_STAT_INC(in_martian_src);
 #ifdef CONFIG_IP_ROUTE_VERBOSE
@@ -1699,7 +1714,7 @@ static void ip_handle_martian_source(struct net_device *dev,
 static inline int __mkroute_input(struct sk_buff *skb,
 				  struct fib_result* res,
 				  struct in_device *in_dev,
-				  u32 daddr, u32 saddr, u32 tos,
+				  __be32 daddr, __be32 saddr, u32 tos,
 				  struct rtable **result)
 {
 
@@ -1707,7 +1722,8 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	int err;
 	struct in_device *out_dev;
 	unsigned flags = 0;
-	u32 spec_dst, itag;
+	__be32 spec_dst;
+	u32 itag;
 
 	/* get a working reference to the output device */
 	out_dev = in_dev_get(FIB_RES_DEV(*res));
@@ -1800,7 +1816,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
 				       struct fib_result* res,
 				       const struct flowi *fl,
 				       struct in_device *in_dev,
-				       u32 daddr, u32 saddr, u32 tos)
+				       __be32 daddr, __be32 saddr, u32 tos)
 {
 	struct rtable* rth = NULL;
 	int err;
@@ -1817,7 +1833,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+	hash = rt_hash(daddr, saddr, fl->iif);
 	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
 }
 
@@ -1825,7 +1841,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
 				   struct fib_result* res,
 				   const struct flowi *fl,
 				   struct in_device *in_dev,
-				   u32 daddr, u32 saddr, u32 tos)
+				   __be32 daddr, __be32 saddr, u32 tos)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
 	struct rtable* rth = NULL, *rtres;
@@ -1858,7 +1874,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+	hash = rt_hash(daddr, saddr, fl->iif);
 	err = rt_intern_hash(hash, rth, &rtres);
 	if (err)
 		return err;
@@ -1888,7 +1904,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
  *	2. IP spoofing attempts are filtered with 100% of guarantee.
  */
 
-static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
+static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       u8 tos, struct net_device *dev)
 {
 	struct fib_result res;
@@ -1907,7 +1923,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
 	u32 itag = 0;
 	struct rtable * rth;
 	unsigned	hash;
-	u32		spec_dst;
+	__be32		spec_dst;
 	int		err = -EINVAL;
 	int		free_res = 0;
 
@@ -1923,7 +1939,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
 	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
 		goto martian_source;
 
-	if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
+	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
 		goto brd_input;
 
 	/* Accept zero addresses only to limited broadcast;
@@ -2035,7 +2051,7 @@ local_input:
 		rth->rt_flags	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
-	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5));
+	hash = rt_hash(daddr, saddr, fl.iif);
 	err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
 	goto done;
 
@@ -2074,7 +2090,7 @@ martian_source:
 	goto e_inval;
 }
 
-int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
+int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		   u8 tos, struct net_device *dev)
 {
 	struct rtable * rth;
@@ -2082,7 +2098,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
 	int iif = dev->ifindex;
 
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash_code(daddr, saddr ^ (iif << 5));
+	hash = rt_hash(daddr, saddr, iif);
 
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2156,7 +2172,7 @@ static inline int __mkroute_output(struct rtable **result,
 	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
 		return -EINVAL;
 
-	if (fl->fl4_dst == 0xFFFFFFFF)
+	if (fl->fl4_dst == htonl(0xFFFFFFFF))
 		res->type = RTN_BROADCAST;
 	else if (MULTICAST(fl->fl4_dst))
 		res->type = RTN_MULTICAST;
@@ -2280,8 +2296,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
 	unsigned hash;
 	if (err == 0) {
-		hash = rt_hash_code(oldflp->fl4_dst,
-				    oldflp->fl4_src ^ (oldflp->oif << 5));
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
 		err = rt_intern_hash(hash, rth, rp);
 	}
 
@@ -2323,9 +2338,8 @@ static inline int ip_mkroute_output(struct rtable** rp,
 		if (err != 0)
 			goto cleanup;
 
-		hash = rt_hash_code(oldflp->fl4_dst,
-				    oldflp->fl4_src ^
-				    (oldflp->oif << 5));
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
+			       oldflp->oif);
 		err = rt_intern_hash(hash, rth, rp);
 
 		/* forward hop information to multipath impl. */
@@ -2404,7 +2418,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	 */
 
 	if (oldflp->oif == 0
-	    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
+	    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 		/* Special hack: user can direct multicasts
 		   and limited broadcast via necessary interface
 		   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2441,7 +2455,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		goto out;	/* Wrong error code */
 	}
 
-	if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
+	if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
 		if (!fl.fl4_src)
 			fl.fl4_src = inet_select_addr(dev_out, 0,
 						      RT_SCOPE_LINK);
@@ -2554,7 +2568,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5));
+	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2626,51 +2640,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
-	struct rtattr *eptr;
-#endif
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
+
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	r = nlmsg_data(nlh);
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
+	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
-	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
+
 	if (rt->fl.fl4_src) {
 		r->rtm_src_len = 32;
-		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (rt->u.dst.tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
-		__u32 alg = rt->rt_multipath_alg;
-
-		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
-	}
+	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
 #endif
 	if (rt->fl.iif)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
+
 	if (rt->rt_dst != rt->rt_gateway)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	ci.rta_used	= rt->u.dst.__use;
 	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
@@ -2687,13 +2704,10 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
 		}
 	}
-#ifdef CONFIG_IP_MROUTE
-	eptr = (struct rtattr*)skb->tail;
-#endif
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
 	if (rt->fl.iif) {
 #ifdef CONFIG_IP_MROUTE
-		u32 dst = rt->rt_dst;
+		__be32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
 		    ipv4_devconf.mc_forwarding) {
@@ -2702,41 +2716,48 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			if (!nowait) {
 				if (err == 0)
 					return 0;
-				goto nlmsg_failure;
+				goto nla_put_failure;
 			} else {
 				if (err == -EMSGSIZE)
-					goto nlmsg_failure;
-				((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+					goto nla_put_failure;
+				ci.rta_error = err;
 			}
 		}
 	} else
 #endif
-		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+		NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
-	u32 dst = 0;
-	u32 src = 0;
-	int iif = 0;
-	int err = -ENOBUFS;
+	__be32 dst = 0;
+	__be32 src = 0;
+	u32 iif;
+	int err;
 	struct sk_buff *skb;
 
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		goto out;
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
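The preceding hunks convert rt_fill_info() to the standard new-netlink fill shape: nlmsg_put() to open the message, NLA_PUT_*() macros that branch to nla_put_failure on overrun, then nlmsg_end() on success or nlmsg_cancel() on failure. The generic skeleton, with my_fill() and MY_ATTR as placeholder names:

    static int my_fill(struct sk_buff *skb, u32 pid, u32 seq,
                       int event, unsigned int flags, u32 value)
    {
        struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
                                         sizeof(struct rtmsg), flags);
        if (nlh == NULL)
            return -ENOBUFS;

        NLA_PUT_U32(skb, MY_ATTR, value); /* jumps below if skb is full */

        return nlmsg_end(skb, nlh);       /* fixes up nlmsg_len */

    nla_put_failure:
        return nlmsg_cancel(skb, nlh);    /* trims the partial message */
    }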
@@ -2747,62 +2768,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	skb->nh.iph->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
-	if (rta[RTA_SRC - 1])
-		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
-	if (rta[RTA_DST - 1])
-		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
-	if (rta[RTA_IIF - 1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
+	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
+	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 
 	if (iif) {
-		struct net_device *dev = __dev_get_by_index(iif);
-		err = -ENODEV;
-		if (!dev)
-			goto out_free;
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(iif);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto errout_free;
+		}
+
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
-		rt = (struct rtable*)skb->dst;
-		if (!err && rt->u.dst.error)
+
+		rt = (struct rtable*) skb->dst;
+		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
-							 .saddr = src,
-							 .tos = rtm->rtm_tos } } };
-		int oif = 0;
-		if (rta[RTA_OIF - 1])
-			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		fl.oif = oif;
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = dst,
+					.saddr = src,
+					.tos = rtm->rtm_tos,
+				},
+			},
+			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+		};
 		err = ip_route_output_key(&rt, &fl);
 	}
+
 	if (err)
-		goto out_free;
+		goto errout_free;
 
 	skb->dst = &rt->u.dst;
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 				RTM_NEWROUTE, 0, 0);
-	if (!err)
-		goto out_free;
-	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
+	if (err <= 0)
+		goto errout_free;
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:	return err;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
+	return err;
 
-out_free:
+errout_free:
 	kfree_skb(skb);
-	goto out;
+	goto errout;
 }
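On the request side, a single nlmsg_parse() call validates and indexes all attributes against rtm_ipv4_policy, reducing each attribute fetch to a tb[] test plus nla_get_*(). The same pattern for a hypothetical handler (my_policy, MY_ATTR_* and use_addr() are placeholders):

    static struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
        [MY_ATTR_ADDR] = { .type = NLA_U32 },
    };

    static int my_doit(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
    {
        struct nlattr *tb[MY_ATTR_MAX + 1];
        u32 addr;
        int err;

        err = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, MY_ATTR_MAX,
                          my_policy);
        if (err < 0)
            return err;

        /* absent attributes leave tb[] NULL, so default explicitly */
        addr = tb[MY_ATTR_ADDR] ? nla_get_u32(tb[MY_ATTR_ADDR]) : 0;
        return use_addr(addr); /* placeholder */
    }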
2807 2827
2808int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) 2828int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3130,13 +3150,9 @@ int __init ip_rt_init(void)
 	}
 #endif
 
-	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
-						     sizeof(struct rtable),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!ipv4_dst_ops.kmem_cachep)
-		panic("IP: failed to allocate ip_dst_cache\n");
+	ipv4_dst_ops.kmem_cachep =
+		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	rt_hash_table = (struct rt_hash_bucket *)
 		alloc_large_system_hash("IP route cache",
@@ -3144,7 +3160,7 @@ int __init ip_rt_init(void)
 					rhash_entries,
 					(num_physpages >= 128 * 1024) ?
 					15 : 17,
-					HASH_HIGHMEM,
+					0,
 					&rt_hash_log,
 					&rt_hash_mask,
 					0);
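SLAB_PANIC makes the allocator panic on failure itself, which is what let the explicit NULL check and panic() call above be deleted; the two forms are equivalent (2.6.18-era kmem_cache_create() signature with ctor/dtor arguments):

    /* before: manual failure check */
    cachep = kmem_cache_create("name", size, 0, SLAB_HWCACHE_ALIGN,
                               NULL, NULL);
    if (!cachep)
        panic("failed to allocate cache\n");

    /* after: SLAB_PANIC folds the check into the allocator */
    cachep = kmem_cache_create("name", size, 0,
                               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);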