Diffstat (limited to 'net')
-rw-r--r--  net/9p/client.c                           |    2
-rw-r--r--  net/9p/trans_fd.c                         |    2
-rw-r--r--  net/bluetooth/hidp/core.c                 |  214
-rw-r--r--  net/bluetooth/hidp/hidp.h                 |    2
-rw-r--r--  net/decnet/dn_dev.c                       |    4
-rw-r--r--  net/decnet/sysctl_net_decnet.c            |    4
-rw-r--r--  net/ipv4/cipso_ipv4.c                     |  656
-rw-r--r--  net/ipv4/devinet.c                        |    7
-rw-r--r--  net/ipv4/ip_options.c                     |    2
-rw-r--r--  net/ipv4/route.c                          |    7
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                |   18
-rw-r--r--  net/ipv6/addrconf.c                       |    1
-rw-r--r--  net/ipv6/ndisc.c                          |   11
-rw-r--r--  net/netfilter/nf_conntrack_acct.c         |    2
-rw-r--r--  net/netfilter/nf_conntrack_pptp.c         |    2
-rw-r--r--  net/netlabel/Makefile                     |    3
-rw-r--r--  net/netlabel/netlabel_addrlist.c          |  388
-rw-r--r--  net/netlabel/netlabel_addrlist.h          |  189
-rw-r--r--  net/netlabel/netlabel_cipso_v4.c          |  136
-rw-r--r--  net/netlabel/netlabel_cipso_v4.h          |   10
-rw-r--r--  net/netlabel/netlabel_domainhash.c        |  393
-rw-r--r--  net/netlabel/netlabel_domainhash.h        |   40
-rw-r--r--  net/netlabel/netlabel_kapi.c              |  272
-rw-r--r--  net/netlabel/netlabel_mgmt.c              |  410
-rw-r--r--  net/netlabel/netlabel_mgmt.h              |   59
-rw-r--r--  net/netlabel/netlabel_unlabeled.c         |  456
-rw-r--r--  net/rfkill/rfkill-input.c                 |    1
-rw-r--r--  net/sunrpc/clnt.c                         |    6
-rw-r--r--  net/sunrpc/rpcb_clnt.c                    |  121
-rw-r--r--  net/sunrpc/svc.c                          |  251
-rw-r--r--  net/sunrpc/svc_xprt.c                     |   39
-rw-r--r--  net/sunrpc/svcsock.c                      |   17
-rw-r--r--  net/sunrpc/xprt.c                         |   12
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c            |   29
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c   |  187
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c     |  255
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  |  364
-rw-r--r--  net/sunrpc/xprtrdma/transport.c           |   41
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c               |  741
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h           |   17
40 files changed, 3950 insertions(+), 1421 deletions(-)
diff --git a/net/9p/client.c b/net/9p/client.c
index 10e320307ec0..e053e06028a5 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -52,7 +52,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_msize, "msize=%u"},
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d652baf5ff91..6dabbdb66651 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -86,7 +86,7 @@ enum {
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 96434d774c84..acdeab3d9807 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -578,7 +578,7 @@ static int hidp_session(void *arg)
 	if (session->hid) {
 		if (session->hid->claimed & HID_CLAIMED_INPUT)
 			hidinput_disconnect(session->hid);
-		hid_free_device(session->hid);
+		hid_destroy_device(session->hid);
 	}
 
 	/* Wakeup user-space polling for socket errors */
@@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session)
 static int hidp_setup_input(struct hidp_session *session,
 			struct hidp_connadd_req *req)
 {
-	struct input_dev *input = session->input;
+	struct input_dev *input;
 	int i;
 
+	input = input_allocate_device();
+	if (!input)
+		return -ENOMEM;
+
+	session->input = input;
+
 	input_set_drvdata(input, session);
 
 	input->name = "Bluetooth HID Boot Protocol Device";
@@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid)
 {
 }
 
-static const struct {
-	__u16 idVendor;
-	__u16 idProduct;
-	unsigned quirks;
-} hidp_blacklist[] = {
-	/* Apple wireless Mighty Mouse */
-	{ 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
+static int hidp_parse(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hidp_connadd_req *req = session->req;
+	unsigned char *buf;
+	int ret;
 
-	{ }	/* Terminating entry */
-};
+	buf = kmalloc(req->rd_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, req->rd_data, req->rd_size)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	ret = hid_parse_report(session->hid, buf, req->rd_size);
+
+	kfree(buf);
+
+	if (ret)
+		return ret;
+
+	session->req = NULL;
+
+	return 0;
+}
+
+static int hidp_start(struct hid_device *hid)
+{
+	struct hidp_session *session = hid->driver_data;
+	struct hid_report *report;
 
-static void hidp_setup_quirks(struct hid_device *hid)
+	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
+			report_list, list)
+		hidp_send_report(session, report);
+
+	return 0;
+}
+
+static void hidp_stop(struct hid_device *hid)
 {
-	unsigned int n;
+	struct hidp_session *session = hid->driver_data;
+
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
 
-	for (n = 0; hidp_blacklist[n].idVendor; n++)
-		if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
-		    hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
-			hid->quirks = hidp_blacklist[n].quirks;
+	if (hid->claimed & HID_CLAIMED_INPUT)
+		hidinput_disconnect(hid);
+	hid->claimed = 0;
 }
 
-static void hidp_setup_hid(struct hidp_session *session,
+static struct hid_ll_driver hidp_hid_driver = {
+	.parse = hidp_parse,
+	.start = hidp_start,
+	.stop = hidp_stop,
+	.open = hidp_open,
+	.close = hidp_close,
+	.hidinput_input_event = hidp_hidinput_event,
+};
+
+static int hidp_setup_hid(struct hidp_session *session,
 			struct hidp_connadd_req *req)
 {
-	struct hid_device *hid = session->hid;
-	struct hid_report *report;
+	struct hid_device *hid;
 	bdaddr_t src, dst;
+	int ret;
 
-	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
-	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(session->hid);
+		goto err;
+	}
 
+	session->hid = hid;
+	session->req = req;
 	hid->driver_data = session;
 
-	hid->country = req->country;
+	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
+	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
 
 	hid->bus = BUS_BLUETOOTH;
 	hid->vendor = req->vendor;
 	hid->product = req->product;
 	hid->version = req->version;
+	hid->country = req->country;
 
 	strncpy(hid->name, req->name, 128);
 	strncpy(hid->phys, batostr(&src), 64);
 	strncpy(hid->uniq, batostr(&dst), 64);
 
-	hid->dev = hidp_get_device(session);
-
-	hid->hid_open = hidp_open;
-	hid->hid_close = hidp_close;
-
-	hid->hidinput_input_event = hidp_hidinput_event;
+	hid->dev.parent = hidp_get_device(session);
+	hid->ll_driver = &hidp_hid_driver;
 
-	hidp_setup_quirks(hid);
+	ret = hid_add_device(hid);
+	if (ret)
+		goto err_hid;
 
-	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list)
-		hidp_send_report(session, report);
-
-	if (hidinput_connect(hid) == 0)
-		hid->claimed |= HID_CLAIMED_INPUT;
+	return 0;
+err_hid:
+	hid_destroy_device(hid);
+	session->hid = NULL;
+err:
+	return ret;
 }
 
 int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
@@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 
 	BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
 
-	if (req->rd_size > 0) {
-		unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL);
-
-		if (!buf) {
-			kfree(session);
-			return -ENOMEM;
-		}
-
-		if (copy_from_user(buf, req->rd_data, req->rd_size)) {
-			kfree(buf);
-			kfree(session);
-			return -EFAULT;
-		}
-
-		session->hid = hid_parse_report(buf, req->rd_size);
-
-		kfree(buf);
-
-		if (!session->hid) {
-			kfree(session);
-			return -EINVAL;
-		}
-	}
-
-	if (!session->hid) {
-		session->input = input_allocate_device();
-		if (!session->input) {
-			kfree(session);
-			return -ENOMEM;
-		}
-	}
-
 	down_write(&hidp_session_sem);
 
 	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
@@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 	session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
 	session->idle_to = req->idle_to;
 
-	if (session->input) {
+	if (req->rd_size > 0) {
+		err = hidp_setup_hid(session, req);
+		if (err && err != -ENODEV)
+			goto err_skb;
+	}
+
+	if (!session->hid) {
 		err = hidp_setup_input(session, req);
 		if (err < 0)
-			goto failed;
+			goto err_skb;
 	}
 
-	if (session->hid)
-		hidp_setup_hid(session, req);
-
 	__hidp_link_session(session);
 
 	hidp_set_timer(session);
@@ -850,17 +874,16 @@ unlink:
 
 	__hidp_unlink_session(session);
 
-	if (session->input) {
+	if (session->input)
 		input_unregister_device(session->input);
-		session->input = NULL; /* don't try to free it here */
-	}
-
+	if (session->hid)
+		hid_destroy_device(session->hid);
+err_skb:
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
 failed:
 	up_write(&hidp_session_sem);
 
-	if (session->hid)
-		hid_free_device(session->hid);
-
 	input_free_device(session->input);
 	kfree(session);
 	return err;
@@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci)
 	return err;
 }
 
+static const struct hid_device_id hidp_table[] = {
+	{ HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver hidp_driver = {
+	.name = "generic-bluetooth",
+	.id_table = hidp_table,
+};
+
 static int __init hidp_init(void)
 {
+	int ret;
+
 	l2cap_load();
 
 	BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
 
-	return hidp_init_sockets();
+	ret = hid_register_driver(&hidp_driver);
+	if (ret)
+		goto err;
+
+	ret = hidp_init_sockets();
+	if (ret)
+		goto err_drv;
+
+	return 0;
+err_drv:
+	hid_unregister_driver(&hidp_driver);
+err:
+	return ret;
 }
 
 static void __exit hidp_exit(void)
 {
 	hidp_cleanup_sockets();
+	hid_unregister_driver(&hidp_driver);
 }
 
 module_init(hidp_init);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 343fb0566b3e..e503c89057ad 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -151,6 +151,8 @@ struct hidp_session {
 
 	struct sk_buff_head ctrl_transmit;
 	struct sk_buff_head intr_transmit;
+
+	struct hidp_connadd_req *req;
 };
 
 static inline void hidp_schedule(struct hidp_session *session)
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4fd4a4f74e82..28e26bd08e24 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */
 
 static int dn_forwarding_proc(ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen);
 
@@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 #endif
 }
 
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen)
 {
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 228067c571ba..36400b266896 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str)
 }
 
 
-static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_node_address_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
@@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
 }
 
 
-static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_def_dev_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c0e4572cc90..490e035c6d90 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -13,7 +13,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -47,17 +47,7 @@
 #include <asm/bug.h>
 #include <asm/unaligned.h>
 
-struct cipso_v4_domhsh_entry {
-	char *domain;
-	u32 valid;
-	struct list_head list;
-	struct rcu_head rcu;
-};
-
 /* List of available DOI definitions */
-/* XXX - Updates should be minimal so having a single lock for the
- * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
- * okay. */
 /* XXX - This currently assumes a minimal number of different DOIs in use,
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
@@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1;
  * be omitted. */
 #define CIPSO_V4_TAG_RNG_CAT_MAX 8
 
+/* Base length of the local tag (non-standard tag).
+ * Tag definition (may change between kernel versions)
+ *
+ * 0          8          16         24         32
+ * +----------+----------+----------+----------+
+ * | 10000000 | 00000110 | 32-bit secid value  |
+ * +----------+----------+----------+----------+
+ * | in (host byte order)|
+ * +----------+----------+
+ *
+ */
+#define CIPSO_V4_TAG_LOC_BLEN 6
+
 /*
  * Helper Functions
  */
@@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
 }
 
 /**
- * cipso_v4_doi_domhsh_free - Frees a domain list entry
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to a domain list entry can be released
- * safely.
- *
- */
-static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
-{
-	struct cipso_v4_domhsh_entry *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
-	kfree(ptr->domain);
-	kfree(ptr);
-}
-
-/**
  * cipso_v4_cache_entry_free - Frees a cache entry
  * @entry: the entry to free
  *
@@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
 	struct cipso_v4_doi *iter;
 
 	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->doi == doi && iter->valid)
+		if (iter->doi == doi && atomic_read(&iter->refcount))
 			return iter;
 	return NULL;
 }
@@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 			if (doi_def->type != CIPSO_V4_MAP_PASS)
 				return -EINVAL;
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			if (doi_def->type != CIPSO_V4_MAP_LOCAL)
+				return -EINVAL;
+			break;
 		default:
 			return -EINVAL;
 		}
 	}
 
-	doi_def->valid = 1;
+	atomic_set(&doi_def->refcount, 1);
 	INIT_RCU_HEAD(&doi_def->rcu);
-	INIT_LIST_HEAD(&doi_def->dom_list);
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	if (cipso_v4_doi_search(doi_def->doi) != NULL)
@@ -519,59 +506,129 @@ doi_add_failure:
 }
 
 /**
+ * cipso_v4_doi_free - Frees a DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_TRANS:
+		kfree(doi_def->map.std->lvl.cipso);
+		kfree(doi_def->map.std->lvl.local);
+		kfree(doi_def->map.std->cat.cipso);
+		kfree(doi_def->map.std->cat.local);
+		break;
+	}
+	kfree(doi_def);
+}
+
+/**
+ * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_free_rcu(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *doi_def;
+
+	doi_def = container_of(entry, struct cipso_v4_doi, rcu);
+	cipso_v4_doi_free(doi_def);
+}
+
+/**
  * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
  * @doi: the DOI value
  * @audit_secid: the LSM secid to use in the audit message
- * @callback: the DOI cleanup/free callback
  *
  * Description:
- * Removes a DOI definition from the CIPSO engine, @callback is called to
- * free any memory. The NetLabel routines will be called to release their own
- * LSM domain mappings as well as our own domain list. Returns zero on
- * success and negative values on failure.
+ * Removes a DOI definition from the CIPSO engine. The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list. Returns zero on success and negative values on failure.
  *
  */
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 {
 	struct cipso_v4_doi *doi_def;
-	struct cipso_v4_domhsh_entry *dom_iter;
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def != NULL) {
-		doi_def->valid = 0;
-		list_del_rcu(&doi_def->list);
+	if (doi_def == NULL) {
 		spin_unlock(&cipso_v4_doi_list_lock);
-		rcu_read_lock();
-		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
-			if (dom_iter->valid)
-				netlbl_cfg_map_del(dom_iter->domain,
-						   audit_info);
-		rcu_read_unlock();
-		cipso_v4_cache_invalidate();
-		call_rcu(&doi_def->rcu, callback);
-		return 0;
+		return -ENOENT;
 	}
+	if (!atomic_dec_and_test(&doi_def->refcount)) {
+		spin_unlock(&cipso_v4_doi_list_lock);
+		return -EBUSY;
+	}
+	list_del_rcu(&doi_def->list);
 	spin_unlock(&cipso_v4_doi_list_lock);
 
-	return -ENOENT;
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+
+	return 0;
 }
 
 /**
- * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition
  * @doi: the DOI value
  *
  * Description:
  * Searches for a valid DOI definition and if one is found it is returned to
  * the caller. Otherwise NULL is returned. The caller must ensure that
- * rcu_read_lock() is held while accessing the returned definition.
+ * rcu_read_lock() is held while accessing the returned definition and the DOI
+ * definition reference count is decremented when the caller is done.
  *
  */
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
 {
-	return cipso_v4_doi_search(doi);
+	struct cipso_v4_doi *doi_def;
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_search(doi);
+	if (doi_def == NULL)
+		goto doi_getdef_return;
+	if (!atomic_inc_not_zero(&doi_def->refcount))
+		doi_def = NULL;
+
+doi_getdef_return:
+	rcu_read_unlock();
+	return doi_def;
+}
+
+/**
+ * cipso_v4_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from cipso_v4_doi_getdef().
+ *
+ */
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	if (!atomic_dec_and_test(&doi_def->refcount))
+		return;
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_del_rcu(&doi_def->list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
 }
 
 /**
@@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
-		if (iter_doi->valid) {
+		if (atomic_read(&iter_doi->refcount) > 0) {
 			if (doi_cnt++ < *skip_cnt)
 				continue;
 			ret_val = callback(iter_doi, cb_arg);
@@ -613,85 +670,6 @@ doi_walk_return:
 	return ret_val;
 }
 
-/**
- * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to add
- *
- * Description:
- * Adds the @domain to the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel). This function
- * does allocate memory. Returns zero on success, negative values on failure.
- *
- */
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-	struct cipso_v4_domhsh_entry *new_dom;
-
-	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
-	if (new_dom == NULL)
-		return -ENOMEM;
-	if (domain) {
-		new_dom->domain = kstrdup(domain, GFP_KERNEL);
-		if (new_dom->domain == NULL) {
-			kfree(new_dom);
-			return -ENOMEM;
-		}
-	}
-	new_dom->valid = 1;
-	INIT_RCU_HEAD(&new_dom->rcu);
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			spin_unlock(&cipso_v4_doi_list_lock);
-			kfree(new_dom->domain);
-			kfree(new_dom);
-			return -EEXIST;
-		}
-	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return 0;
-}
-
-/**
- * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to remove
- *
- * Description:
- * Removes the @domain from the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel). Returns zero
- * on success and negative values on error.
- *
- */
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			iter->valid = 0;
-			list_del_rcu(&iter->list);
-			spin_unlock(&cipso_v4_doi_list_lock);
-			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
-			return 0;
-		}
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return -ENOENT;
-}
-
 /*
  * Label Mapping Functions
  */
@@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
 			return 0;
 		break;
@@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*net_lvl = host_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		if (host_lvl < doi_def->map.std->lvl.local_size &&
 		    doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
 			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
@@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
 	case CIPSO_V4_MAP_PASS:
 		*host_lvl = net_lvl;
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		map_tbl = doi_def->map.std;
 		if (net_lvl < map_tbl->lvl.cipso_size &&
 		    map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
@@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
 	switch (doi_def->type) {
 	case CIPSO_V4_MAP_PASS:
 		return 0;
-	case CIPSO_V4_MAP_STD:
+	case CIPSO_V4_MAP_TRANS:
 		cipso_cat_size = doi_def->map.std->cat.cipso_size;
 		cipso_array = doi_def->map.std->cat.cipso;
 		for (;;) {
@@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 	u32 host_cat_size = 0;
 	u32 *host_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		host_cat_size = doi_def->map.std->cat.local_size;
 		host_cat_array = doi_def->map.std->cat.local;
 	}
@@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			net_spot = host_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (host_spot >= host_cat_size)
 				return -EPERM;
 			net_spot = host_cat_array[host_spot];
@@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 	u32 net_cat_size = 0;
 	u32 *net_cat_array = NULL;
 
-	if (doi_def->type == CIPSO_V4_MAP_STD) {
+	if (doi_def->type == CIPSO_V4_MAP_TRANS) {
 		net_cat_size = doi_def->map.std->cat.cipso_size;
 		net_cat_array = doi_def->map.std->cat.cipso;
 	}
@@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
 		case CIPSO_V4_MAP_PASS:
 			host_spot = net_spot;
 			break;
-		case CIPSO_V4_MAP_STD:
+		case CIPSO_V4_MAP_TRANS:
 			if (net_spot >= net_cat_size)
 				return -EPERM;
 			host_spot = net_cat_array[net_spot];
@@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x01;
+	buffer[0] = CIPSO_V4_TAG_RBITMAP;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x02;
+	buffer[0] = CIPSO_V4_TAG_ENUM;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def,
 	} else
 		tag_len = 4;
 
-	buffer[0] = 0x05;
+	buffer[0] = CIPSO_V4_TAG_RANGE;
 	buffer[1] = tag_len;
 	buffer[3] = level;
 
@@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
 }
 
 /**
+ * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the local tag. Returns the size of the tag
+ * on success, negative values on failure.
+ *
+ */
+static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char *buffer,
+			       u32 buffer_len)
+{
+	if (!(secattr->flags & NETLBL_SECATTR_SECID))
+		return -EPERM;
+
+	buffer[0] = CIPSO_V4_TAG_LOCAL;
+	buffer[1] = CIPSO_V4_TAG_LOC_BLEN;
+	*(u32 *)&buffer[2] = secattr->attr.secid;
+
+	return CIPSO_V4_TAG_LOC_BLEN;
+}
+
+/**
+ * cipso_v4_parsetag_loc - Parse a CIPSO local tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO local tag and return the security attributes in @secattr.
+ * Return zero on success, negatives values on failure.
+ *
+ */
+static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	secattr->attr.secid = *(u32 *)&tag[2];
+	secattr->flags |= NETLBL_SECATTR_SECID;
+
+	return 0;
+}
+
+/**
  * cipso_v4_validate - Validate a CIPSO option
  * @option: the start of the option, on error it is set to point to the error
  *
@@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def,
  * that is unrecognized."
  *
  */
-int cipso_v4_validate(unsigned char **option)
+int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 {
 	unsigned char *opt = *option;
 	unsigned char *tag;
@@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option)
 		goto validate_return_locked;
 	}
 
-	opt_iter = 6;
+	opt_iter = CIPSO_V4_HDR_LEN;
 	tag = opt + opt_iter;
 	while (opt_iter < opt_len) {
 		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
@@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option)
 
 		switch (tag[0]) {
 		case CIPSO_V4_TAG_RBITMAP:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RBM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option)
 					err_offset = opt_iter + 3;
 					goto validate_return_locked;
 				}
-				if (tag_len > 4 &&
+				if (tag_len > CIPSO_V4_TAG_RBM_BLEN &&
 				    cipso_v4_map_cat_rbm_valid(doi_def,
 							    &tag[4],
 							    tag_len - 4) < 0) {
@@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_ENUM:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_ENUM_BLEN &&
 			    cipso_v4_map_cat_enum_valid(doi_def,
 							&tag[4],
 							tag_len - 4) < 0) {
@@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option)
 			}
 			break;
 		case CIPSO_V4_TAG_RANGE:
-			if (tag_len < 4) {
+			if (tag_len < CIPSO_V4_TAG_RNG_BLEN) {
 				err_offset = opt_iter + 1;
 				goto validate_return_locked;
 			}
@@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option)
 				err_offset = opt_iter + 3;
 				goto validate_return_locked;
 			}
-			if (tag_len > 4 &&
+			if (tag_len > CIPSO_V4_TAG_RNG_BLEN &&
 			    cipso_v4_map_cat_rng_valid(doi_def,
 						       &tag[4],
 						       tag_len - 4) < 0) {
@@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option)
 				goto validate_return_locked;
 			}
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			/* This is a non-standard tag that we only allow for
+			 * local connections, so if the incoming interface is
+			 * not the loopback device drop the packet. */
+			if (!(skb->dev->flags & IFF_LOOPBACK)) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+			if (tag_len != CIPSO_V4_TAG_LOC_BLEN) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+			break;
 		default:
 			err_offset = opt_iter;
 			goto validate_return_locked;
@@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 }
 
 /**
- * cipso_v4_sock_setattr - Add a CIPSO option to a socket
- * @sk: the socket
+ * cipso_v4_genopt - Generate a CIPSO option
+ * @buf: the option buffer
+ * @buf_len: the size of opt_buf
  * @doi_def: the CIPSO DOI to use
- * @secattr: the specific security attributes of the socket
+ * @secattr: the security attributes
  *
  * Description:
- * Set the CIPSO option on the given socket using the DOI definition and
- * security attributes passed to the function. This function requires
- * exclusive access to @sk, which means it either needs to be in the
- * process of being created or locked. Returns zero on success and negative
- * values on failure.
+ * Generate a CIPSO option using the DOI definition and security attributes
+ * passed to the function. Returns the length of the option on success and
+ * negative values on failure.
  *
  */
-int cipso_v4_sock_setattr(struct sock *sk,
-			  const struct cipso_v4_doi *doi_def,
-			  const struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
+			   const struct cipso_v4_doi *doi_def,
+			   const struct netlbl_lsm_secattr *secattr)
 {
-	int ret_val = -EPERM;
+	int ret_val;
 	u32 iter;
-	unsigned char *buf;
-	u32 buf_len = 0;
-	u32 opt_len;
-	struct ip_options *opt = NULL;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
 
-	/* In the case of sock_create_lite(), the sock->sk field is not
-	 * defined yet but it is not a problem as the only users of these
-	 * "lite" PF_INET sockets are functions which do an accept() call
-	 * afterwards so we will label the socket as part of the accept(). */
-	if (sk == NULL)
-		return 0;
-
-	/* We allocate the maximum CIPSO option size here so we are probably
-	 * being a little wasteful, but it makes our life _much_ easier later
-	 * on and after all we are only talking about 40 bytes. */
-	buf_len = CIPSO_V4_OPT_LEN_MAX;
-	buf = kmalloc(buf_len, GFP_ATOMIC);
-	if (buf == NULL) {
-		ret_val = -ENOMEM;
-		goto socket_setattr_failure;
-	}
+	if (buf_len <= CIPSO_V4_HDR_LEN)
+		return -ENOSPC;
 
 	/* XXX - This code assumes only one tag per CIPSO option which isn't
 	 * really a good assumption to make but since we only support the MAC
@@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk,
 					       &buf[CIPSO_V4_HDR_LEN],
 					       buf_len - CIPSO_V4_HDR_LEN);
 			break;
+		case CIPSO_V4_TAG_LOCAL:
+			ret_val = cipso_v4_gentag_loc(doi_def,
+						      secattr,
+						      &buf[CIPSO_V4_HDR_LEN],
+						      buf_len - CIPSO_V4_HDR_LEN);
+			break;
 		default:
-			ret_val = -EPERM;
-			goto socket_setattr_failure;
+			return -EPERM;
 		}
 
 		iter++;
@@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk,
 		 iter < CIPSO_V4_TAG_MAXCNT &&
 		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
 	if (ret_val < 0)
-		goto socket_setattr_failure;
+		return ret_val;
 	cipso_v4_gentag_hdr(doi_def, buf, ret_val);
-	buf_len = CIPSO_V4_HDR_LEN + ret_val;
+	return CIPSO_V4_HDR_LEN + ret_val;
+}
+
+/**
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function. This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked. Returns zero on success and negative
+ * values on failure.
+ *
+ */
+int cipso_v4_sock_setattr(struct sock *sk,
+			  const struct cipso_v4_doi *doi_def,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	if (sk == NULL)
+		return 0;
+
+	/* We allocate the maximum CIPSO option size here so we are probably
+	 * being a little wasteful, but it makes our life _much_ easier later
+	 * on and after all we are only talking about 40 bytes. */
+	buf_len = CIPSO_V4_OPT_LEN_MAX;
+	buf = kmalloc(buf_len, GFP_ATOMIC);
+	if (buf == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+
+	ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (ret_val < 0)
+		goto socket_setattr_failure;
+	buf_len = ret_val;
 
 	/* We can't use ip_options_get() directly because it makes a call to
 	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
@@ -1822,6 +1894,80 @@ socket_setattr_failure:
 }
 
 /**
+ * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CIPSO option from a socket, if present.
+ *
+ */
+void cipso_v4_sock_delattr(struct sock *sk)
+{
+	u8 hdr_delta;
+	struct ip_options *opt;
+	struct inet_sock *sk_inet;
+
+	sk_inet = inet_sk(sk);
+	opt = sk_inet->opt;
+	if (opt == NULL || opt->cipso == 0)
+		return;
+
+	if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+		u8 cipso_len;
+		u8 cipso_off;
+		unsigned char *cipso_ptr;
+		int iter;
+		int optlen_new;
+
+		cipso_off = opt->cipso - sizeof(struct iphdr);
+		cipso_ptr = &opt->__data[cipso_off];
+		cipso_len = cipso_ptr[1];
+
+		if (opt->srr > opt->cipso)
+			opt->srr -= cipso_len;
+		if (opt->rr > opt->cipso)
+			opt->rr -= cipso_len;
+		if (opt->ts > opt->cipso)
+			opt->ts -= cipso_len;
+		if (opt->router_alert > opt->cipso)
+			opt->router_alert -= cipso_len;
+		opt->cipso = 0;
+
+		memmove(cipso_ptr, cipso_ptr + cipso_len,
+			opt->optlen - cipso_off - cipso_len);
+
+		/* determining the new total option length is tricky because of
+		 * the padding necessary, the only thing i can think to do at
+		 * this point is walk the options one-by-one, skipping the
+		 * padding at the end to determine the actual option size and
+		 * from there we can determine the new total option length */
+		iter = 0;
+		optlen_new = 0;
+		while (iter < opt->optlen)
+			if (opt->__data[iter] != IPOPT_NOP) {
+				iter += opt->__data[iter + 1];
+				optlen_new = iter;
+			} else
+				iter++;
+		hdr_delta = opt->optlen;
+		opt->optlen = (optlen_new + 3) & ~3;
+		hdr_delta -= opt->optlen;
+	} else {
+		/* only the cipso option was present on the socket so we can
+		 * remove the entire option struct */
+		sk_inet->opt = NULL;
+		hdr_delta = opt->optlen;
+		kfree(opt);
+	}
+
+	if (sk_inet->is_icsk && hdr_delta > 0) {
+		struct inet_connection_sock *sk_conn = inet_csk(sk);
+		sk_conn->icsk_ext_hdr_len -= hdr_delta;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+}
+
+/**
  * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
  * @cipso: the CIPSO v4 option
  * @secattr: the security attributes
@@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso,
 	case CIPSO_V4_TAG_RANGE:
 		ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
 		break;
+	case CIPSO_V4_TAG_LOCAL:
+		ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr);
+		break;
 	}
 	if (ret_val == 0)
 		secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 }
 
 /**
+ * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CIPSO option on the given packet based on the security attributes.
+ * Returns a pointer to the IP header on success and NULL on failure.
+ *
+ */
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char buf[CIPSO_V4_OPT_LEN_MAX];
+	u32 buf_len = CIPSO_V4_OPT_LEN_MAX;
+	u32 opt_len;
+	int len_delta;
+
+	buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (buf_len < 0)
+		return buf_len;
+	opt_len = (buf_len + 3) & ~3;
+
+	/* we overwrite any existing options to ensure that we have enough
+	 * room for the CIPSO option, the reason is that we _need_ to guarantee
+	 * that the security label is applied to the packet - we do the same
+	 * thing when using the socket options and it hasn't caused a problem,
+	 * if we need to we can always revisit this choice later */
+
+	len_delta = opt_len - opt->optlen;
+	/* if we don't ensure enough headroom we could panic on the skb_push()
+	 * call below so make sure we have enough, we are also "mangling" the
+	 * packet so we should probably do a copy-on-write call anyway */
+	ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+	if (ret_val < 0)
+		return ret_val;
+
+	if (len_delta > 0) {
+		/* we assume that the header + opt->optlen have already been
+		 * "pushed" in ip_options_build() or similar */
+		iph = ip_hdr(skb);
+		skb_push(skb, len_delta);
+		memmove((char *)iph - len_delta, iph, iph->ihl << 2);
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+	} else if (len_delta < 0) {
+		iph = ip_hdr(skb);
+		memset(iph + 1, IPOPT_NOP, opt->optlen);
+	} else
+		iph = ip_hdr(skb);
+
+	if (opt->optlen > 0)
+		memset(opt, 0, sizeof(*opt));
+	opt->optlen = opt_len;
+	opt->cipso = sizeof(struct iphdr);
+	opt->is_changed = 1;
+
+	/* we have to do the following because we are being called from a
+	 * netfilter hook which means the packet already has had the header
+	 * fields populated and the checksum calculated - yes this means we
+	 * are doing more work than needed but we do it to keep the core
+	 * stack clean and tidy */
+	memcpy(iph + 1, buf, buf_len);
+	if (opt_len > buf_len)
+		memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
+	if (len_delta != 0) {
+		iph->ihl = 5 + (opt_len >> 2);
+		iph->tot_len = htons(skb->len);
+	}
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
+ * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CIPSO options from the given packet. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char *cipso_ptr;
+
+	if (opt->cipso == 0)
+		return 0;
+
+	/* since we are changing the packet we should make a copy */
+	ret_val = skb_cow(skb, skb_headroom(skb));
+	if (ret_val < 0)
+		return ret_val;
+
+	/* the easiest thing to do is just replace the cipso option with noop
+	 * options since we don't change the size of the packet, although we
+	 * still need to recalculate the checksum */
+
+	iph = ip_hdr(skb);
+	cipso_ptr = (unsigned char *)iph + opt->cipso;
+	memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+	opt->cipso = 0;
+	opt->is_changed = 1;
+
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
  * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
  * @skb: the packet
  * @secattr: the security attributes
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index abef49376ac8..56fce3ab6c55 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1281,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 	return ret;
 }
 
-static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+static int devinet_conf_sysctl(ctl_table *table,
 			       void __user *oldval, size_t __user *oldlenp,
 			       void __user *newval, size_t newlen)
 {
@@ -1377,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	return ret;
 }
 
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
 				  void __user *oldval, size_t __user *oldlenp,
 				  void __user *newval, size_t newlen)
 {
-	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
-				      newval, newlen);
+	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
 	struct net *net = table->extra2;
 
 	if (ret == 1)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index be3f18a7a40e..2c88da6e7862 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -438,7 +438,7 @@ int ip_options_compile(struct net *net,
 			goto error;
 		}
 		opt->cipso = optptr - iph;
-		if (cipso_v4_validate(&optptr)) {
+		if (cipso_v4_validate(skb, &optptr)) {
 			pp_ptr = optptr;
 			goto error;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8d23cc7efbad..2ea6dcc3e2cc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2913,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 }
 
 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
-					      int __user *name,
-					      int nlen,
 					      void __user *oldval,
 					      size_t __user *oldlenp,
 					      void __user *newval,
@@ -2977,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
 }
 
 static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
-						   int __user *name,
-						   int nlen,
 						   void __user *oldval,
 						   size_t __user *oldlenp,
 						   void __user *newval,
 						   size_t newlen)
 {
 	int old = ip_rt_secret_interval;
-	int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval,
-				 newlen);
+	int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen);
 
 	rt_secret_reschedule(old);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 276d047fb85a..1bb10df8ce7d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 }
 
 /* Validate changes from sysctl interface. */
-static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
-					 int nlen, void __user *oldval,
+static int ipv4_sysctl_local_port_range(ctl_table *table,
+					 void __user *oldval,
 					 size_t __user *oldlenp,
 					 void __user *newval, size_t newlen)
 {
@@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 	};
 
 	inet_get_local_port_range(range, range + 1);
-	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
 		if (range[1] < range[0])
 			ret = -EINVAL;
@@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
109 return ret; 109 return ret;
110} 110}
111 111
112static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, 112static int sysctl_tcp_congestion_control(ctl_table *table,
113 int nlen, void __user *oldval, 113 void __user *oldval,
114 size_t __user *oldlenp, 114 size_t __user *oldlenp,
115 void __user *newval, size_t newlen) 115 void __user *newval, size_t newlen)
116{ 116{
@@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
122 int ret; 122 int ret;
123 123
124 tcp_get_default_congestion_control(val); 124 tcp_get_default_congestion_control(val);
125 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); 125 ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
126 if (ret == 1 && newval && newlen) 126 if (ret == 1 && newval && newlen)
127 ret = tcp_set_default_congestion_control(val); 127 ret = tcp_set_default_congestion_control(val);
128 return ret; 128 return ret;
@@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
165 return ret; 165 return ret;
166} 166}
167 167
168static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, 168static int strategy_allowed_congestion_control(ctl_table *table,
169 int nlen, void __user *oldval, 169 void __user *oldval,
170 size_t __user *oldlenp, 170 size_t __user *oldlenp,
171 void __user *newval, 171 void __user *newval,
172 size_t newlen) 172 size_t newlen)
@@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
179 return -ENOMEM; 179 return -ENOMEM;
180 180
181 tcp_get_available_congestion_control(tbl.data, tbl.maxlen); 181 tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
182 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); 182 ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
183 if (ret == 1 && newval && newlen) 183 if (ret == 1 && newval && newlen)
184 ret = tcp_set_allowed_congestion_control(tbl.data); 184 ret = tcp_set_allowed_congestion_control(tbl.data);
185 kfree(tbl.data); 185 kfree(tbl.data);
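Every sysctl hunk above makes the same mechanical change: the binary-sysctl name/nlen arguments are dropped from the ->strategy prototype and from the matching sysctl_intvec()/sysctl_string()/sysctl_jiffies() helper calls, with no change in behaviour. A minimal sketch of a strategy callback in the new form (the function name and the use of sysctl_intvec() here are illustrative, not part of this patch):

static int my_strategy(ctl_table *table,
		       void __user *oldval, size_t __user *oldlenp,
		       void __user *newval, size_t newlen)
{
	/* sysctl_intvec() still range-checks against table->extra1 and
	 * table->extra2; only the name/nlen arguments have gone away */
	return sysctl_intvec(table, oldval, oldlenp, newval, newlen);
}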
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b6a584b62dd..eea9542728ca 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
3982} 3982}
3983 3983
3984static int addrconf_sysctl_forward_strategy(ctl_table *table, 3984static int addrconf_sysctl_forward_strategy(ctl_table *table,
3985 int __user *name, int nlen,
3986 void __user *oldval, 3985 void __user *oldval,
3987 size_t __user *oldlenp, 3986 size_t __user *oldlenp,
3988 void __user *newval, size_t newlen) 3987 void __user *newval, size_t newlen)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aae7ddcc8a2e..172438320eec 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
1730 return ret; 1730 return ret;
1731} 1731}
1732 1732
1733int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, 1733int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
1734 int nlen, void __user *oldval, 1734 void __user *oldval, size_t __user *oldlenp,
1735 size_t __user *oldlenp,
1736 void __user *newval, size_t newlen) 1735 void __user *newval, size_t newlen)
1737{ 1736{
1738 struct net_device *dev = ctl->extra1; 1737 struct net_device *dev = ctl->extra1;
@@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1745 1744
1746 switch (ctl->ctl_name) { 1745 switch (ctl->ctl_name) {
1747 case NET_NEIGH_REACHABLE_TIME: 1746 case NET_NEIGH_REACHABLE_TIME:
1748 ret = sysctl_jiffies(ctl, name, nlen, 1747 ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen);
1749 oldval, oldlenp, newval, newlen);
1750 break; 1748 break;
1751 case NET_NEIGH_RETRANS_TIME_MS: 1749 case NET_NEIGH_RETRANS_TIME_MS:
1752 case NET_NEIGH_REACHABLE_TIME_MS: 1750 case NET_NEIGH_REACHABLE_TIME_MS:
1753 ret = sysctl_ms_jiffies(ctl, name, nlen, 1751 ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen);
1754 oldval, oldlenp, newval, newlen);
1755 break; 1752 break;
1756 default: 1753 default:
1757 ret = 0; 1754 ret = 0;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 03591d37b9cc..b92df5c1dfcf 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -115,7 +115,7 @@ int nf_conntrack_acct_init(struct net *net)
115 115
116 if (net_eq(net, &init_net)) { 116 if (net_eq(net, &init_net)) {
117#ifdef CONFIG_NF_CT_ACCT 117#ifdef CONFIG_NF_CT_ACCT
118 printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); 118 printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
 119 printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n"); 119 printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
120 printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); 120 printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
121#endif 121#endif
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 373e51e91ce5..1bc3001d1827 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -65,7 +65,7 @@ void
65 struct nf_conntrack_expect *exp) __read_mostly; 65 struct nf_conntrack_expect *exp) __read_mostly;
66EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); 66EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
67 67
68#ifdef DEBUG 68#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
69/* PptpControlMessageType names */ 69/* PptpControlMessageType names */
70const char *const pptp_msg_name[] = { 70const char *const pptp_msg_name[] = {
71 "UNKNOWN_MESSAGE", 71 "UNKNOWN_MESSAGE",
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index 8af18c0a47d9..ea750e9df65f 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -5,7 +5,8 @@
5# 5#
6 6
7# base objects 7# base objects
8obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o 8obj-y := netlabel_user.o netlabel_kapi.o
9obj-y += netlabel_domainhash.o netlabel_addrlist.o
9 10
10# management objects 11# management objects
11obj-y += netlabel_mgmt.o 12obj-y += netlabel_mgmt.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
new file mode 100644
index 000000000000..b0925a303353
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.c
@@ -0,0 +1,388 @@
1/*
2 * NetLabel Network Address Lists
3 *
4 * This file contains network address list functions used to manage ordered
5 * lists of network addresses for use by the NetLabel subsystem. The NetLabel
6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO.
8 *
9 * Author: Paul Moore <paul.moore@hp.com>
10 *
11 */
12
13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
24 * the GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31
32#include <linux/types.h>
33#include <linux/rcupdate.h>
34#include <linux/list.h>
35#include <linux/spinlock.h>
36#include <linux/in.h>
37#include <linux/in6.h>
38#include <linux/ip.h>
39#include <linux/ipv6.h>
40#include <net/ip.h>
41#include <net/ipv6.h>
42#include <linux/audit.h>
43
44#include "netlabel_addrlist.h"
45
46/*
47 * Address List Functions
48 */
49
50/**
51 * netlbl_af4list_search - Search for a matching IPv4 address entry
52 * @addr: IPv4 address
53 * @head: the list head
54 *
55 * Description:
56 * Searches the IPv4 address list given by @head. If a matching address entry
57 * is found it is returned, otherwise NULL is returned. The caller is
58 * responsible for calling the rcu_read_[un]lock() functions.
59 *
60 */
61struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
62 struct list_head *head)
63{
64 struct netlbl_af4list *iter;
65
66 list_for_each_entry_rcu(iter, head, list)
67 if (iter->valid && (addr & iter->mask) == iter->addr)
68 return iter;
69
70 return NULL;
71}
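A hypothetical read-side caller, sketched only to illustrate the locking contract described above (my_list is an assumed list_head populated with struct netlbl_af4list entries):

struct netlbl_af4list *entry;

rcu_read_lock();
entry = netlbl_af4list_search(htonl(0xc0a80101), &my_list); /* 192.168.1.1 */
if (entry != NULL) {
	/* entry->addr and entry->mask may only be used while the
	 * RCU read lock is held */
}
rcu_read_unlock();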
72
73/**
74 * netlbl_af4list_search_exact - Search for an exact IPv4 address entry
75 * @addr: IPv4 address
76 * @mask: IPv4 address mask
77 * @head: the list head
78 *
79 * Description:
 80 * Searches the IPv4 address list given by @head. If an exact match is found
81 * it is returned, otherwise NULL is returned. The caller is responsible for
82 * calling the rcu_read_[un]lock() functions.
83 *
84 */
85struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
86 __be32 mask,
87 struct list_head *head)
88{
89 struct netlbl_af4list *iter;
90
91 list_for_each_entry_rcu(iter, head, list)
92 if (iter->valid && iter->addr == addr && iter->mask == mask)
93 return iter;
94
95 return NULL;
96}
97
98
99#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
100/**
101 * netlbl_af6list_search - Search for a matching IPv6 address entry
102 * @addr: IPv6 address
103 * @head: the list head
104 *
105 * Description:
106 * Searches the IPv6 address list given by @head. If a matching address entry
107 * is found it is returned, otherwise NULL is returned. The caller is
108 * responsible for calling the rcu_read_[un]lock() functions.
109 *
110 */
111struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
112 struct list_head *head)
113{
114 struct netlbl_af6list *iter;
115
116 list_for_each_entry_rcu(iter, head, list)
117 if (iter->valid &&
118 ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
119 return iter;
120
121 return NULL;
122}
123
124/**
125 * netlbl_af6list_search_exact - Search for an exact IPv6 address entry
126 * @addr: IPv6 address
127 * @mask: IPv6 address mask
128 * @head: the list head
129 *
130 * Description:
 131 * Searches the IPv6 address list given by @head. If an exact match is found
132 * it is returned, otherwise NULL is returned. The caller is responsible for
133 * calling the rcu_read_[un]lock() functions.
134 *
135 */
136struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
137 const struct in6_addr *mask,
138 struct list_head *head)
139{
140 struct netlbl_af6list *iter;
141
142 list_for_each_entry_rcu(iter, head, list)
143 if (iter->valid &&
144 ipv6_addr_equal(&iter->addr, addr) &&
145 ipv6_addr_equal(&iter->mask, mask))
146 return iter;
147
148 return NULL;
149}
150#endif /* IPv6 */
151
152/**
153 * netlbl_af4list_add - Add a new IPv4 address entry to a list
154 * @entry: address entry
155 * @head: the list head
156 *
157 * Description:
158 * Add a new address entry to the list pointed to by @head. On success zero is
159 * returned, otherwise a negative value is returned. The caller is responsible
160 * for calling the necessary locking functions.
161 *
162 */
163int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
164{
165 struct netlbl_af4list *iter;
166
167 iter = netlbl_af4list_search(entry->addr, head);
168 if (iter != NULL &&
169 iter->addr == entry->addr && iter->mask == entry->mask)
170 return -EEXIST;
171
172 /* in order to speed up address searches through the list (the common
173 * case) we need to keep the list in order based on the size of the
 174 * address mask such that the entry with the widest mask (largest
 175 * numerical value) appears first in the list */
176 list_for_each_entry_rcu(iter, head, list)
177 if (iter->valid &&
178 ntohl(entry->mask) > ntohl(iter->mask)) {
179 __list_add_rcu(&entry->list,
180 iter->list.prev,
181 &iter->list);
182 return 0;
183 }
184 list_add_tail_rcu(&entry->list, head);
185 return 0;
186}
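Because insertion keeps the masks in descending numerical order, a later netlbl_af4list_search() returns the most specific match first. An illustrative sketch, assuming the caller holds the appropriate write lock and that head is an initialized list_head:

struct netlbl_af4list e16 = { .addr = htonl(0xc0a80000),
			      .mask = htonl(0xffff0000), .valid = 1 };
struct netlbl_af4list e32 = { .addr = htonl(0xc0a80101),
			      .mask = htonl(0xffffffff), .valid = 1 };

netlbl_af4list_add(&e16, &head);	/* 192.168.0.0/16 */
netlbl_af4list_add(&e32, &head);	/* 192.168.1.1/32, ordered ahead of e16 */
/* a search for 192.168.1.1 now hits the /32 entry, not the /16 */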
187
188#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
189/**
190 * netlbl_af6list_add - Add a new IPv6 address entry to a list
191 * @entry: address entry
192 * @head: the list head
193 *
194 * Description:
195 * Add a new address entry to the list pointed to by @head. On success zero is
196 * returned, otherwise a negative value is returned. The caller is responsible
197 * for calling the necessary locking functions.
198 *
199 */
200int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head)
201{
202 struct netlbl_af6list *iter;
203
204 iter = netlbl_af6list_search(&entry->addr, head);
205 if (iter != NULL &&
206 ipv6_addr_equal(&iter->addr, &entry->addr) &&
207 ipv6_addr_equal(&iter->mask, &entry->mask))
208 return -EEXIST;
209
210 /* in order to speed up address searches through the list (the common
211 * case) we need to keep the list in order based on the size of the
 212 * address mask such that the entry with the widest mask (largest
 213 * numerical value) appears first in the list */
214 list_for_each_entry_rcu(iter, head, list)
215 if (iter->valid &&
216 ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
217 __list_add_rcu(&entry->list,
218 iter->list.prev,
219 &iter->list);
220 return 0;
221 }
222 list_add_tail_rcu(&entry->list, head);
223 return 0;
224}
225#endif /* IPv6 */
226
227/**
228 * netlbl_af4list_remove_entry - Remove an IPv4 address entry
229 * @entry: address entry
230 *
231 * Description:
232 * Remove the specified IP address entry. The caller is responsible for
233 * calling the necessary locking functions.
234 *
235 */
236void netlbl_af4list_remove_entry(struct netlbl_af4list *entry)
237{
238 entry->valid = 0;
239 list_del_rcu(&entry->list);
240}
241
242/**
243 * netlbl_af4list_remove - Remove an IPv4 address entry
244 * @addr: IP address
245 * @mask: IP address mask
246 * @head: the list head
247 *
248 * Description:
249 * Remove an IP address entry from the list pointed to by @head. Returns the
250 * entry on success, NULL on failure. The caller is responsible for calling
251 * the necessary locking functions.
252 *
253 */
254struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
255 struct list_head *head)
256{
257 struct netlbl_af4list *entry;
258
259 entry = netlbl_af4list_search(addr, head);
260 if (entry != NULL && entry->addr == addr && entry->mask == mask) {
261 netlbl_af4list_remove_entry(entry);
262 return entry;
263 }
264
265 return NULL;
266}
267
268#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
269/**
270 * netlbl_af6list_remove_entry - Remove an IPv6 address entry
271 * @entry: address entry
272 *
273 * Description:
274 * Remove the specified IP address entry. The caller is responsible for
275 * calling the necessary locking functions.
276 *
277 */
278void netlbl_af6list_remove_entry(struct netlbl_af6list *entry)
279{
280 entry->valid = 0;
281 list_del_rcu(&entry->list);
282}
283
284/**
285 * netlbl_af6list_remove - Remove an IPv6 address entry
286 * @addr: IP address
287 * @mask: IP address mask
288 * @head: the list head
289 *
290 * Description:
291 * Remove an IP address entry from the list pointed to by @head. Returns the
292 * entry on success, NULL on failure. The caller is responsible for calling
293 * the necessary locking functions.
294 *
295 */
296struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
297 const struct in6_addr *mask,
298 struct list_head *head)
299{
300 struct netlbl_af6list *entry;
301
302 entry = netlbl_af6list_search(addr, head);
303 if (entry != NULL &&
304 ipv6_addr_equal(&entry->addr, addr) &&
305 ipv6_addr_equal(&entry->mask, mask)) {
306 netlbl_af6list_remove_entry(entry);
307 return entry;
308 }
309
310 return NULL;
311}
312#endif /* IPv6 */
313
314/*
315 * Audit Helper Functions
316 */
317
318/**
319 * netlbl_af4list_audit_addr - Audit an IPv4 address
320 * @audit_buf: audit buffer
321 * @src: true if source address, false if destination
322 * @dev: network interface
323 * @addr: IP address
324 * @mask: IP address mask
325 *
326 * Description:
327 * Write the IPv4 address and address mask, if necessary, to @audit_buf.
328 *
329 */
330void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
331 int src, const char *dev,
332 __be32 addr, __be32 mask)
333{
334 u32 mask_val = ntohl(mask);
335 char *dir = (src ? "src" : "dst");
336
337 if (dev != NULL)
338 audit_log_format(audit_buf, " netif=%s", dev);
339 audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr));
340 if (mask_val != 0xffffffff) {
341 u32 mask_len = 0;
342 while (mask_val > 0) {
343 mask_val <<= 1;
344 mask_len++;
345 }
346 audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
347 }
348}
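The shift loop turns a contiguous netmask into a prefix length by counting how many left shifts it takes to clear the value. A standalone worked example, for illustration only:

u32 mask_val = 0xffffff00;	/* 255.255.255.0 */
u32 mask_len = 0;

while (mask_val > 0) {
	mask_val <<= 1;		/* each shift drops one leading set bit */
	mask_len++;
}
/* mask_len == 24, so the audit record gains " dst_prefixlen=24" */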
349
350#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
351/**
352 * netlbl_af6list_audit_addr - Audit an IPv6 address
353 * @audit_buf: audit buffer
354 * @src: true if source address, false if destination
355 * @dev: network interface
356 * @addr: IP address
357 * @mask: IP address mask
358 *
359 * Description:
360 * Write the IPv6 address and address mask, if necessary, to @audit_buf.
361 *
362 */
363void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
364 int src,
365 const char *dev,
366 const struct in6_addr *addr,
367 const struct in6_addr *mask)
368{
369 char *dir = (src ? "src" : "dst");
370
371 if (dev != NULL)
372 audit_log_format(audit_buf, " netif=%s", dev);
373 audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr));
374 if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
375 u32 mask_len = 0;
376 u32 mask_val;
377 int iter = -1;
378 while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
379 mask_len += 32;
380 mask_val = ntohl(mask->s6_addr32[iter]);
381 while (mask_val > 0) {
382 mask_val <<= 1;
383 mask_len++;
384 }
385 audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len);
386 }
387}
388#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
new file mode 100644
index 000000000000..0242bead405f
--- /dev/null
+++ b/net/netlabel/netlabel_addrlist.h
@@ -0,0 +1,189 @@
1/*
2 * NetLabel Network Address Lists
3 *
4 * This file contains network address list functions used to manage ordered
5 * lists of network addresses for use by the NetLabel subsystem. The NetLabel
6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO.
8 *
9 * Author: Paul Moore <paul.moore@hp.com>
10 *
11 */
12
13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
24 * the GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31
32#ifndef _NETLABEL_ADDRLIST_H
33#define _NETLABEL_ADDRLIST_H
34
35#include <linux/types.h>
36#include <linux/rcupdate.h>
37#include <linux/list.h>
38#include <linux/in6.h>
39#include <linux/audit.h>
40
41/**
42 * struct netlbl_af4list - NetLabel IPv4 address list
43 * @addr: IPv4 address
44 * @mask: IPv4 address mask
45 * @valid: valid flag
46 * @list: list structure, used internally
47 */
48struct netlbl_af4list {
49 __be32 addr;
50 __be32 mask;
51
52 u32 valid;
53 struct list_head list;
54};
55
56/**
57 * struct netlbl_af6list - NetLabel IPv6 address list
58 * @addr: IPv6 address
59 * @mask: IPv6 address mask
60 * @valid: valid flag
61 * @list: list structure, used internally
62 */
63struct netlbl_af6list {
64 struct in6_addr addr;
65 struct in6_addr mask;
66
67 u32 valid;
68 struct list_head list;
69};
70
71#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list)
72
73static inline struct netlbl_af4list *__af4list_valid(struct list_head *s,
74 struct list_head *h)
75{
76 struct list_head *i = s;
77 struct netlbl_af4list *n = __af4list_entry(s);
78 while (i != h && !n->valid) {
79 i = i->next;
80 n = __af4list_entry(i);
81 }
82 return n;
83}
84
85static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
86 struct list_head *h)
87{
88 struct list_head *i = s;
89 struct netlbl_af4list *n = __af4list_entry(s);
90 while (i != h && !n->valid) {
91 i = rcu_dereference(i->next);
92 n = __af4list_entry(i);
93 }
94 return n;
95}
96
97#define netlbl_af4list_foreach(iter, head) \
98 for (iter = __af4list_valid((head)->next, head); \
99 prefetch(iter->list.next), &iter->list != (head); \
100 iter = __af4list_valid(iter->list.next, head))
101
102#define netlbl_af4list_foreach_rcu(iter, head) \
103 for (iter = __af4list_valid_rcu((head)->next, head); \
104 prefetch(iter->list.next), &iter->list != (head); \
105 iter = __af4list_valid_rcu(iter->list.next, head))
106
107#define netlbl_af4list_foreach_safe(iter, tmp, head) \
108 for (iter = __af4list_valid((head)->next, head), \
109 tmp = __af4list_valid(iter->list.next, head); \
110 &iter->list != (head); \
111 iter = tmp, tmp = __af4list_valid(iter->list.next, head))
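A hypothetical teardown loop using the _safe variant, which caches the next valid entry so the current one can be unlinked and freed in place (my_list and struct my_entry, which embeds the netlbl_af4list as its 'list' member, are assumptions; netlbl_domhsh_free_entry() uses the same pattern):

struct netlbl_af4list *iter, *tmp;

netlbl_af4list_foreach_safe(iter, tmp, &my_list) {
	netlbl_af4list_remove_entry(iter);
	kfree(container_of(iter, struct my_entry, list));
}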
112
113int netlbl_af4list_add(struct netlbl_af4list *entry,
114 struct list_head *head);
115struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
116 struct list_head *head);
117void netlbl_af4list_remove_entry(struct netlbl_af4list *entry);
118struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
119 struct list_head *head);
120struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
121 __be32 mask,
122 struct list_head *head);
123void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
124 int src, const char *dev,
125 __be32 addr, __be32 mask);
126
127#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
128
129#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
130
131static inline struct netlbl_af6list *__af6list_valid(struct list_head *s,
132 struct list_head *h)
133{
134 struct list_head *i = s;
135 struct netlbl_af6list *n = __af6list_entry(s);
136 while (i != h && !n->valid) {
137 i = i->next;
138 n = __af6list_entry(i);
139 }
140 return n;
141}
142
143static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
144 struct list_head *h)
145{
146 struct list_head *i = s;
147 struct netlbl_af6list *n = __af6list_entry(s);
148 while (i != h && !n->valid) {
149 i = rcu_dereference(i->next);
150 n = __af6list_entry(i);
151 }
152 return n;
153}
154
155#define netlbl_af6list_foreach(iter, head) \
156 for (iter = __af6list_valid((head)->next, head); \
157 prefetch(iter->list.next), &iter->list != (head); \
158 iter = __af6list_valid(iter->list.next, head))
159
160#define netlbl_af6list_foreach_rcu(iter, head) \
161 for (iter = __af6list_valid_rcu((head)->next, head); \
162 prefetch(iter->list.next), &iter->list != (head); \
163 iter = __af6list_valid_rcu(iter->list.next, head))
164
165#define netlbl_af6list_foreach_safe(iter, tmp, head) \
166 for (iter = __af6list_valid((head)->next, head), \
167 tmp = __af6list_valid(iter->list.next, head); \
168 &iter->list != (head); \
169 iter = tmp, tmp = __af6list_valid(iter->list.next, head))
170
171int netlbl_af6list_add(struct netlbl_af6list *entry,
172 struct list_head *head);
173struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
174 const struct in6_addr *mask,
175 struct list_head *head);
176void netlbl_af6list_remove_entry(struct netlbl_af6list *entry);
177struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
178 struct list_head *head);
179struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
180 const struct in6_addr *mask,
181 struct list_head *head);
182void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
183 int src,
184 const char *dev,
185 const struct in6_addr *addr,
186 const struct in6_addr *mask);
187#endif /* IPV6 */
188
189#endif
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 0aec318bf0ef..fff32b70efa9 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -43,6 +43,7 @@
43#include "netlabel_user.h" 43#include "netlabel_user.h"
44#include "netlabel_cipso_v4.h" 44#include "netlabel_cipso_v4.h"
45#include "netlabel_mgmt.h" 45#include "netlabel_mgmt.h"
46#include "netlabel_domainhash.h"
46 47
47/* Argument struct for cipso_v4_doi_walk() */ 48/* Argument struct for cipso_v4_doi_walk() */
48struct netlbl_cipsov4_doiwalk_arg { 49struct netlbl_cipsov4_doiwalk_arg {
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg {
51 u32 seq; 52 u32 seq;
52}; 53};
53 54
55/* Argument struct for netlbl_domhsh_walk() */
56struct netlbl_domhsh_walk_arg {
57 struct netlbl_audit *audit_info;
58 u32 doi;
59};
60
54/* NetLabel Generic NETLINK CIPSOv4 family */ 61/* NetLabel Generic NETLINK CIPSOv4 family */
55static struct genl_family netlbl_cipsov4_gnl_family = { 62static struct genl_family netlbl_cipsov4_gnl_family = {
56 .id = GENL_ID_GENERATE, 63 .id = GENL_ID_GENERATE,
@@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1
81 */ 88 */
82 89
83/** 90/**
84 * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
85 * @entry: the entry's RCU field
86 *
87 * Description:
88 * This function is designed to be used as a callback to the call_rcu()
89 * function so that the memory allocated to the DOI definition can be released
90 * safely.
91 *
92 */
93void netlbl_cipsov4_doi_free(struct rcu_head *entry)
94{
95 struct cipso_v4_doi *ptr;
96
97 ptr = container_of(entry, struct cipso_v4_doi, rcu);
98 switch (ptr->type) {
99 case CIPSO_V4_MAP_STD:
100 kfree(ptr->map.std->lvl.cipso);
101 kfree(ptr->map.std->lvl.local);
102 kfree(ptr->map.std->cat.cipso);
103 kfree(ptr->map.std->cat.local);
104 break;
105 }
106 kfree(ptr);
107}
108
109/**
 110 * netlbl_cipsov4_add_common - Parse the common sections of an ADD message 91 * netlbl_cipsov4_add_common - Parse the common sections of an ADD message
111 * @info: the Generic NETLINK info block 92 * @info: the Generic NETLINK info block
112 * @doi_def: the CIPSO V4 DOI definition 93 * @doi_def: the CIPSO V4 DOI definition
@@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
151 * @info: the Generic NETLINK info block 132 * @info: the Generic NETLINK info block
152 * 133 *
153 * Description: 134 * Description:
154 * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message 135 * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD
155 * and add it to the CIPSO V4 engine. Return zero on success and non-zero on 136 * message and add it to the CIPSO V4 engine. Return zero on success and
156 * error. 137 * non-zero on error.
157 * 138 *
158 */ 139 */
159static int netlbl_cipsov4_add_std(struct genl_info *info) 140static int netlbl_cipsov4_add_std(struct genl_info *info)
@@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
183 ret_val = -ENOMEM; 164 ret_val = -ENOMEM;
184 goto add_std_failure; 165 goto add_std_failure;
185 } 166 }
186 doi_def->type = CIPSO_V4_MAP_STD; 167 doi_def->type = CIPSO_V4_MAP_TRANS;
187 168
188 ret_val = netlbl_cipsov4_add_common(info, doi_def); 169 ret_val = netlbl_cipsov4_add_common(info, doi_def);
189 if (ret_val != 0) 170 if (ret_val != 0)
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
342 323
343add_std_failure: 324add_std_failure:
344 if (doi_def) 325 if (doi_def)
345 netlbl_cipsov4_doi_free(&doi_def->rcu); 326 cipso_v4_doi_free(doi_def);
346 return ret_val; 327 return ret_val;
347} 328}
348 329
@@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
379 return 0; 360 return 0;
380 361
381add_pass_failure: 362add_pass_failure:
382 netlbl_cipsov4_doi_free(&doi_def->rcu); 363 cipso_v4_doi_free(doi_def);
364 return ret_val;
365}
366
367/**
368 * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition
369 * @info: the Generic NETLINK info block
370 *
371 * Description:
372 * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD
373 * message and add it to the CIPSO V4 engine. Return zero on success and
374 * non-zero on error.
375 *
376 */
377static int netlbl_cipsov4_add_local(struct genl_info *info)
378{
379 int ret_val;
380 struct cipso_v4_doi *doi_def = NULL;
381
382 if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
383 return -EINVAL;
384
385 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
386 if (doi_def == NULL)
387 return -ENOMEM;
388 doi_def->type = CIPSO_V4_MAP_LOCAL;
389
390 ret_val = netlbl_cipsov4_add_common(info, doi_def);
391 if (ret_val != 0)
392 goto add_local_failure;
393
394 ret_val = cipso_v4_doi_add(doi_def);
395 if (ret_val != 0)
396 goto add_local_failure;
397 return 0;
398
399add_local_failure:
400 cipso_v4_doi_free(doi_def);
383 return ret_val; 401 return ret_val;
384} 402}
385 403
@@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
412 430
413 type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); 431 type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
414 switch (type) { 432 switch (type) {
415 case CIPSO_V4_MAP_STD: 433 case CIPSO_V4_MAP_TRANS:
416 type_str = "std"; 434 type_str = "trans";
417 ret_val = netlbl_cipsov4_add_std(info); 435 ret_val = netlbl_cipsov4_add_std(info);
418 break; 436 break;
419 case CIPSO_V4_MAP_PASS: 437 case CIPSO_V4_MAP_PASS:
420 type_str = "pass"; 438 type_str = "pass";
421 ret_val = netlbl_cipsov4_add_pass(info); 439 ret_val = netlbl_cipsov4_add_pass(info);
422 break; 440 break;
441 case CIPSO_V4_MAP_LOCAL:
442 type_str = "local";
443 ret_val = netlbl_cipsov4_add_local(info);
444 break;
423 } 445 }
424 if (ret_val == 0) 446 if (ret_val == 0)
425 atomic_inc(&netlabel_mgmt_protocount); 447 atomic_inc(&netlabel_mgmt_protocount);
@@ -491,7 +513,7 @@ list_start:
491 doi_def = cipso_v4_doi_getdef(doi); 513 doi_def = cipso_v4_doi_getdef(doi);
492 if (doi_def == NULL) { 514 if (doi_def == NULL) {
493 ret_val = -EINVAL; 515 ret_val = -EINVAL;
494 goto list_failure; 516 goto list_failure_lock;
495 } 517 }
496 518
497 ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); 519 ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type);
@@ -516,7 +538,7 @@ list_start:
516 nla_nest_end(ans_skb, nla_a); 538 nla_nest_end(ans_skb, nla_a);
517 539
518 switch (doi_def->type) { 540 switch (doi_def->type) {
519 case CIPSO_V4_MAP_STD: 541 case CIPSO_V4_MAP_TRANS:
520 nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); 542 nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST);
521 if (nla_a == NULL) { 543 if (nla_a == NULL) {
522 ret_val = -ENOMEM; 544 ret_val = -ENOMEM;
@@ -655,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
655 struct netlink_callback *cb) 677 struct netlink_callback *cb)
656{ 678{
657 struct netlbl_cipsov4_doiwalk_arg cb_arg; 679 struct netlbl_cipsov4_doiwalk_arg cb_arg;
658 int doi_skip = cb->args[0]; 680 u32 doi_skip = cb->args[0];
659 681
660 cb_arg.nl_cb = cb; 682 cb_arg.nl_cb = cb;
661 cb_arg.skb = skb; 683 cb_arg.skb = skb;
@@ -668,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
668} 690}
669 691
670/** 692/**
693 * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE
694 * @entry: LSM domain mapping entry
695 * @arg: the netlbl_domhsh_walk_arg structure
696 *
697 * Description:
698 * This function is intended for use by netlbl_cipsov4_remove() as the callback
699 * for the netlbl_domhsh_walk() function; it removes LSM domain map entries
700 * which are associated with the CIPSO DOI specified in @arg. Returns zero on
701 * success, negative values on failure.
702 *
703 */
704static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
705{
706 struct netlbl_domhsh_walk_arg *cb_arg = arg;
707
708 if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
709 entry->type_def.cipsov4->doi == cb_arg->doi)
710 return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
711
712 return 0;
713}
714
715/**
671 * netlbl_cipsov4_remove - Handle a REMOVE message 716 * netlbl_cipsov4_remove - Handle a REMOVE message
672 * @skb: the NETLINK buffer 717 * @skb: the NETLINK buffer
673 * @info: the Generic NETLINK info block 718 * @info: the Generic NETLINK info block
@@ -681,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
681{ 726{
682 int ret_val = -EINVAL; 727 int ret_val = -EINVAL;
683 u32 doi = 0; 728 u32 doi = 0;
729 struct netlbl_domhsh_walk_arg cb_arg;
684 struct audit_buffer *audit_buf; 730 struct audit_buffer *audit_buf;
685 struct netlbl_audit audit_info; 731 struct netlbl_audit audit_info;
732 u32 skip_bkt = 0;
733 u32 skip_chain = 0;
686 734
687 if (!info->attrs[NLBL_CIPSOV4_A_DOI]) 735 if (!info->attrs[NLBL_CIPSOV4_A_DOI])
688 return -EINVAL; 736 return -EINVAL;
@@ -690,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
690 doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); 738 doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
691 netlbl_netlink_auditinfo(skb, &audit_info); 739 netlbl_netlink_auditinfo(skb, &audit_info);
692 740
693 ret_val = cipso_v4_doi_remove(doi, 741 cb_arg.doi = doi;
694 &audit_info, 742 cb_arg.audit_info = &audit_info;
695 netlbl_cipsov4_doi_free); 743 ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
696 if (ret_val == 0) 744 netlbl_cipsov4_remove_cb, &cb_arg);
697 atomic_dec(&netlabel_mgmt_protocount); 745 if (ret_val == 0 || ret_val == -ENOENT) {
746 ret_val = cipso_v4_doi_remove(doi, &audit_info);
747 if (ret_val == 0)
748 atomic_dec(&netlabel_mgmt_protocount);
749 }
698 750
699 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, 751 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
700 &audit_info); 752 &audit_info);
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 220cb9d06b49..c8a4079261f0 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -45,12 +45,13 @@
45 * NLBL_CIPSOV4_A_MTYPE 45 * NLBL_CIPSOV4_A_MTYPE
46 * NLBL_CIPSOV4_A_TAGLST 46 * NLBL_CIPSOV4_A_TAGLST
47 * 47 *
48 * If using CIPSO_V4_MAP_STD the following attributes are required: 48 * If using CIPSO_V4_MAP_TRANS the following attributes are required:
49 * 49 *
50 * NLBL_CIPSOV4_A_MLSLVLLST 50 * NLBL_CIPSOV4_A_MLSLVLLST
51 * NLBL_CIPSOV4_A_MLSCATLST 51 * NLBL_CIPSOV4_A_MLSCATLST
52 * 52 *
53 * If using CIPSO_V4_MAP_PASS no additional attributes are required. 53 * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
54 * are required.
54 * 55 *
55 * o REMOVE: 56 * o REMOVE:
56 * Sent by an application to remove a specific DOI mapping table from the 57 * Sent by an application to remove a specific DOI mapping table from the
@@ -76,12 +77,13 @@
76 * NLBL_CIPSOV4_A_MTYPE 77 * NLBL_CIPSOV4_A_MTYPE
77 * NLBL_CIPSOV4_A_TAGLST 78 * NLBL_CIPSOV4_A_TAGLST
78 * 79 *
79 * If using CIPSO_V4_MAP_STD the following attributes are required: 80 * If using CIPSO_V4_MAP_TRANS the following attributes are required:
80 * 81 *
81 * NLBL_CIPSOV4_A_MLSLVLLST 82 * NLBL_CIPSOV4_A_MLSLVLLST
82 * NLBL_CIPSOV4_A_MLSCATLST 83 * NLBL_CIPSOV4_A_MLSCATLST
83 * 84 *
84 * If using CIPSO_V4_MAP_PASS no additional attributes are required. 85 * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes
86 * are required.
85 * 87 *
86 * o LISTALL: 88 * o LISTALL:
87 * This message is sent by an application to list the valid DOIs on the 89 * This message is sent by an application to list the valid DOIs on the
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 643c032a3a57..5fadf10e5ddf 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -11,7 +11,7 @@
11 */ 11 */
12 12
13/* 13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
15 * 15 *
16 * This program is free software; you can redistribute it and/or modify 16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by 17 * it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@
40#include <asm/bug.h> 40#include <asm/bug.h>
41 41
42#include "netlabel_mgmt.h" 42#include "netlabel_mgmt.h"
43#include "netlabel_addrlist.h"
43#include "netlabel_domainhash.h" 44#include "netlabel_domainhash.h"
44#include "netlabel_user.h" 45#include "netlabel_user.h"
45 46
@@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
72static void netlbl_domhsh_free_entry(struct rcu_head *entry) 73static void netlbl_domhsh_free_entry(struct rcu_head *entry)
73{ 74{
74 struct netlbl_dom_map *ptr; 75 struct netlbl_dom_map *ptr;
76 struct netlbl_af4list *iter4;
77 struct netlbl_af4list *tmp4;
78#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
79 struct netlbl_af6list *iter6;
80 struct netlbl_af6list *tmp6;
81#endif /* IPv6 */
75 82
76 ptr = container_of(entry, struct netlbl_dom_map, rcu); 83 ptr = container_of(entry, struct netlbl_dom_map, rcu);
84 if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) {
85 netlbl_af4list_foreach_safe(iter4, tmp4,
86 &ptr->type_def.addrsel->list4) {
87 netlbl_af4list_remove_entry(iter4);
88 kfree(netlbl_domhsh_addr4_entry(iter4));
89 }
90#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
91 netlbl_af6list_foreach_safe(iter6, tmp6,
92 &ptr->type_def.addrsel->list6) {
93 netlbl_af6list_remove_entry(iter6);
94 kfree(netlbl_domhsh_addr6_entry(iter6));
95 }
96#endif /* IPv6 */
97 }
77 kfree(ptr->domain); 98 kfree(ptr->domain);
78 kfree(ptr); 99 kfree(ptr);
79} 100}
@@ -115,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key)
115static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) 136static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
116{ 137{
117 u32 bkt; 138 u32 bkt;
139 struct list_head *bkt_list;
118 struct netlbl_dom_map *iter; 140 struct netlbl_dom_map *iter;
119 141
120 if (domain != NULL) { 142 if (domain != NULL) {
121 bkt = netlbl_domhsh_hash(domain); 143 bkt = netlbl_domhsh_hash(domain);
122 list_for_each_entry_rcu(iter, 144 bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt];
123 &rcu_dereference(netlbl_domhsh)->tbl[bkt], 145 list_for_each_entry_rcu(iter, bkt_list, list)
124 list)
125 if (iter->valid && strcmp(iter->domain, domain) == 0) 146 if (iter->valid && strcmp(iter->domain, domain) == 0)
126 return iter; 147 return iter;
127 } 148 }
@@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
156 return entry; 177 return entry;
157} 178}
158 179
180/**
181 * netlbl_domhsh_audit_add - Generate an audit entry for an add event
182 * @entry: the entry being added
183 * @addr4: the IPv4 address information
184 * @addr6: the IPv6 address information
185 * @result: the result code
186 * @audit_info: NetLabel audit information
187 *
188 * Description:
189 * Generate an audit record for adding a new NetLabel/LSM mapping entry with
 190 * the given information. Caller is responsible for holding the necessary
191 * locks.
192 *
193 */
194static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
195 struct netlbl_af4list *addr4,
196 struct netlbl_af6list *addr6,
197 int result,
198 struct netlbl_audit *audit_info)
199{
200 struct audit_buffer *audit_buf;
201 struct cipso_v4_doi *cipsov4 = NULL;
202 u32 type;
203
204 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
205 if (audit_buf != NULL) {
206 audit_log_format(audit_buf, " nlbl_domain=%s",
207 entry->domain ? entry->domain : "(default)");
208 if (addr4 != NULL) {
209 struct netlbl_domaddr4_map *map4;
210 map4 = netlbl_domhsh_addr4_entry(addr4);
211 type = map4->type;
212 cipsov4 = map4->type_def.cipsov4;
213 netlbl_af4list_audit_addr(audit_buf, 0, NULL,
214 addr4->addr, addr4->mask);
215#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
216 } else if (addr6 != NULL) {
217 struct netlbl_domaddr6_map *map6;
218 map6 = netlbl_domhsh_addr6_entry(addr6);
219 type = map6->type;
220 netlbl_af6list_audit_addr(audit_buf, 0, NULL,
221 &addr6->addr, &addr6->mask);
222#endif /* IPv6 */
223 } else {
224 type = entry->type;
225 cipsov4 = entry->type_def.cipsov4;
226 }
227 switch (type) {
228 case NETLBL_NLTYPE_UNLABELED:
229 audit_log_format(audit_buf, " nlbl_protocol=unlbl");
230 break;
231 case NETLBL_NLTYPE_CIPSOV4:
232 BUG_ON(cipsov4 == NULL);
233 audit_log_format(audit_buf,
234 " nlbl_protocol=cipsov4 cipso_doi=%u",
235 cipsov4->doi);
236 break;
237 }
238 audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0);
239 audit_log_end(audit_buf);
240 }
241}
242
159/* 243/*
160 * Domain Hash Table Functions 244 * Domain Hash Table Functions
161 */ 245 */
@@ -213,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size)
213int netlbl_domhsh_add(struct netlbl_dom_map *entry, 297int netlbl_domhsh_add(struct netlbl_dom_map *entry,
214 struct netlbl_audit *audit_info) 298 struct netlbl_audit *audit_info)
215{ 299{
216 int ret_val; 300 int ret_val = 0;
217 u32 bkt; 301 struct netlbl_dom_map *entry_old;
218 struct audit_buffer *audit_buf; 302 struct netlbl_af4list *iter4;
219 303 struct netlbl_af4list *tmp4;
220 switch (entry->type) { 304#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
221 case NETLBL_NLTYPE_UNLABELED: 305 struct netlbl_af6list *iter6;
222 ret_val = 0; 306 struct netlbl_af6list *tmp6;
223 break; 307#endif /* IPv6 */
224 case NETLBL_NLTYPE_CIPSOV4:
225 ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
226 entry->domain);
227 break;
228 default:
229 return -EINVAL;
230 }
231 if (ret_val != 0)
232 return ret_val;
233
234 entry->valid = 1;
235 INIT_RCU_HEAD(&entry->rcu);
236 308
237 rcu_read_lock(); 309 rcu_read_lock();
310
238 spin_lock(&netlbl_domhsh_lock); 311 spin_lock(&netlbl_domhsh_lock);
239 if (entry->domain != NULL) { 312 if (entry->domain != NULL)
240 bkt = netlbl_domhsh_hash(entry->domain); 313 entry_old = netlbl_domhsh_search(entry->domain);
241 if (netlbl_domhsh_search(entry->domain) == NULL) 314 else
315 entry_old = netlbl_domhsh_search_def(entry->domain);
316 if (entry_old == NULL) {
317 entry->valid = 1;
318 INIT_RCU_HEAD(&entry->rcu);
319
320 if (entry->domain != NULL) {
321 u32 bkt = netlbl_domhsh_hash(entry->domain);
242 list_add_tail_rcu(&entry->list, 322 list_add_tail_rcu(&entry->list,
243 &rcu_dereference(netlbl_domhsh)->tbl[bkt]); 323 &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
244 else 324 } else {
245 ret_val = -EEXIST; 325 INIT_LIST_HEAD(&entry->list);
246 } else {
247 INIT_LIST_HEAD(&entry->list);
248 if (rcu_dereference(netlbl_domhsh_def) == NULL)
249 rcu_assign_pointer(netlbl_domhsh_def, entry); 326 rcu_assign_pointer(netlbl_domhsh_def, entry);
250 else
251 ret_val = -EEXIST;
252 }
253 spin_unlock(&netlbl_domhsh_lock);
254 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
255 if (audit_buf != NULL) {
256 audit_log_format(audit_buf,
257 " nlbl_domain=%s",
258 entry->domain ? entry->domain : "(default)");
259 switch (entry->type) {
260 case NETLBL_NLTYPE_UNLABELED:
261 audit_log_format(audit_buf, " nlbl_protocol=unlbl");
262 break;
263 case NETLBL_NLTYPE_CIPSOV4:
264 audit_log_format(audit_buf,
265 " nlbl_protocol=cipsov4 cipso_doi=%u",
266 entry->type_def.cipsov4->doi);
267 break;
268 } 327 }
269 audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
270 audit_log_end(audit_buf);
271 }
272 rcu_read_unlock();
273 328
274 if (ret_val != 0) { 329 if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
275 switch (entry->type) { 330 netlbl_af4list_foreach_rcu(iter4,
276 case NETLBL_NLTYPE_CIPSOV4: 331 &entry->type_def.addrsel->list4)
277 if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, 332 netlbl_domhsh_audit_add(entry, iter4, NULL,
278 entry->domain) != 0) 333 ret_val, audit_info);
279 BUG(); 334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
280 break; 335 netlbl_af6list_foreach_rcu(iter6,
336 &entry->type_def.addrsel->list6)
337 netlbl_domhsh_audit_add(entry, NULL, iter6,
338 ret_val, audit_info);
339#endif /* IPv6 */
340 } else
341 netlbl_domhsh_audit_add(entry, NULL, NULL,
342 ret_val, audit_info);
343 } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT &&
344 entry->type == NETLBL_NLTYPE_ADDRSELECT) {
345 struct list_head *old_list4;
346 struct list_head *old_list6;
347
348 old_list4 = &entry_old->type_def.addrsel->list4;
349 old_list6 = &entry_old->type_def.addrsel->list6;
350
 351 /* we only allow the addition of address selectors if none of
 352 * the selectors already exist in the existing domain map */
353 netlbl_af4list_foreach_rcu(iter4,
354 &entry->type_def.addrsel->list4)
355 if (netlbl_af4list_search_exact(iter4->addr,
356 iter4->mask,
357 old_list4)) {
358 ret_val = -EEXIST;
359 goto add_return;
360 }
361#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
362 netlbl_af6list_foreach_rcu(iter6,
363 &entry->type_def.addrsel->list6)
364 if (netlbl_af6list_search_exact(&iter6->addr,
365 &iter6->mask,
366 old_list6)) {
367 ret_val = -EEXIST;
368 goto add_return;
369 }
370#endif /* IPv6 */
371
372 netlbl_af4list_foreach_safe(iter4, tmp4,
373 &entry->type_def.addrsel->list4) {
374 netlbl_af4list_remove_entry(iter4);
375 iter4->valid = 1;
376 ret_val = netlbl_af4list_add(iter4, old_list4);
377 netlbl_domhsh_audit_add(entry_old, iter4, NULL,
378 ret_val, audit_info);
379 if (ret_val != 0)
380 goto add_return;
281 } 381 }
282 } 382#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
383 netlbl_af6list_foreach_safe(iter6, tmp6,
384 &entry->type_def.addrsel->list6) {
385 netlbl_af6list_remove_entry(iter6);
386 iter6->valid = 1;
387 ret_val = netlbl_af6list_add(iter6, old_list6);
388 netlbl_domhsh_audit_add(entry_old, NULL, iter6,
389 ret_val, audit_info);
390 if (ret_val != 0)
391 goto add_return;
392 }
393#endif /* IPv6 */
394 } else
395 ret_val = -EINVAL;
283 396
397add_return:
398 spin_unlock(&netlbl_domhsh_lock);
399 rcu_read_unlock();
284 return ret_val; 400 return ret_val;
285} 401}
286 402
@@ -302,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
302} 418}
303 419
304/** 420/**
305 * netlbl_domhsh_remove - Removes an entry from the domain hash table 421 * netlbl_domhsh_remove_entry - Removes a given entry from the domain table
306 * @domain: the domain to remove 422 * @entry: the entry to remove
307 * @audit_info: NetLabel audit information 423 * @audit_info: NetLabel audit information
308 * 424 *
309 * Description: 425 * Description:
310 * Removes an entry from the domain hash table and handles any updates to the 426 * Removes an entry from the domain hash table and handles any updates to the
311 * lower level protocol handler (i.e. CIPSO). Returns zero on success, 427 * lower level protocol handler (i.e. CIPSO). Caller is responsible for
312 * negative on failure. 428 * ensuring that the RCU read lock is held. Returns zero on success, negative
429 * on failure.
313 * 430 *
314 */ 431 */
315int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) 432int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
433 struct netlbl_audit *audit_info)
316{ 434{
317 int ret_val = -ENOENT; 435 int ret_val = 0;
318 struct netlbl_dom_map *entry;
319 struct audit_buffer *audit_buf; 436 struct audit_buffer *audit_buf;
320 437
321 rcu_read_lock();
322 if (domain)
323 entry = netlbl_domhsh_search(domain);
324 else
325 entry = netlbl_domhsh_search_def(domain);
326 if (entry == NULL) 438 if (entry == NULL)
327 goto remove_return; 439 return -ENOENT;
328 switch (entry->type) { 440
329 case NETLBL_NLTYPE_CIPSOV4:
330 cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
331 entry->domain);
332 break;
333 }
334 spin_lock(&netlbl_domhsh_lock); 441 spin_lock(&netlbl_domhsh_lock);
335 if (entry->valid) { 442 if (entry->valid) {
336 entry->valid = 0; 443 entry->valid = 0;
@@ -338,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
338 list_del_rcu(&entry->list); 445 list_del_rcu(&entry->list);
339 else 446 else
340 rcu_assign_pointer(netlbl_domhsh_def, NULL); 447 rcu_assign_pointer(netlbl_domhsh_def, NULL);
341 ret_val = 0; 448 } else
342 } 449 ret_val = -ENOENT;
343 spin_unlock(&netlbl_domhsh_lock); 450 spin_unlock(&netlbl_domhsh_lock);
344 451
345 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); 452 audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
@@ -351,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
351 audit_log_end(audit_buf); 458 audit_log_end(audit_buf);
352 } 459 }
353 460
354remove_return: 461 if (ret_val == 0) {
355 rcu_read_unlock(); 462 struct netlbl_af4list *iter4;
356 if (ret_val == 0) 463 struct netlbl_domaddr4_map *map4;
464
465 switch (entry->type) {
466 case NETLBL_NLTYPE_ADDRSELECT:
467 netlbl_af4list_foreach_rcu(iter4,
468 &entry->type_def.addrsel->list4) {
469 map4 = netlbl_domhsh_addr4_entry(iter4);
470 cipso_v4_doi_putdef(map4->type_def.cipsov4);
471 }
472 /* no need to check the IPv6 list since we currently
473 * support only unlabeled protocols for IPv6 */
474 break;
475 case NETLBL_NLTYPE_CIPSOV4:
476 cipso_v4_doi_putdef(entry->type_def.cipsov4);
477 break;
478 }
357 call_rcu(&entry->rcu, netlbl_domhsh_free_entry); 479 call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
480 }
481
482 return ret_val;
483}
484
485/**
486 * netlbl_domhsh_remove - Removes an entry from the domain hash table
487 * @domain: the domain to remove
488 * @audit_info: NetLabel audit information
489 *
490 * Description:
491 * Removes an entry from the domain hash table and handles any updates to the
492 * lower level protocol handler (i.e. CIPSO). Returns zero on success,
493 * negative on failure.
494 *
495 */
496int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
497{
498 int ret_val;
499 struct netlbl_dom_map *entry;
500
501 rcu_read_lock();
502 if (domain)
503 entry = netlbl_domhsh_search(domain);
504 else
505 entry = netlbl_domhsh_search_def(domain);
506 ret_val = netlbl_domhsh_remove_entry(entry, audit_info);
507 rcu_read_unlock();
508
358 return ret_val; 509 return ret_val;
359} 510}
360 511
@@ -389,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
389} 540}
390 541
391/** 542/**
543 * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table
544 * @domain: the domain name to search for
545 * @addr: the IP address to search for
546 *
547 * Description:
548 * Look through the domain hash table searching for an entry to match @domain
 549 * and @addr, return a pointer to the matching entry or NULL. The caller is
550 * responsible for ensuring that rcu_read_[un]lock() is called.
551 *
552 */
553struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
554 __be32 addr)
555{
556 struct netlbl_dom_map *dom_iter;
557 struct netlbl_af4list *addr_iter;
558
559 dom_iter = netlbl_domhsh_search_def(domain);
560 if (dom_iter == NULL)
561 return NULL;
562 if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
563 return NULL;
564
565 addr_iter = netlbl_af4list_search(addr,
566 &dom_iter->type_def.addrsel->list4);
567 if (addr_iter == NULL)
568 return NULL;
569
570 return netlbl_domhsh_addr4_entry(addr_iter);
571}
572
573#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
574/**
575 * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
576 * @domain: the domain name to search for
577 * @addr: the IP address to search for
578 *
579 * Description:
580 * Look through the domain hash table searching for an entry to match @domain
 581 * and @addr, return a pointer to the matching entry or NULL. The caller is
582 * responsible for ensuring that rcu_read_[un]lock() is called.
583 *
584 */
585struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
586 const struct in6_addr *addr)
587{
588 struct netlbl_dom_map *dom_iter;
589 struct netlbl_af6list *addr_iter;
590
591 dom_iter = netlbl_domhsh_search_def(domain);
592 if (dom_iter == NULL)
593 return NULL;
594 if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
595 return NULL;
596
597 addr_iter = netlbl_af6list_search(addr,
598 &dom_iter->type_def.addrsel->list6);
599 if (addr_iter == NULL)
600 return NULL;
601
602 return netlbl_domhsh_addr6_entry(addr_iter);
603}
604#endif /* IPv6 */
605
606/**
392 * netlbl_domhsh_walk - Iterate through the domain mapping hash table 607 * netlbl_domhsh_walk - Iterate through the domain mapping hash table
393 * @skip_bkt: the number of buckets to skip at the start 608 * @skip_bkt: the number of buckets to skip at the start
394 * @skip_chain: the number of entries to skip in the first iterated bucket 609 * @skip_chain: the number of entries to skip in the first iterated bucket
@@ -410,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
410{ 625{
411 int ret_val = -ENOENT; 626 int ret_val = -ENOENT;
412 u32 iter_bkt; 627 u32 iter_bkt;
628 struct list_head *iter_list;
413 struct netlbl_dom_map *iter_entry; 629 struct netlbl_dom_map *iter_entry;
414 u32 chain_cnt = 0; 630 u32 chain_cnt = 0;
415 631
@@ -417,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
417 for (iter_bkt = *skip_bkt; 633 for (iter_bkt = *skip_bkt;
418 iter_bkt < rcu_dereference(netlbl_domhsh)->size; 634 iter_bkt < rcu_dereference(netlbl_domhsh)->size;
419 iter_bkt++, chain_cnt = 0) { 635 iter_bkt++, chain_cnt = 0) {
420 list_for_each_entry_rcu(iter_entry, 636 iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt];
421 &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt], 637 list_for_each_entry_rcu(iter_entry, iter_list, list)
422 list)
423 if (iter_entry->valid) { 638 if (iter_entry->valid) {
424 if (chain_cnt++ < *skip_chain) 639 if (chain_cnt++ < *skip_chain)
425 continue; 640 continue;
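The skip_bkt/skip_chain cursor is what makes the walk restartable, and it is how the CIPSOv4 REMOVE handler above drives it. A minimal sketch of a full walk from the start (my_callback and my_arg are hypothetical; the callback must match the int (*)(struct netlbl_dom_map *, void *) signature):

u32 skip_bkt = 0;
u32 skip_chain = 0;
int ret;

ret = netlbl_domhsh_walk(&skip_bkt, &skip_chain, my_callback, &my_arg);
/* -ENOENT simply means no valid entry remained past the skip point */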
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 8220990ceb96..bfcb6763a1a1 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -11,7 +11,7 @@
11 */ 11 */
12 12
13/* 13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
15 * 15 *
16 * This program is free software; you can redistribute it and/or modify 16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by 17 * it under the terms of the GNU General Public License as published by
@@ -36,16 +36,43 @@
36#include <linux/rcupdate.h> 36#include <linux/rcupdate.h>
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "netlabel_addrlist.h"
40
39/* Domain hash table size */ 41/* Domain hash table size */
40/* XXX - currently this number is an uneducated guess */ 42/* XXX - currently this number is an uneducated guess */
41#define NETLBL_DOMHSH_BITSIZE 7 43#define NETLBL_DOMHSH_BITSIZE 7
42 44
43/* Domain mapping definition struct */ 45/* Domain mapping definition structures */
46#define netlbl_domhsh_addr4_entry(iter) \
47 container_of(iter, struct netlbl_domaddr4_map, list)
48struct netlbl_domaddr4_map {
49 u32 type;
50 union {
51 struct cipso_v4_doi *cipsov4;
52 } type_def;
53
54 struct netlbl_af4list list;
55};
56#define netlbl_domhsh_addr6_entry(iter) \
57 container_of(iter, struct netlbl_domaddr6_map, list)
58struct netlbl_domaddr6_map {
59 u32 type;
60
61 /* NOTE: no 'type_def' union needed at present since we don't currently
62 * support any IPv6 labeling protocols */
63
64 struct netlbl_af6list list;
65};
66struct netlbl_domaddr_map {
67 struct list_head list4;
68 struct list_head list6;
69};
44struct netlbl_dom_map { 70struct netlbl_dom_map {
45 char *domain; 71 char *domain;
46 u32 type; 72 u32 type;
47 union { 73 union {
48 struct cipso_v4_doi *cipsov4; 74 struct cipso_v4_doi *cipsov4;
75 struct netlbl_domaddr_map *addrsel;
49 } type_def; 76 } type_def;
50 77
51 u32 valid; 78 u32 valid;
@@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
61 struct netlbl_audit *audit_info); 88 struct netlbl_audit *audit_info);
62int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, 89int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
63 struct netlbl_audit *audit_info); 90 struct netlbl_audit *audit_info);
91int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
92 struct netlbl_audit *audit_info);
64int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); 93int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
65int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); 94int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
66struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); 95struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
96struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
97 __be32 addr);
67int netlbl_domhsh_walk(u32 *skip_bkt, 98int netlbl_domhsh_walk(u32 *skip_bkt,
68 u32 *skip_chain, 99 u32 *skip_chain,
69 int (*callback) (struct netlbl_dom_map *entry, void *arg), 100 int (*callback) (struct netlbl_dom_map *entry, void *arg),
70 void *cb_arg); 101 void *cb_arg);
71 102
103#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
104struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
105 const struct in6_addr *addr);
106#endif /* IPv6 */
107
72#endif 108#endif
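The netlbl_domhsh_addr4_entry() and netlbl_domhsh_addr6_entry() macros above are thin wrappers around the kernel's container_of(): given a pointer to the embedded netlbl_af4list/netlbl_af6list node handed back by the generic address-list code, they recover the enclosing map structure. A self-contained userspace sketch of the idiom (the struct names are stand-ins for the kernel types):

    #include <stddef.h>
    #include <stdio.h>

    /* Userspace rendition of the kernel's container_of() */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct af4list {                /* stand-in for struct netlbl_af4list */
        unsigned addr, mask;
        struct af4list *next;
    };

    struct domaddr4_map {           /* stand-in for struct netlbl_domaddr4_map */
        unsigned type;
        struct af4list list;        /* embedded generic list node */
    };

    #define domaddr4_entry(iter) \
        container_of(iter, struct domaddr4_map, list)

    int main(void)
    {
        struct domaddr4_map map = { .type = 42 };
        struct af4list *node = &map.list;

        /* given only the embedded node, recover the enclosing map */
        printf("type = %u\n", domaddr4_entry(node)->type);
        return 0;
    }

Subtracting offsetof() is what lets one generic list implementation (netlabel_addrlist.c) serve several different entry types.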
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 39793a1a93aa..b32eceb3ab0d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12/* 12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
14 * 14 *
15 * This program is free software; you can redistribute it and/or modify 15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by 16 * it under the terms of the GNU General Public License as published by
@@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain,
82 82
83 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 83 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
84 if (entry == NULL) 84 if (entry == NULL)
85 goto cfg_unlbl_add_map_failure; 85 return -ENOMEM;
86 if (domain != NULL) { 86 if (domain != NULL) {
87 entry->domain = kstrdup(domain, GFP_ATOMIC); 87 entry->domain = kstrdup(domain, GFP_ATOMIC);
88 if (entry->domain == NULL) 88 if (entry->domain == NULL)
@@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure:
104} 104}
105 105
106/** 106/**
107 * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition
108 * @doi_def: the DOI definition
109 * @audit_info: NetLabel audit information
110 *
111 * Description:
112 * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on
113 * success, negative values on failure.
114 *
115 */
116int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def,
117 struct netlbl_audit *audit_info)
118{
119 int ret_val;
120 const char *type_str;
121 struct audit_buffer *audit_buf;
122
123 ret_val = cipso_v4_doi_add(doi_def);
124
125 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
126 audit_info);
127 if (audit_buf != NULL) {
128 switch (doi_def->type) {
129 case CIPSO_V4_MAP_STD:
130 type_str = "std";
131 break;
132 case CIPSO_V4_MAP_PASS:
133 type_str = "pass";
134 break;
135 default:
136 type_str = "(unknown)";
137 }
138 audit_log_format(audit_buf,
139 " cipso_doi=%u cipso_type=%s res=%u",
140 doi_def->doi,
141 type_str,
142 ret_val == 0 ? 1 : 0);
143 audit_log_end(audit_buf);
144 }
145
146 return ret_val;
147}
148
149/**
150 * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping 107 * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping
151 * @doi_def: the DOI definition 108 * @doi_def: the DOI definition
152 * @domain: the domain mapping to add 109 * @domain: the domain mapping to add
@@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
164 struct netlbl_audit *audit_info) 121 struct netlbl_audit *audit_info)
165{ 122{
166 int ret_val = -ENOMEM; 123 int ret_val = -ENOMEM;
124 u32 doi;
125 u32 doi_type;
167 struct netlbl_dom_map *entry; 126 struct netlbl_dom_map *entry;
127 const char *type_str;
128 struct audit_buffer *audit_buf;
129
130 doi = doi_def->doi;
131 doi_type = doi_def->type;
168 132
169 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 133 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
170 if (entry == NULL) 134 if (entry == NULL)
171 goto cfg_cipsov4_add_map_failure; 135 return -ENOMEM;
172 if (domain != NULL) { 136 if (domain != NULL) {
173 entry->domain = kstrdup(domain, GFP_ATOMIC); 137 entry->domain = kstrdup(domain, GFP_ATOMIC);
174 if (entry->domain == NULL) 138 if (entry->domain == NULL)
175 goto cfg_cipsov4_add_map_failure; 139 goto cfg_cipsov4_add_map_failure;
176 } 140 }
177 entry->type = NETLBL_NLTYPE_CIPSOV4;
178 entry->type_def.cipsov4 = doi_def;
179
180 /* Grab a RCU read lock here so nothing happens to the doi_def variable
181 * between adding it to the CIPSOv4 protocol engine and adding a
182 * domain mapping for it. */
183 141
184 rcu_read_lock(); 142 ret_val = cipso_v4_doi_add(doi_def);
185 ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info);
186 if (ret_val != 0) 143 if (ret_val != 0)
187 goto cfg_cipsov4_add_map_failure_unlock; 144 goto cfg_cipsov4_add_map_failure_remove_doi;
145 entry->type = NETLBL_NLTYPE_CIPSOV4;
146 entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi);
147 if (entry->type_def.cipsov4 == NULL) {
148 ret_val = -ENOENT;
149 goto cfg_cipsov4_add_map_failure_remove_doi;
150 }
188 ret_val = netlbl_domhsh_add(entry, audit_info); 151 ret_val = netlbl_domhsh_add(entry, audit_info);
189 if (ret_val != 0) 152 if (ret_val != 0)
190 goto cfg_cipsov4_add_map_failure_remove_doi; 153 goto cfg_cipsov4_add_map_failure_release_doi;
191 rcu_read_unlock();
192 154
193 return 0; 155cfg_cipsov4_add_map_return:
156 audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
157 audit_info);
158 if (audit_buf != NULL) {
159 switch (doi_type) {
160 case CIPSO_V4_MAP_TRANS:
161 type_str = "trans";
162 break;
163 case CIPSO_V4_MAP_PASS:
164 type_str = "pass";
165 break;
166 case CIPSO_V4_MAP_LOCAL:
167 type_str = "local";
168 break;
169 default:
170 type_str = "(unknown)";
171 }
172 audit_log_format(audit_buf,
173 " cipso_doi=%u cipso_type=%s res=%u",
174 doi, type_str, ret_val == 0 ? 1 : 0);
175 audit_log_end(audit_buf);
176 }
194 177
178 return ret_val;
179
180cfg_cipsov4_add_map_failure_release_doi:
181 cipso_v4_doi_putdef(doi_def);
195cfg_cipsov4_add_map_failure_remove_doi: 182cfg_cipsov4_add_map_failure_remove_doi:
196 cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); 183 cipso_v4_doi_remove(doi, audit_info);
197cfg_cipsov4_add_map_failure_unlock:
198 rcu_read_unlock();
199cfg_cipsov4_add_map_failure: 184cfg_cipsov4_add_map_failure:
200 if (entry != NULL) 185 if (entry != NULL)
201 kfree(entry->domain); 186 kfree(entry->domain);
202 kfree(entry); 187 kfree(entry);
203 return ret_val; 188 goto cfg_cipsov4_add_map_return;
204}
205
206/**
207 * netlbl_cfg_cipsov4_del - Remove an existing CIPSOv4 DOI definition
208 * @doi: the CIPSO DOI value
209 * @audit_info: NetLabel audit information
210 *
211 * Description:
212 * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem.
213 * Returns zero on success, negative values on failure.
214 *
215 */
216int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info)
217{
218 return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free);
219} 189}
220 190
221/* 191/*
@@ -452,7 +422,9 @@ int netlbl_enabled(void)
452 * Attach the correct label to the given socket using the security attributes 422 * Attach the correct label to the given socket using the security attributes
453 * specified in @secattr. This function requires exclusive access to @sk, 423 * specified in @secattr. This function requires exclusive access to @sk,
454 * which means it either needs to be in the process of being created or locked. 424 * which means it either needs to be in the process of being created or locked.
455 * Returns zero on success, negative values on failure. 425 * Returns zero on success, -EDESTADDRREQ if the domain is configured to use
426 * network address selectors (can't blindly label the socket), and negative
427 * values on all other failures.
456 * 428 *
457 */ 429 */
458int netlbl_sock_setattr(struct sock *sk, 430int netlbl_sock_setattr(struct sock *sk,
@@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk,
466 if (dom_entry == NULL) 438 if (dom_entry == NULL)
467 goto socket_setattr_return; 439 goto socket_setattr_return;
468 switch (dom_entry->type) { 440 switch (dom_entry->type) {
441 case NETLBL_NLTYPE_ADDRSELECT:
442 ret_val = -EDESTADDRREQ;
443 break;
469 case NETLBL_NLTYPE_CIPSOV4: 444 case NETLBL_NLTYPE_CIPSOV4:
470 ret_val = cipso_v4_sock_setattr(sk, 445 ret_val = cipso_v4_sock_setattr(sk,
471 dom_entry->type_def.cipsov4, 446 dom_entry->type_def.cipsov4,
@@ -484,6 +459,20 @@ socket_setattr_return:
484} 459}
485 460
486/** 461/**
462 * netlbl_sock_delattr - Delete all the NetLabel labels on a socket
463 * @sk: the socket
464 *
465 * Description:
466 * Remove all the NetLabel labeling from @sk. The caller is responsible for
467 * ensuring that @sk is locked.
468 *
469 */
470void netlbl_sock_delattr(struct sock *sk)
471{
472 cipso_v4_sock_delattr(sk);
473}
474
475/**
487 * netlbl_sock_getattr - Determine the security attributes of a sock 476 * netlbl_sock_getattr - Determine the security attributes of a sock
488 * @sk: the sock 477 * @sk: the sock
489 * @secattr: the security attributes 478 * @secattr: the security attributes
@@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
501} 490}
502 491
503/** 492/**
493 * netlbl_conn_setattr - Label a connected socket using the correct protocol
494 * @sk: the socket to label
495 * @addr: the destination address
496 * @secattr: the security attributes
497 *
498 * Description:
499 * Attach the correct label to the given connected socket using the security
500 * attributes specified in @secattr. The caller is responsible for ensuring
501 * that @sk is locked. Returns zero on success, negative values on failure.
502 *
503 */
504int netlbl_conn_setattr(struct sock *sk,
505 struct sockaddr *addr,
506 const struct netlbl_lsm_secattr *secattr)
507{
508 int ret_val;
509 struct sockaddr_in *addr4;
510 struct netlbl_domaddr4_map *af4_entry;
511
512 rcu_read_lock();
513 switch (addr->sa_family) {
514 case AF_INET:
515 addr4 = (struct sockaddr_in *)addr;
516 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
517 addr4->sin_addr.s_addr);
518 if (af4_entry == NULL) {
519 ret_val = -ENOENT;
520 goto conn_setattr_return;
521 }
522 switch (af4_entry->type) {
523 case NETLBL_NLTYPE_CIPSOV4:
524 ret_val = cipso_v4_sock_setattr(sk,
525 af4_entry->type_def.cipsov4,
526 secattr);
527 break;
528 case NETLBL_NLTYPE_UNLABELED:
529 /* just delete the protocols we support right now,
530 * but we could remove other protocols if needed */
531 cipso_v4_sock_delattr(sk);
532 ret_val = 0;
533 break;
534 default:
535 ret_val = -ENOENT;
536 }
537 break;
538#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
539 case AF_INET6:
540 /* since we don't support any IPv6 labeling protocols right
541 * now we can optimize everything away until we do */
542 ret_val = 0;
543 break;
544#endif /* IPv6 */
545 default:
546 ret_val = 0;
547 }
548
549conn_setattr_return:
550 rcu_read_unlock();
551 return ret_val;
552}
553
554/**
555 * netlbl_skbuff_setattr - Label a packet using the correct protocol
556 * @skb: the packet
557 * @family: protocol family
558 * @secattr: the security attributes
559 *
560 * Description:
561 * Attach the correct label to the given packet using the security attributes
562 * specified in @secattr. Returns zero on success, negative values on failure.
563 *
564 */
565int netlbl_skbuff_setattr(struct sk_buff *skb,
566 u16 family,
567 const struct netlbl_lsm_secattr *secattr)
568{
569 int ret_val;
570 struct iphdr *hdr4;
571 struct netlbl_domaddr4_map *af4_entry;
572
573 rcu_read_lock();
574 switch (family) {
575 case AF_INET:
576 hdr4 = ip_hdr(skb);
577 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
578 hdr4->daddr);
579 if (af4_entry == NULL) {
580 ret_val = -ENOENT;
581 goto skbuff_setattr_return;
582 }
583 switch (af4_entry->type) {
584 case NETLBL_NLTYPE_CIPSOV4:
585 ret_val = cipso_v4_skbuff_setattr(skb,
586 af4_entry->type_def.cipsov4,
587 secattr);
588 break;
589 case NETLBL_NLTYPE_UNLABELED:
590 /* just delete the protocols we support right now,
591 * but we could remove other protocols if needed */
592 ret_val = cipso_v4_skbuff_delattr(skb);
593 break;
594 default:
595 ret_val = -ENOENT;
596 }
597 break;
598#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
599 case AF_INET6:
600 /* since we don't support any IPv6 labeling protocols right
601 * now we can optimize everything away until we do */
602 ret_val = 0;
603 break;
604#endif /* IPv6 */
605 default:
606 ret_val = 0;
607 }
608
609skbuff_setattr_return:
610 rcu_read_unlock();
611 return ret_val;
612}
613
614/**
504 * netlbl_skbuff_getattr - Determine the security attributes of a packet 615 * netlbl_skbuff_getattr - Determine the security attributes of a packet
505 * @skb: the packet 616 * @skb: the packet
506 * @family: protocol family 617 * @family: protocol family
@@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
528 * netlbl_skbuff_err - Handle a LSM error on a sk_buff 639 * netlbl_skbuff_err - Handle a LSM error on a sk_buff
529 * @skb: the packet 640 * @skb: the packet
530 * @error: the error code 641 * @error: the error code
642 * @gateway: true if host is acting as a gateway, false otherwise
531 * 643 *
532 * Description: 644 * Description:
533 * Deal with a LSM problem when handling the packet in @skb, typically this is 645 * Deal with a LSM problem when handling the packet in @skb, typically this is
@@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
535 * according to the packet's labeling protocol. 647 * according to the packet's labeling protocol.
536 * 648 *
537 */ 649 */
538void netlbl_skbuff_err(struct sk_buff *skb, int error) 650void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
539{ 651{
540 if (CIPSO_V4_OPTEXIST(skb)) 652 if (CIPSO_V4_OPTEXIST(skb))
541 cipso_v4_error(skb, error, 0); 653 cipso_v4_error(skb, error, gateway);
542} 654}
543 655
544/** 656/**
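Note the control-flow shape of the reworked netlbl_cfg_cipsov4_add_map() above: every path, success or failure, passes through the cfg_cipsov4_add_map_return label exactly once, so a single AUDIT_MAC_CIPSOV4_ADD record is always emitted with the final result, and the failure labels unwind the DOI reference and the allocations in reverse order before jumping back up. A compilable sketch of that shape, with hypothetical stand-ins for the DOI and hash-table calls:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-ins for cipso_v4_doi_add()/_remove(). */
    static int doi_add(void) { return 0; }      /* 0 on success */
    static void doi_remove(void) { }

    static int add_map(const char *domain)
    {
        int ret;
        char *entry;

        entry = strdup(domain);                 /* the domain mapping */
        if (entry == NULL)
            return -ENOMEM;

        ret = doi_add();
        if (ret != 0)
            goto failure;
        /* in the kernel the entry is handed to the domain hash here
         * and is no longer ours to free on the success path */

    add_map_return:
        /* exactly one audit record for every outcome */
        printf("audit: domain=%s res=%u\n", domain, ret == 0 ? 1 : 0);
        return ret;

    failure:
        doi_remove();                           /* unwind, newest first */
        free(entry);
        goto add_map_return;
    }

    int main(void)
    {
        return add_map("example.com") == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
    }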
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 44be5d5261f4..ee769ecaa13c 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12/* 12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
14 * 14 *
15 * This program is free software; you can redistribute it and/or modify 15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by 16 * it under the terms of the GNU General Public License as published by
@@ -32,9 +32,13 @@
32#include <linux/socket.h> 32#include <linux/socket.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/skbuff.h> 34#include <linux/skbuff.h>
35#include <linux/in.h>
36#include <linux/in6.h>
35#include <net/sock.h> 37#include <net/sock.h>
36#include <net/netlink.h> 38#include <net/netlink.h>
37#include <net/genetlink.h> 39#include <net/genetlink.h>
40#include <net/ip.h>
41#include <net/ipv6.h>
38#include <net/netlabel.h> 42#include <net/netlabel.h>
39#include <net/cipso_ipv4.h> 43#include <net/cipso_ipv4.h>
40#include <asm/atomic.h> 44#include <asm/atomic.h>
@@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
71}; 75};
72 76
73/* 77/*
74 * NetLabel Command Handlers 78 * Helper Functions
75 */ 79 */
76 80
77/** 81/**
78 * netlbl_mgmt_add - Handle an ADD message 82 * netlbl_mgmt_add - Handle an ADD message
79 * @skb: the NETLINK buffer
80 * @info: the Generic NETLINK info block 83 * @info: the Generic NETLINK info block
84 * @audit_info: NetLabel audit information
81 * 85 *
82 * Description: 86 * Description:
83 * Process a user generated ADD message and add the domains from the message 87 * Helper function for the ADD and ADDDEF messages to add the domain mappings
84 * to the hash table. See netlabel.h for a description of the message format. 88 * from the message to the hash table. See netlabel.h for a description of the
85 * Returns zero on success, negative values on failure. 89 * message format. Returns zero on success, negative values on failure.
86 * 90 *
87 */ 91 */
88static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) 92static int netlbl_mgmt_add_common(struct genl_info *info,
93 struct netlbl_audit *audit_info)
89{ 94{
90 int ret_val = -EINVAL; 95 int ret_val = -EINVAL;
91 struct netlbl_dom_map *entry = NULL; 96 struct netlbl_dom_map *entry = NULL;
92 size_t tmp_size; 97 struct netlbl_domaddr_map *addrmap = NULL;
98 struct cipso_v4_doi *cipsov4 = NULL;
93 u32 tmp_val; 99 u32 tmp_val;
94 struct netlbl_audit audit_info;
95
96 if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
97 !info->attrs[NLBL_MGMT_A_PROTOCOL])
98 goto add_failure;
99
100 netlbl_netlink_auditinfo(skb, &audit_info);
101 100
102 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 101 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
103 if (entry == NULL) { 102 if (entry == NULL) {
104 ret_val = -ENOMEM; 103 ret_val = -ENOMEM;
105 goto add_failure; 104 goto add_failure;
106 } 105 }
107 tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
108 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
109 if (entry->domain == NULL) {
110 ret_val = -ENOMEM;
111 goto add_failure;
112 }
113 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); 106 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
114 nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); 107 if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
108 size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
109 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
110 if (entry->domain == NULL) {
111 ret_val = -ENOMEM;
112 goto add_failure;
113 }
114 nla_strlcpy(entry->domain,
115 info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
116 }
117
118 /* NOTE: internally we allow/use a entry->type value of
119 * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
120 * to pass that as a protocol value because we need to know the
121 * "real" protocol */
115 122
116 switch (entry->type) { 123 switch (entry->type) {
117 case NETLBL_NLTYPE_UNLABELED: 124 case NETLBL_NLTYPE_UNLABELED:
118 ret_val = netlbl_domhsh_add(entry, &audit_info);
119 break; 125 break;
120 case NETLBL_NLTYPE_CIPSOV4: 126 case NETLBL_NLTYPE_CIPSOV4:
121 if (!info->attrs[NLBL_MGMT_A_CV4DOI]) 127 if (!info->attrs[NLBL_MGMT_A_CV4DOI])
122 goto add_failure; 128 goto add_failure;
123 129
124 tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); 130 tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
125 /* We should be holding a rcu_read_lock() here while we hold 131 cipsov4 = cipso_v4_doi_getdef(tmp_val);
126 * the result but since the entry will always be deleted when 132 if (cipsov4 == NULL)
127 * the CIPSO DOI is deleted we aren't going to keep the
128 * lock. */
129 rcu_read_lock();
130 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
131 if (entry->type_def.cipsov4 == NULL) {
132 rcu_read_unlock();
133 goto add_failure; 133 goto add_failure;
134 } 134 entry->type_def.cipsov4 = cipsov4;
135 ret_val = netlbl_domhsh_add(entry, &audit_info);
136 rcu_read_unlock();
137 break; 135 break;
138 default: 136 default:
139 goto add_failure; 137 goto add_failure;
140 } 138 }
139
140 if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) {
141 struct in_addr *addr;
142 struct in_addr *mask;
143 struct netlbl_domaddr4_map *map;
144
145 addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
146 if (addrmap == NULL) {
147 ret_val = -ENOMEM;
148 goto add_failure;
149 }
150 INIT_LIST_HEAD(&addrmap->list4);
151 INIT_LIST_HEAD(&addrmap->list6);
152
153 if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) !=
154 sizeof(struct in_addr)) {
155 ret_val = -EINVAL;
156 goto add_failure;
157 }
158 if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) !=
159 sizeof(struct in_addr)) {
160 ret_val = -EINVAL;
161 goto add_failure;
162 }
163 addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]);
164 mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]);
165
166 map = kzalloc(sizeof(*map), GFP_KERNEL);
167 if (map == NULL) {
168 ret_val = -ENOMEM;
169 goto add_failure;
170 }
171 map->list.addr = addr->s_addr & mask->s_addr;
172 map->list.mask = mask->s_addr;
173 map->list.valid = 1;
174 map->type = entry->type;
175 if (cipsov4)
176 map->type_def.cipsov4 = cipsov4;
177
178 ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
179 if (ret_val != 0) {
180 kfree(map);
181 goto add_failure;
182 }
183
184 entry->type = NETLBL_NLTYPE_ADDRSELECT;
185 entry->type_def.addrsel = addrmap;
186#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
187 } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
188 struct in6_addr *addr;
189 struct in6_addr *mask;
190 struct netlbl_domaddr6_map *map;
191
192 addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL);
193 if (addrmap == NULL) {
194 ret_val = -ENOMEM;
195 goto add_failure;
196 }
197 INIT_LIST_HEAD(&addrmap->list4);
198 INIT_LIST_HEAD(&addrmap->list6);
199
200 if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) !=
201 sizeof(struct in6_addr)) {
202 ret_val = -EINVAL;
203 goto add_failure;
204 }
205 if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) !=
206 sizeof(struct in6_addr)) {
207 ret_val = -EINVAL;
208 goto add_failure;
209 }
210 addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]);
211 mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]);
212
213 map = kzalloc(sizeof(*map), GFP_KERNEL);
214 if (map == NULL) {
215 ret_val = -ENOMEM;
216 goto add_failure;
217 }
218 ipv6_addr_copy(&map->list.addr, addr);
219 map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
220 map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
221 map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
222 map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
223 ipv6_addr_copy(&map->list.mask, mask);
224 map->list.valid = 1;
225 map->type = entry->type;
226
227 ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
228 if (ret_val != 0) {
229 kfree(map);
230 goto add_failure;
231 }
232
233 entry->type = NETLBL_NLTYPE_ADDRSELECT;
234 entry->type_def.addrsel = addrmap;
235#endif /* IPv6 */
236 }
237
238 ret_val = netlbl_domhsh_add(entry, audit_info);
141 if (ret_val != 0) 239 if (ret_val != 0)
142 goto add_failure; 240 goto add_failure;
143 241
144 return 0; 242 return 0;
145 243
146add_failure: 244add_failure:
245 if (cipsov4)
246 cipso_v4_doi_putdef(cipsov4);
147 if (entry) 247 if (entry)
148 kfree(entry->domain); 248 kfree(entry->domain);
249 kfree(addrmap);
149 kfree(entry); 250 kfree(entry);
150 return ret_val; 251 return ret_val;
151} 252}
152 253
153/** 254/**
255 * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry
256 * @skb: the NETLINK buffer
257 * @entry: the map entry
258 *
259 * Description:
260 * This function is a helper function used by the LISTALL and LISTDEF command
261 * handlers. The caller is responsible for ensuring that the RCU read lock
262 * is held. Returns zero on success, negative values on failure.
263 *
264 */
265static int netlbl_mgmt_listentry(struct sk_buff *skb,
266 struct netlbl_dom_map *entry)
267{
268 int ret_val;
269 struct nlattr *nla_a;
270 struct nlattr *nla_b;
271 struct netlbl_af4list *iter4;
272#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
273 struct netlbl_af6list *iter6;
274#endif
275
276 if (entry->domain != NULL) {
277 ret_val = nla_put_string(skb,
278 NLBL_MGMT_A_DOMAIN, entry->domain);
279 if (ret_val != 0)
280 return ret_val;
281 }
282
283 switch (entry->type) {
284 case NETLBL_NLTYPE_ADDRSELECT:
285 nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
286 if (nla_a == NULL)
287 return -ENOMEM;
288
289 netlbl_af4list_foreach_rcu(iter4,
290 &entry->type_def.addrsel->list4) {
291 struct netlbl_domaddr4_map *map4;
292 struct in_addr addr_struct;
293
294 nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
295 if (nla_b == NULL)
296 return -ENOMEM;
297
298 addr_struct.s_addr = iter4->addr;
299 ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR,
300 sizeof(struct in_addr),
301 &addr_struct);
302 if (ret_val != 0)
303 return ret_val;
304 addr_struct.s_addr = iter4->mask;
305 ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK,
306 sizeof(struct in_addr),
307 &addr_struct);
308 if (ret_val != 0)
309 return ret_val;
310 map4 = netlbl_domhsh_addr4_entry(iter4);
311 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
312 map4->type);
313 if (ret_val != 0)
314 return ret_val;
315 switch (map4->type) {
316 case NETLBL_NLTYPE_CIPSOV4:
317 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
318 map4->type_def.cipsov4->doi);
319 if (ret_val != 0)
320 return ret_val;
321 break;
322 }
323
324 nla_nest_end(skb, nla_b);
325 }
326#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
327 netlbl_af6list_foreach_rcu(iter6,
328 &entry->type_def.addrsel->list6) {
329 struct netlbl_domaddr6_map *map6;
330
331 nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
332 if (nla_b == NULL)
333 return -ENOMEM;
334
335 ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR,
336 sizeof(struct in6_addr),
337 &iter6->addr);
338 if (ret_val != 0)
339 return ret_val;
340 ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK,
341 sizeof(struct in6_addr),
342 &iter6->mask);
343 if (ret_val != 0)
344 return ret_val;
345 map6 = netlbl_domhsh_addr6_entry(iter6);
346 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
347 map6->type);
348 if (ret_val != 0)
349 return ret_val;
350
351 nla_nest_end(skb, nla_b);
352 }
353#endif /* IPv6 */
354
355 nla_nest_end(skb, nla_a);
356 break;
357 case NETLBL_NLTYPE_UNLABELED:
358 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
359 break;
360 case NETLBL_NLTYPE_CIPSOV4:
361 ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
362 if (ret_val != 0)
363 return ret_val;
364 ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
365 entry->type_def.cipsov4->doi);
366 break;
367 }
368
369 return ret_val;
370}
371
372/*
373 * NetLabel Command Handlers
374 */
375
376/**
377 * netlbl_mgmt_add - Handle an ADD message
378 * @skb: the NETLINK buffer
379 * @info: the Generic NETLINK info block
380 *
381 * Description:
382 * Process a user generated ADD message and add the domains from the message
383 * to the hash table. See netlabel.h for a description of the message format.
384 * Returns zero on success, negative values on failure.
385 *
386 */
387static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
388{
389 struct netlbl_audit audit_info;
390
391 if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) ||
392 (!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
393 (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
394 info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
395 (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
396 info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
397 ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
398 (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
399 ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
400 (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
401 return -EINVAL;
402
403 netlbl_netlink_auditinfo(skb, &audit_info);
404
405 return netlbl_mgmt_add_common(info, &audit_info);
406}
407
408/**
154 * netlbl_mgmt_remove - Handle a REMOVE message 409 * netlbl_mgmt_remove - Handle a REMOVE message
155 * @skb: the NETLINK buffer 410 * @skb: the NETLINK buffer
156 * @info: the Generic NETLINK info block 411 * @info: the Generic NETLINK info block
@@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
198 if (data == NULL) 453 if (data == NULL)
199 goto listall_cb_failure; 454 goto listall_cb_failure;
200 455
201 ret_val = nla_put_string(cb_arg->skb, 456 ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry);
202 NLBL_MGMT_A_DOMAIN,
203 entry->domain);
204 if (ret_val != 0) 457 if (ret_val != 0)
205 goto listall_cb_failure; 458 goto listall_cb_failure;
206 ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type);
207 if (ret_val != 0)
208 goto listall_cb_failure;
209 switch (entry->type) {
210 case NETLBL_NLTYPE_CIPSOV4:
211 ret_val = nla_put_u32(cb_arg->skb,
212 NLBL_MGMT_A_CV4DOI,
213 entry->type_def.cipsov4->doi);
214 if (ret_val != 0)
215 goto listall_cb_failure;
216 break;
217 }
218 459
219 cb_arg->seq++; 460 cb_arg->seq++;
220 return genlmsg_end(cb_arg->skb, data); 461 return genlmsg_end(cb_arg->skb, data);
@@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb,
268 */ 509 */
269static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) 510static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
270{ 511{
271 int ret_val = -EINVAL;
272 struct netlbl_dom_map *entry = NULL;
273 u32 tmp_val;
274 struct netlbl_audit audit_info; 512 struct netlbl_audit audit_info;
275 513
276 if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) 514 if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) ||
277 goto adddef_failure; 515 (info->attrs[NLBL_MGMT_A_IPV4ADDR] &&
516 info->attrs[NLBL_MGMT_A_IPV6ADDR]) ||
517 (info->attrs[NLBL_MGMT_A_IPV4MASK] &&
518 info->attrs[NLBL_MGMT_A_IPV6MASK]) ||
519 ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^
520 (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) ||
521 ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^
522 (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
523 return -EINVAL;
278 524
279 netlbl_netlink_auditinfo(skb, &audit_info); 525 netlbl_netlink_auditinfo(skb, &audit_info);
280 526
281 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 527 return netlbl_mgmt_add_common(info, &audit_info);
282 if (entry == NULL) {
283 ret_val = -ENOMEM;
284 goto adddef_failure;
285 }
286 entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
287
288 switch (entry->type) {
289 case NETLBL_NLTYPE_UNLABELED:
290 ret_val = netlbl_domhsh_add_default(entry, &audit_info);
291 break;
292 case NETLBL_NLTYPE_CIPSOV4:
293 if (!info->attrs[NLBL_MGMT_A_CV4DOI])
294 goto adddef_failure;
295
296 tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
297 /* We should be holding a rcu_read_lock() here while we hold
298 * the result but since the entry will always be deleted when
299 * the CIPSO DOI is deleted we aren't going to keep the
300 * lock. */
301 rcu_read_lock();
302 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
303 if (entry->type_def.cipsov4 == NULL) {
304 rcu_read_unlock();
305 goto adddef_failure;
306 }
307 ret_val = netlbl_domhsh_add_default(entry, &audit_info);
308 rcu_read_unlock();
309 break;
310 default:
311 goto adddef_failure;
312 }
313 if (ret_val != 0)
314 goto adddef_failure;
315
316 return 0;
317
318adddef_failure:
319 kfree(entry);
320 return ret_val;
321} 528}
322 529
323/** 530/**
@@ -371,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
371 ret_val = -ENOENT; 578 ret_val = -ENOENT;
372 goto listdef_failure_lock; 579 goto listdef_failure_lock;
373 } 580 }
374 ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); 581 ret_val = netlbl_mgmt_listentry(ans_skb, entry);
375 if (ret_val != 0)
376 goto listdef_failure_lock;
377 switch (entry->type) {
378 case NETLBL_NLTYPE_CIPSOV4:
379 ret_val = nla_put_u32(ans_skb,
380 NLBL_MGMT_A_CV4DOI,
381 entry->type_def.cipsov4->doi);
382 if (ret_val != 0)
383 goto listdef_failure_lock;
384 break;
385 }
386 rcu_read_unlock(); 582 rcu_read_unlock();
583 if (ret_val != 0)
584 goto listdef_failure;
387 585
388 genlmsg_end(ans_skb, data); 586 genlmsg_end(ans_skb, data);
389 return genlmsg_reply(ans_skb, info); 587 return genlmsg_reply(ans_skb, info);
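Both netlbl_mgmt_add() and netlbl_mgmt_adddef() above reject a request unless each address family arrives as a complete address+mask pair and the two families are not mixed; the '(A != NULL) ^ (B != NULL)' tests are an exclusive-or on attribute presence. The predicate, reduced to a runnable sketch (attribute pointers replaced by booleans):

    #include <stdbool.h>
    #include <stdio.h>

    /* true iff the selector attributes form a valid combination */
    static bool selectors_valid(bool a4, bool m4, bool a6, bool m6)
    {
        if ((a4 && a6) || (m4 && m6))   /* no mixing of families */
            return false;
        if (a4 != m4 || a6 != m6)       /* addr and mask come together */
            return false;
        return true;
    }

    int main(void)
    {
        printf("%d\n", selectors_valid(true, true, false, false));   /* 1 */
        printf("%d\n", selectors_valid(true, false, false, false));  /* 0: mask missing */
        printf("%d\n", selectors_valid(true, true, true, true));     /* 0: mixed families */
        printf("%d\n", selectors_valid(false, false, false, false)); /* 1: no selector */
        return 0;
    }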
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index a43bff169d6b..05d96431f819 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -45,6 +45,16 @@
45 * NLBL_MGMT_A_DOMAIN 45 * NLBL_MGMT_A_DOMAIN
46 * NLBL_MGMT_A_PROTOCOL 46 * NLBL_MGMT_A_PROTOCOL
47 * 47 *
48 * If IPv4 is specified the following attributes are required:
49 *
50 * NLBL_MGMT_A_IPV4ADDR
51 * NLBL_MGMT_A_IPV4MASK
52 *
53 * If IPv6 is specified the following attributes are required:
54 *
55 * NLBL_MGMT_A_IPV6ADDR
56 * NLBL_MGMT_A_IPV6MASK
57 *
48 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: 58 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required:
49 * 59 *
50 * NLBL_MGMT_A_CV4DOI 60 * NLBL_MGMT_A_CV4DOI
@@ -68,13 +78,24 @@
68 * Required attributes: 78 * Required attributes:
69 * 79 *
70 * NLBL_MGMT_A_DOMAIN 80 * NLBL_MGMT_A_DOMAIN
81 *
82 * If the IP address selectors are not used the following attribute is
83 * required:
84 *
71 * NLBL_MGMT_A_PROTOCOL 85 * NLBL_MGMT_A_PROTOCOL
72 * 86 *
73 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: 87 * If the IP address selectors are used then the following attribute is
88 * required:
89 *
90 * NLBL_MGMT_A_SELECTORLIST
91 *
92 * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
93 * attributes are required:
74 * 94 *
75 * NLBL_MGMT_A_CV4DOI 95 * NLBL_MGMT_A_CV4DOI
76 * 96 *
77 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. 97 * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
98 * attributes are required.
78 * 99 *
79 * o ADDDEF: 100 * o ADDDEF:
80 * Sent by an application to set the default domain mapping for the NetLabel 101 * Sent by an application to set the default domain mapping for the NetLabel
@@ -100,15 +121,23 @@
100 * application there is no payload. On success the kernel should send a 121 * application there is no payload. On success the kernel should send a
101 * response using the following format. 122 * response using the following format.
102 * 123 *
103 * Required attributes: 124 * If the IP address selectors are not used the following attribute is
125 * required:
104 * 126 *
105 * NLBL_MGMT_A_PROTOCOL 127 * NLBL_MGMT_A_PROTOCOL
106 * 128 *
107 * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: 129 * If the IP address selectors are used then the following attribute is
130 * required:
131 *
132 * NLBL_MGMT_A_SELECTORLIST
133 *
134 * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following
135 * attributes are required:
108 * 136 *
109 * NLBL_MGMT_A_CV4DOI 137 * NLBL_MGMT_A_CV4DOI
110 * 138 *
111 * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. 139 * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other
140 * attributes are required.
112 * 141 *
113 * o PROTOCOLS: 142 * o PROTOCOLS:
114 * Sent by an application to request a list of configured NetLabel protocols 143 * Sent by an application to request a list of configured NetLabel protocols
@@ -162,6 +191,26 @@ enum {
162 NLBL_MGMT_A_CV4DOI, 191 NLBL_MGMT_A_CV4DOI,
163 /* (NLA_U32) 192 /* (NLA_U32)
164 * the CIPSOv4 DOI value */ 193 * the CIPSOv4 DOI value */
194 NLBL_MGMT_A_IPV6ADDR,
195 /* (NLA_BINARY, struct in6_addr)
196 * an IPv6 address */
197 NLBL_MGMT_A_IPV6MASK,
198 /* (NLA_BINARY, struct in6_addr)
199 * an IPv6 address mask */
200 NLBL_MGMT_A_IPV4ADDR,
201 /* (NLA_BINARY, struct in_addr)
202 * an IPv4 address */
203 NLBL_MGMT_A_IPV4MASK,
204 /* (NLA_BINARY, struct in_addr)
205 * an IPv4 address mask */
206 NLBL_MGMT_A_ADDRSELECTOR,
207 /* (NLA_NESTED)
208 * an IP address selector, must contain an address, mask, and protocol
209 * attribute plus any protocol specific attributes */
210 NLBL_MGMT_A_SELECTORLIST,
211 /* (NLA_NESTED)
212 * the selector list, there must be at least one
213 * NLBL_MGMT_A_ADDRSELECTOR attribute */
165 __NLBL_MGMT_A_MAX, 214 __NLBL_MGMT_A_MAX,
166}; 215};
167#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) 216#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1)
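A pattern worth calling out in the selector handling above and in the netlabel_unlabeled.c changes below: addresses are stored pre-masked (addr & mask), a candidate matches when (query & mask) equals the stored address, and the new netlbl_af4list code keeps each list ordered so the widest mask, i.e. the most specific selector, is tested first. A small userspace sketch of that matching model (the addresses and the fixed-size array are illustrative):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/socket.h>

    struct sel { uint32_t net, mask; };     /* both in network byte order */

    static uint32_t ip(const char *s)
    {
        struct in_addr a;
        inet_pton(AF_INET, s, &a);
        return a.s_addr;
    }

    int main(void)
    {
        /* most-specific (widest mask) first, as the af4list keeps it */
        struct sel sels[2];
        sels[0].mask = ip("255.255.255.0");
        sels[0].net  = ip("192.168.1.77") & sels[0].mask;   /* pre-masked */
        sels[1].mask = ip("255.255.0.0");
        sels[1].net  = ip("192.168.0.0") & sels[1].mask;

        uint32_t q = ip("192.168.1.5");
        for (unsigned i = 0; i < 2; i++)
            if ((q & sels[i].mask) == sels[i].net) {
                printf("matched selector %u\n", i);         /* prints 0 */
                break;
            }
        return 0;
    }

Pre-masking at insert time is why a lookup needs only one AND and one compare per entry, and the widest-mask-first ordering is why the linear scan can stop at the first hit.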
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 921c118ead89..e8a5c32b0f10 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12/* 12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007 13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008
14 * 14 *
15 * This program is free software; you can redistribute it and/or modify 15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by 16 * it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@
54#include <asm/atomic.h> 54#include <asm/atomic.h>
55 55
56#include "netlabel_user.h" 56#include "netlabel_user.h"
57#include "netlabel_addrlist.h"
57#include "netlabel_domainhash.h" 58#include "netlabel_domainhash.h"
58#include "netlabel_unlabeled.h" 59#include "netlabel_unlabeled.h"
59#include "netlabel_mgmt.h" 60#include "netlabel_mgmt.h"
@@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl {
76 struct list_head *tbl; 77 struct list_head *tbl;
77 u32 size; 78 u32 size;
78}; 79};
80#define netlbl_unlhsh_addr4_entry(iter) \
81 container_of(iter, struct netlbl_unlhsh_addr4, list)
79struct netlbl_unlhsh_addr4 { 82struct netlbl_unlhsh_addr4 {
80 __be32 addr;
81 __be32 mask;
82 u32 secid; 83 u32 secid;
83 84
84 u32 valid; 85 struct netlbl_af4list list;
85 struct list_head list;
86 struct rcu_head rcu; 86 struct rcu_head rcu;
87}; 87};
88#define netlbl_unlhsh_addr6_entry(iter) \
89 container_of(iter, struct netlbl_unlhsh_addr6, list)
88struct netlbl_unlhsh_addr6 { 90struct netlbl_unlhsh_addr6 {
89 struct in6_addr addr;
90 struct in6_addr mask;
91 u32 secid; 91 u32 secid;
92 92
93 u32 valid; 93 struct netlbl_af6list list;
94 struct list_head list;
95 struct rcu_head rcu; 94 struct rcu_head rcu;
96}; 95};
97struct netlbl_unlhsh_iface { 96struct netlbl_unlhsh_iface {
@@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
147}; 146};
148 147
149/* 148/*
150 * Audit Helper Functions
151 */
152
153/**
154 * netlbl_unlabel_audit_addr4 - Audit an IPv4 address
155 * @audit_buf: audit buffer
156 * @dev: network interface
157 * @addr: IP address
158 * @mask: IP address mask
159 *
160 * Description:
161 * Write the IPv4 address and address mask, if necessary, to @audit_buf.
162 *
163 */
164static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf,
165 const char *dev,
166 __be32 addr, __be32 mask)
167{
168 u32 mask_val = ntohl(mask);
169
170 if (dev != NULL)
171 audit_log_format(audit_buf, " netif=%s", dev);
172 audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr));
173 if (mask_val != 0xffffffff) {
174 u32 mask_len = 0;
175 while (mask_val > 0) {
176 mask_val <<= 1;
177 mask_len++;
178 }
179 audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
180 }
181}
182
183#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
184/**
185 * netlbl_unlabel_audit_addr6 - Audit an IPv6 address
186 * @audit_buf: audit buffer
187 * @dev: network interface
188 * @addr: IP address
189 * @mask: IP address mask
190 *
191 * Description:
192 * Write the IPv6 address and address mask, if necessary, to @audit_buf.
193 *
194 */
195static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf,
196 const char *dev,
197 const struct in6_addr *addr,
198 const struct in6_addr *mask)
199{
200 if (dev != NULL)
201 audit_log_format(audit_buf, " netif=%s", dev);
202 audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr));
203 if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
204 u32 mask_len = 0;
205 u32 mask_val;
206 int iter = -1;
207 while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff)
208 mask_len += 32;
209 mask_val = ntohl(mask->s6_addr32[iter]);
210 while (mask_val > 0) {
211 mask_val <<= 1;
212 mask_len++;
213 }
214 audit_log_format(audit_buf, " src_prefixlen=%d", mask_len);
215 }
216}
217#endif /* IPv6 */
218
219/*
220 * Unlabeled Connection Hash Table Functions 149 * Unlabeled Connection Hash Table Functions
221 */ 150 */
222 151
@@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
274static void netlbl_unlhsh_free_iface(struct rcu_head *entry) 203static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
275{ 204{
276 struct netlbl_unlhsh_iface *iface; 205 struct netlbl_unlhsh_iface *iface;
277 struct netlbl_unlhsh_addr4 *iter4; 206 struct netlbl_af4list *iter4;
278 struct netlbl_unlhsh_addr4 *tmp4; 207 struct netlbl_af4list *tmp4;
279 struct netlbl_unlhsh_addr6 *iter6; 208#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
280 struct netlbl_unlhsh_addr6 *tmp6; 209 struct netlbl_af6list *iter6;
210 struct netlbl_af6list *tmp6;
211#endif /* IPv6 */
281 212
282 iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); 213 iface = container_of(entry, struct netlbl_unlhsh_iface, rcu);
283 214
284 /* no need for locks here since we are the only one with access to this 215 /* no need for locks here since we are the only one with access to this
285 * structure */ 216 * structure */
286 217
287 list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list) 218 netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) {
288 if (iter4->valid) { 219 netlbl_af4list_remove_entry(iter4);
289 list_del_rcu(&iter4->list); 220 kfree(netlbl_unlhsh_addr4_entry(iter4));
290 kfree(iter4); 221 }
291 } 222#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
292 list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list) 223 netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
293 if (iter6->valid) { 224 netlbl_af6list_remove_entry(iter6);
294 list_del_rcu(&iter6->list); 225 kfree(netlbl_unlhsh_addr6_entry(iter6));
295 kfree(iter6); 226 }
296 } 227#endif /* IPv6 */
297 kfree(iface); 228 kfree(iface);
298} 229}
299 230
@@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex)
316} 247}
317 248
318/** 249/**
319 * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry
320 * @addr: IPv4 address
321 * @iface: the network interface entry
322 *
323 * Description:
324 * Searches the IPv4 address list of the network interface specified by @iface.
325 * If a matching address entry is found it is returned, otherwise NULL is
326 * returned. The caller is responsible for calling the rcu_read_[un]lock()
327 * functions.
328 *
329 */
330static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4(
331 __be32 addr,
332 const struct netlbl_unlhsh_iface *iface)
333{
334 struct netlbl_unlhsh_addr4 *iter;
335
336 list_for_each_entry_rcu(iter, &iface->addr4_list, list)
337 if (iter->valid && (addr & iter->mask) == iter->addr)
338 return iter;
339
340 return NULL;
341}
342
343#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
344/**
345 * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry
346 * @addr: IPv6 address
347 * @iface: the network interface entry
348 *
349 * Description:
350 * Searches the IPv6 address list of the network interface specified by @iface.
351 * If a matching address entry is found it is returned, otherwise NULL is
352 * returned. The caller is responsible for calling the rcu_read_[un]lock()
353 * functions.
354 *
355 */
356static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
357 const struct in6_addr *addr,
358 const struct netlbl_unlhsh_iface *iface)
359{
360 struct netlbl_unlhsh_addr6 *iter;
361
362 list_for_each_entry_rcu(iter, &iface->addr6_list, list)
363 if (iter->valid &&
364 ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0)
365 return iter;
366
367 return NULL;
368}
369#endif /* IPv6 */
370
371/**
372 * netlbl_unlhsh_search_iface - Search for a matching interface entry 250 * netlbl_unlhsh_search_iface - Search for a matching interface entry
373 * @ifindex: the network interface 251 * @ifindex: the network interface
374 * 252 *
@@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6(
381static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) 259static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex)
382{ 260{
383 u32 bkt; 261 u32 bkt;
262 struct list_head *bkt_list;
384 struct netlbl_unlhsh_iface *iter; 263 struct netlbl_unlhsh_iface *iter;
385 264
386 bkt = netlbl_unlhsh_hash(ifindex); 265 bkt = netlbl_unlhsh_hash(ifindex);
387 list_for_each_entry_rcu(iter, 266 bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt];
388 &rcu_dereference(netlbl_unlhsh)->tbl[bkt], 267 list_for_each_entry_rcu(iter, bkt_list, list)
389 list)
390 if (iter->valid && iter->ifindex == ifindex) 268 if (iter->valid && iter->ifindex == ifindex)
391 return iter; 269 return iter;
392 270
@@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
439 const struct in_addr *mask, 317 const struct in_addr *mask,
440 u32 secid) 318 u32 secid)
441{ 319{
320 int ret_val;
442 struct netlbl_unlhsh_addr4 *entry; 321 struct netlbl_unlhsh_addr4 *entry;
443 struct netlbl_unlhsh_addr4 *iter;
444 322
445 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 323 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
446 if (entry == NULL) 324 if (entry == NULL)
447 return -ENOMEM; 325 return -ENOMEM;
448 326
449 entry->addr = addr->s_addr & mask->s_addr; 327 entry->list.addr = addr->s_addr & mask->s_addr;
450 entry->mask = mask->s_addr; 328 entry->list.mask = mask->s_addr;
451 entry->secid = secid; 329 entry->list.valid = 1;
452 entry->valid = 1;
453 INIT_RCU_HEAD(&entry->rcu); 330 INIT_RCU_HEAD(&entry->rcu);
331 entry->secid = secid;
454 332
455 spin_lock(&netlbl_unlhsh_lock); 333 spin_lock(&netlbl_unlhsh_lock);
456 iter = netlbl_unlhsh_search_addr4(entry->addr, iface); 334 ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list);
457 if (iter != NULL &&
458 iter->addr == addr->s_addr && iter->mask == mask->s_addr) {
459 spin_unlock(&netlbl_unlhsh_lock);
460 kfree(entry);
461 return -EEXIST;
462 }
463 /* in order to speed up address searches through the list (the common
464 * case) we need to keep the list in order based on the size of the
465 * address mask such that the entry with the widest mask (smallest
466 * numerical value) appears first in the list */
467 list_for_each_entry_rcu(iter, &iface->addr4_list, list)
468 if (iter->valid &&
469 ntohl(entry->mask) > ntohl(iter->mask)) {
470 __list_add_rcu(&entry->list,
471 iter->list.prev,
472 &iter->list);
473 spin_unlock(&netlbl_unlhsh_lock);
474 return 0;
475 }
476 list_add_tail_rcu(&entry->list, &iface->addr4_list);
477 spin_unlock(&netlbl_unlhsh_lock); 335 spin_unlock(&netlbl_unlhsh_lock);
478 return 0; 336
337 if (ret_val != 0)
338 kfree(entry);
339 return ret_val;
479} 340}
480 341
481#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 342#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
498 const struct in6_addr *mask, 359 const struct in6_addr *mask,
499 u32 secid) 360 u32 secid)
500{ 361{
362 int ret_val;
501 struct netlbl_unlhsh_addr6 *entry; 363 struct netlbl_unlhsh_addr6 *entry;
502 struct netlbl_unlhsh_addr6 *iter;
503 364
504 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 365 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
505 if (entry == NULL) 366 if (entry == NULL)
506 return -ENOMEM; 367 return -ENOMEM;
507 368
508 ipv6_addr_copy(&entry->addr, addr); 369 ipv6_addr_copy(&entry->list.addr, addr);
509 entry->addr.s6_addr32[0] &= mask->s6_addr32[0]; 370 entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
510 entry->addr.s6_addr32[1] &= mask->s6_addr32[1]; 371 entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
511 entry->addr.s6_addr32[2] &= mask->s6_addr32[2]; 372 entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
512 entry->addr.s6_addr32[3] &= mask->s6_addr32[3]; 373 entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
513 ipv6_addr_copy(&entry->mask, mask); 374 ipv6_addr_copy(&entry->list.mask, mask);
514 entry->secid = secid; 375 entry->list.valid = 1;
515 entry->valid = 1;
516 INIT_RCU_HEAD(&entry->rcu); 376 INIT_RCU_HEAD(&entry->rcu);
377 entry->secid = secid;
517 378
518 spin_lock(&netlbl_unlhsh_lock); 379 spin_lock(&netlbl_unlhsh_lock);
519 iter = netlbl_unlhsh_search_addr6(&entry->addr, iface); 380 ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list);
520 if (iter != NULL &&
521 (ipv6_addr_equal(&iter->addr, addr) &&
522 ipv6_addr_equal(&iter->mask, mask))) {
523 spin_unlock(&netlbl_unlhsh_lock);
524 kfree(entry);
525 return -EEXIST;
526 }
527 /* in order to speed up address searches through the list (the common
528 * case) we need to keep the list in order based on the size of the
529 * address mask such that the entry with the widest mask (smallest
530 * numerical value) appears first in the list */
531 list_for_each_entry_rcu(iter, &iface->addr6_list, list)
532 if (iter->valid &&
533 ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) {
534 __list_add_rcu(&entry->list,
535 iter->list.prev,
536 &iter->list);
537 spin_unlock(&netlbl_unlhsh_lock);
538 return 0;
539 }
540 list_add_tail_rcu(&entry->list, &iface->addr6_list);
541 spin_unlock(&netlbl_unlhsh_lock); 381 spin_unlock(&netlbl_unlhsh_lock);
382
383 if (ret_val != 0)
384 kfree(entry);
542 return 0;
385 return ret_val;
543} 386}
544#endif /* IPv6 */ 387#endif /* IPv6 */
@@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net,
658 mask4 = (struct in_addr *)mask; 501 mask4 = (struct in_addr *)mask;
659 ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); 502 ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid);
660 if (audit_buf != NULL) 503 if (audit_buf != NULL)
661 netlbl_unlabel_audit_addr4(audit_buf, 504 netlbl_af4list_audit_addr(audit_buf, 1,
662 dev_name, 505 dev_name,
663 addr4->s_addr, 506 addr4->s_addr,
664 mask4->s_addr); 507 mask4->s_addr);
665 break; 508 break;
666 } 509 }
667#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 510#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net,
672 mask6 = (struct in6_addr *)mask; 515 mask6 = (struct in6_addr *)mask;
673 ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); 516 ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid);
674 if (audit_buf != NULL) 517 if (audit_buf != NULL)
675 netlbl_unlabel_audit_addr6(audit_buf, 518 netlbl_af6list_audit_addr(audit_buf, 1,
676 dev_name, 519 dev_name,
677 addr6, mask6); 520 addr6, mask6);
678 break; 521 break;
679 } 522 }
680#endif /* IPv6 */ 523#endif /* IPv6 */
@@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
719 const struct in_addr *mask, 562 const struct in_addr *mask,
720 struct netlbl_audit *audit_info) 563 struct netlbl_audit *audit_info)
721{ 564{
722 int ret_val = -ENOENT; 565 int ret_val = 0;
566 struct netlbl_af4list *list_entry;
723 struct netlbl_unlhsh_addr4 *entry; 567 struct netlbl_unlhsh_addr4 *entry;
724 struct audit_buffer *audit_buf = NULL; 568 struct audit_buffer *audit_buf;
725 struct net_device *dev; 569 struct net_device *dev;
726 char *secctx = NULL; 570 char *secctx;
727 u32 secctx_len; 571 u32 secctx_len;
728 572
729 spin_lock(&netlbl_unlhsh_lock); 573 spin_lock(&netlbl_unlhsh_lock);
730 entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface); 574 list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
731 if (entry != NULL && 575 &iface->addr4_list);
732 entry->addr == addr->s_addr && entry->mask == mask->s_addr) {
733 entry->valid = 0;
734 list_del_rcu(&entry->list);
735 ret_val = 0;
736 }
737 spin_unlock(&netlbl_unlhsh_lock); 576 spin_unlock(&netlbl_unlhsh_lock);
577 if (list_entry == NULL)
578 ret_val = -ENOENT;
579 entry = netlbl_unlhsh_addr4_entry(list_entry);
738 580
739 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, 581 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
740 audit_info); 582 audit_info);
741 if (audit_buf != NULL) { 583 if (audit_buf != NULL) {
742 dev = dev_get_by_index(net, iface->ifindex); 584 dev = dev_get_by_index(net, iface->ifindex);
743 netlbl_unlabel_audit_addr4(audit_buf, 585 netlbl_af4list_audit_addr(audit_buf, 1,
744 (dev != NULL ? dev->name : NULL), 586 (dev != NULL ? dev->name : NULL),
745 entry->addr, entry->mask); 587 addr->s_addr, mask->s_addr);
746 if (dev != NULL) 588 if (dev != NULL)
747 dev_put(dev); 589 dev_put(dev);
748 if (security_secid_to_secctx(entry->secid, 590 if (entry && security_secid_to_secctx(entry->secid,
749 &secctx, 591 &secctx,
750 &secctx_len) == 0) { 592 &secctx_len) == 0) {
751 audit_log_format(audit_buf, " sec_obj=%s", secctx); 593 audit_log_format(audit_buf, " sec_obj=%s", secctx);
752 security_release_secctx(secctx, secctx_len); 594 security_release_secctx(secctx, secctx_len);
753 } 595 }
@@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
781 const struct in6_addr *mask, 623 const struct in6_addr *mask,
782 struct netlbl_audit *audit_info) 624 struct netlbl_audit *audit_info)
783{ 625{
784 int ret_val = -ENOENT; 626 int ret_val = 0;
627 struct netlbl_af6list *list_entry;
785 struct netlbl_unlhsh_addr6 *entry; 628 struct netlbl_unlhsh_addr6 *entry;
786 struct audit_buffer *audit_buf = NULL; 629 struct audit_buffer *audit_buf;
787 struct net_device *dev; 630 struct net_device *dev;
788 char *secctx = NULL; 631 char *secctx;
789 u32 secctx_len; 632 u32 secctx_len;
790 633
791 spin_lock(&netlbl_unlhsh_lock); 634 spin_lock(&netlbl_unlhsh_lock);
792 entry = netlbl_unlhsh_search_addr6(addr, iface); 635 list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list);
793 if (entry != NULL &&
794 (ipv6_addr_equal(&entry->addr, addr) &&
795 ipv6_addr_equal(&entry->mask, mask))) {
796 entry->valid = 0;
797 list_del_rcu(&entry->list);
798 ret_val = 0;
799 }
800 spin_unlock(&netlbl_unlhsh_lock); 636 spin_unlock(&netlbl_unlhsh_lock);
637 if (list_entry == NULL)
638 ret_val = -ENOENT;
639 entry = netlbl_unlhsh_addr6_entry(list_entry);
801 640
802 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, 641 audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL,
803 audit_info); 642 audit_info);
804 if (audit_buf != NULL) { 643 if (audit_buf != NULL) {
805 dev = dev_get_by_index(net, iface->ifindex); 644 dev = dev_get_by_index(net, iface->ifindex);
806 netlbl_unlabel_audit_addr6(audit_buf, 645 netlbl_af6list_audit_addr(audit_buf, 1,
807 (dev != NULL ? dev->name : NULL), 646 (dev != NULL ? dev->name : NULL),
808 addr, mask); 647 addr, mask);
809 if (dev != NULL) 648 if (dev != NULL)
810 dev_put(dev); 649 dev_put(dev);
811 if (security_secid_to_secctx(entry->secid, 650 if (entry && security_secid_to_secctx(entry->secid,
812 &secctx, 651 &secctx,
813 &secctx_len) == 0) { 652 &secctx_len) == 0) {
814 audit_log_format(audit_buf, " sec_obj=%s", secctx); 653 audit_log_format(audit_buf, " sec_obj=%s", secctx);
815 security_release_secctx(secctx, secctx_len); 654 security_release_secctx(secctx, secctx_len);
816 } 655 }
@@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
836 */ 675 */
837static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) 676static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
838{ 677{
839 struct netlbl_unlhsh_addr4 *iter4; 678 struct netlbl_af4list *iter4;
840 struct netlbl_unlhsh_addr6 *iter6; 679#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
680 struct netlbl_af6list *iter6;
681#endif /* IPv6 */
841 682
842 spin_lock(&netlbl_unlhsh_lock); 683 spin_lock(&netlbl_unlhsh_lock);
843 list_for_each_entry_rcu(iter4, &iface->addr4_list, list) 684 netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
844 if (iter4->valid) 685 goto unlhsh_condremove_failure;
845 goto unlhsh_condremove_failure; 686#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
846 list_for_each_entry_rcu(iter6, &iface->addr6_list, list) 687 netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
847 if (iter6->valid) 688 goto unlhsh_condremove_failure;
848 goto unlhsh_condremove_failure; 689#endif /* IPv6 */
849 iface->valid = 0; 690 iface->valid = 0;
850 if (iface->ifindex > 0) 691 if (iface->ifindex > 0)
851 list_del_rcu(&iface->list); 692 list_del_rcu(&iface->list);
@@ -1349,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
1349 if (addr4) { 1190 if (addr4) {
1350 struct in_addr addr_struct; 1191 struct in_addr addr_struct;
1351 1192
1352 addr_struct.s_addr = addr4->addr; 1193 addr_struct.s_addr = addr4->list.addr;
1353 ret_val = nla_put(cb_arg->skb, 1194 ret_val = nla_put(cb_arg->skb,
1354 NLBL_UNLABEL_A_IPV4ADDR, 1195 NLBL_UNLABEL_A_IPV4ADDR,
1355 sizeof(struct in_addr), 1196 sizeof(struct in_addr),
@@ -1357,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
1357 if (ret_val != 0) 1198 if (ret_val != 0)
1358 goto list_cb_failure; 1199 goto list_cb_failure;
1359 1200
1360 addr_struct.s_addr = addr4->mask; 1201 addr_struct.s_addr = addr4->list.mask;
1361 ret_val = nla_put(cb_arg->skb, 1202 ret_val = nla_put(cb_arg->skb,
1362 NLBL_UNLABEL_A_IPV4MASK, 1203 NLBL_UNLABEL_A_IPV4MASK,
1363 sizeof(struct in_addr), 1204 sizeof(struct in_addr),
@@ -1370,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
1370 ret_val = nla_put(cb_arg->skb, 1211 ret_val = nla_put(cb_arg->skb,
1371 NLBL_UNLABEL_A_IPV6ADDR, 1212 NLBL_UNLABEL_A_IPV6ADDR,
1372 sizeof(struct in6_addr), 1213 sizeof(struct in6_addr),
1373 &addr6->addr); 1214 &addr6->list.addr);
1374 if (ret_val != 0) 1215 if (ret_val != 0)
1375 goto list_cb_failure; 1216 goto list_cb_failure;
1376 1217
1377 ret_val = nla_put(cb_arg->skb, 1218 ret_val = nla_put(cb_arg->skb,
1378 NLBL_UNLABEL_A_IPV6MASK, 1219 NLBL_UNLABEL_A_IPV6MASK,
1379 sizeof(struct in6_addr), 1220 sizeof(struct in6_addr),
1380 &addr6->mask); 1221 &addr6->list.mask);
1381 if (ret_val != 0) 1222 if (ret_val != 0)
1382 goto list_cb_failure; 1223 goto list_cb_failure;
1383 1224
@@ -1425,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
1425 u32 iter_bkt; 1266 u32 iter_bkt;
1426 u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; 1267 u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
1427 struct netlbl_unlhsh_iface *iface; 1268 struct netlbl_unlhsh_iface *iface;
1428 struct netlbl_unlhsh_addr4 *addr4; 1269 struct list_head *iter_list;
1429 struct netlbl_unlhsh_addr6 *addr6; 1270 struct netlbl_af4list *addr4;
1271#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1272 struct netlbl_af6list *addr6;
1273#endif
1430 1274
1431 cb_arg.nl_cb = cb; 1275 cb_arg.nl_cb = cb;
1432 cb_arg.skb = skb; 1276 cb_arg.skb = skb;
@@ -1436,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
1436 for (iter_bkt = skip_bkt; 1280 for (iter_bkt = skip_bkt;
1437 iter_bkt < rcu_dereference(netlbl_unlhsh)->size; 1281 iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
1438 iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { 1282 iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
1439 list_for_each_entry_rcu(iface, 1283 iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
1440 &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt], 1284 list_for_each_entry_rcu(iface, iter_list, list) {
1441 list) {
1442 if (!iface->valid || 1285 if (!iface->valid ||
1443 iter_chain++ < skip_chain) 1286 iter_chain++ < skip_chain)
1444 continue; 1287 continue;
1445 list_for_each_entry_rcu(addr4, 1288 netlbl_af4list_foreach_rcu(addr4,
1446 &iface->addr4_list, 1289 &iface->addr4_list) {
1447 list) { 1290 if (iter_addr4++ < skip_addr4)
1448 if (!addr4->valid || iter_addr4++ < skip_addr4)
1449 continue; 1291 continue;
1450 if (netlbl_unlabel_staticlist_gen( 1292 if (netlbl_unlabel_staticlist_gen(
1451 NLBL_UNLABEL_C_STATICLIST, 1293 NLBL_UNLABEL_C_STATICLIST,
1452 iface, 1294 iface,
1453 addr4, 1295 netlbl_unlhsh_addr4_entry(addr4),
1454 NULL, 1296 NULL,
1455 &cb_arg) < 0) { 1297 &cb_arg) < 0) {
1456 iter_addr4--; 1298 iter_addr4--;
1457 iter_chain--; 1299 iter_chain--;
1458 goto unlabel_staticlist_return; 1300 goto unlabel_staticlist_return;
1459 } 1301 }
1460 } 1302 }
1461 list_for_each_entry_rcu(addr6, 1303#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1462 &iface->addr6_list, 1304 netlbl_af6list_foreach_rcu(addr6,
1463 list) { 1305 &iface->addr6_list) {
1464 if (!addr6->valid || iter_addr6++ < skip_addr6) 1306 if (iter_addr6++ < skip_addr6)
1465 continue; 1307 continue;
1466 if (netlbl_unlabel_staticlist_gen( 1308 if (netlbl_unlabel_staticlist_gen(
1467 NLBL_UNLABEL_C_STATICLIST, 1309 NLBL_UNLABEL_C_STATICLIST,
1468 iface, 1310 iface,
1469 NULL, 1311 NULL,
1470 addr6, 1312 netlbl_unlhsh_addr6_entry(addr6),
1471 &cb_arg) < 0) { 1313 &cb_arg) < 0) {
1472 iter_addr6--; 1314 iter_addr6--;
1473 iter_chain--; 1315 iter_chain--;
1474 goto unlabel_staticlist_return; 1316 goto unlabel_staticlist_return;
1475 } 1317 }
1476 } 1318 }
1319#endif /* IPv6 */
1477 } 1320 }
1478 } 1321 }
1479 1322
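The skip_bkt/skip_chain/skip_addr4/skip_addr6 bookkeeping is the standard netlink dump-resume protocol: cb->args[] carries how far the previous pass got, each counter is decremented again when an emit fails so that entry is retried, and the next callback restarts from the saved position. The idiom in isolation, with hypothetical names (struct item, item_list, emit_record) and locking elided:

/* Sketch of the dump-resume idiom used above. */
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	u32 skip = cb->args[0];
	u32 iter = 0;
	struct item *p;

	list_for_each_entry_rcu(p, &item_list, list) {
		if (iter++ < skip)
			continue;
		if (emit_record(skb, p) < 0) {
			iter--;		/* re-emit this one next time */
			break;
		}
	}
	cb->args[0] = iter;	/* resume point for the next call */
	return skb->len;
}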
@@ -1504,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
1504 struct netlbl_unlhsh_iface *iface; 1347 struct netlbl_unlhsh_iface *iface;
1505 u32 skip_addr4 = cb->args[0]; 1348 u32 skip_addr4 = cb->args[0];
1506 u32 skip_addr6 = cb->args[1]; 1349 u32 skip_addr6 = cb->args[1];
1507 u32 iter_addr4 = 0, iter_addr6 = 0; 1350 u32 iter_addr4 = 0;
1508 struct netlbl_unlhsh_addr4 *addr4; 1351 struct netlbl_af4list *addr4;
1509 struct netlbl_unlhsh_addr6 *addr6; 1352#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1353 u32 iter_addr6 = 0;
1354 struct netlbl_af6list *addr6;
1355#endif
1510 1356
1511 cb_arg.nl_cb = cb; 1357 cb_arg.nl_cb = cb;
1512 cb_arg.skb = skb; 1358 cb_arg.skb = skb;
@@ -1517,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
1517 if (iface == NULL || !iface->valid) 1363 if (iface == NULL || !iface->valid)
1518 goto unlabel_staticlistdef_return; 1364 goto unlabel_staticlistdef_return;
1519 1365
1520 list_for_each_entry_rcu(addr4, &iface->addr4_list, list) { 1366 netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) {
1521 if (!addr4->valid || iter_addr4++ < skip_addr4) 1367 if (iter_addr4++ < skip_addr4)
1522 continue; 1368 continue;
1523 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, 1369 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
1524 iface, 1370 iface,
1525 addr4, 1371 netlbl_unlhsh_addr4_entry(addr4),
1526 NULL, 1372 NULL,
1527 &cb_arg) < 0) { 1373 &cb_arg) < 0) {
1528 iter_addr4--; 1374 iter_addr4--;
1529 goto unlabel_staticlistdef_return; 1375 goto unlabel_staticlistdef_return;
1530 } 1376 }
1531 } 1377 }
1532 list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { 1378#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1533 if (!addr6->valid || iter_addr6++ < skip_addr6) 1379 netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
1380 if (iter_addr6++ < skip_addr6)
1534 continue; 1381 continue;
1535 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, 1382 if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
1536 iface, 1383 iface,
1537 NULL, 1384 NULL,
1538 addr6, 1385 netlbl_unlhsh_addr6_entry(addr6),
1539 &cb_arg) < 0) { 1386 &cb_arg) < 0) {
1540 iter_addr6--; 1387 iter_addr6--;
1541 goto unlabel_staticlistdef_return; 1388 goto unlabel_staticlistdef_return;
1542 } 1389 }
1543 } 1390 }
1391#endif /* IPv6 */
1544 1392
1545unlabel_staticlistdef_return: 1393unlabel_staticlistdef_return:
1546 rcu_read_unlock(); 1394 rcu_read_unlock();
@@ -1718,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
1718 switch (family) { 1566 switch (family) {
1719 case PF_INET: { 1567 case PF_INET: {
1720 struct iphdr *hdr4; 1568 struct iphdr *hdr4;
1721 struct netlbl_unlhsh_addr4 *addr4; 1569 struct netlbl_af4list *addr4;
1722 1570
1723 hdr4 = ip_hdr(skb); 1571 hdr4 = ip_hdr(skb);
1724 addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface); 1572 addr4 = netlbl_af4list_search(hdr4->saddr,
1573 &iface->addr4_list);
1725 if (addr4 == NULL) 1574 if (addr4 == NULL)
1726 goto unlabel_getattr_nolabel; 1575 goto unlabel_getattr_nolabel;
1727 secattr->attr.secid = addr4->secid; 1576 secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
1728 break; 1577 break;
1729 } 1578 }
1730#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 1579#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1731 case PF_INET6: { 1580 case PF_INET6: {
1732 struct ipv6hdr *hdr6; 1581 struct ipv6hdr *hdr6;
1733 struct netlbl_unlhsh_addr6 *addr6; 1582 struct netlbl_af6list *addr6;
1734 1583
1735 hdr6 = ipv6_hdr(skb); 1584 hdr6 = ipv6_hdr(skb);
1736 addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface); 1585 addr6 = netlbl_af6list_search(&hdr6->saddr,
1586 &iface->addr6_list);
1737 if (addr6 == NULL) 1587 if (addr6 == NULL)
1738 goto unlabel_getattr_nolabel; 1588 goto unlabel_getattr_nolabel;
1739 secattr->attr.secid = addr6->secid; 1589 secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid;
1740 break; 1590 break;
1741 } 1591 }
1742#endif /* IPv6 */ 1592#endif /* IPv6 */
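netlbl_unlhsh_addr4_entry() and netlbl_unlhsh_addr6_entry() recover the NetLabel-specific record from the embedded generic list node; given the addr4->list.addr accesses above, they are presumably thin container_of() wrappers:

/* Sketch, assuming struct netlbl_unlhsh_addr4 embeds the generic
 * struct netlbl_af4list as its `list` member. */
static inline struct netlbl_unlhsh_addr4 *
netlbl_unlhsh_addr4_entry(const struct netlbl_af4list *iter)
{
	return container_of(iter, struct netlbl_unlhsh_addr4, list);
}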
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index e5b69556bb5b..21124ec0a73d 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -16,6 +16,7 @@
16#include <linux/workqueue.h> 16#include <linux/workqueue.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/rfkill.h> 18#include <linux/rfkill.h>
19#include <linux/sched.h>
19 20
20#include "rfkill-input.h" 21#include "rfkill-input.h"
21 22
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76739e928d0d..4895c341e46d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
174 clnt->cl_procinfo = version->procs; 174 clnt->cl_procinfo = version->procs;
175 clnt->cl_maxproc = version->nrprocs; 175 clnt->cl_maxproc = version->nrprocs;
176 clnt->cl_protname = program->name; 176 clnt->cl_protname = program->name;
177 clnt->cl_prog = program->number; 177 clnt->cl_prog = args->prognumber ? : program->number;
178 clnt->cl_vers = version->number; 178 clnt->cl_vers = version->number;
179 clnt->cl_stats = program->stats; 179 clnt->cl_stats = program->stats;
180 clnt->cl_metrics = rpc_alloc_iostats(clnt); 180 clnt->cl_metrics = rpc_alloc_iostats(clnt);
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
213 } 213 }
214 214
215 /* save the nodename */ 215 /* save the nodename */
216 clnt->cl_nodelen = strlen(utsname()->nodename); 216 clnt->cl_nodelen = strlen(init_utsname()->nodename);
217 if (clnt->cl_nodelen > UNX_MAXNODENAME) 217 if (clnt->cl_nodelen > UNX_MAXNODENAME)
218 clnt->cl_nodelen = UNX_MAXNODENAME; 218 clnt->cl_nodelen = UNX_MAXNODENAME;
219 memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); 219 memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
220 rpc_register_client(clnt); 220 rpc_register_client(clnt);
221 return clnt; 221 return clnt;
222 222
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24db2b4d12d3..41013dd66ac3 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
20#include <linux/in6.h> 20#include <linux/in6.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <net/ipv6.h>
23 24
24#include <linux/sunrpc/clnt.h> 25#include <linux/sunrpc/clnt.h>
25#include <linux/sunrpc/sched.h> 26#include <linux/sunrpc/sched.h>
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
176} 177}
177 178
178static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, 179static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
179 u32 version, struct rpc_message *msg, 180 u32 version, struct rpc_message *msg)
180 int *result)
181{ 181{
182 struct rpc_clnt *rpcb_clnt; 182 struct rpc_clnt *rpcb_clnt;
183 int error = 0; 183 int result, error = 0;
184 184
185 *result = 0; 185 msg->rpc_resp = &result;
186 186
187 rpcb_clnt = rpcb_create_local(addr, addrlen, version); 187 rpcb_clnt = rpcb_create_local(addr, addrlen, version);
188 if (!IS_ERR(rpcb_clnt)) { 188 if (!IS_ERR(rpcb_clnt)) {
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
191 } else 191 } else
192 error = PTR_ERR(rpcb_clnt); 192 error = PTR_ERR(rpcb_clnt);
193 193
194 if (error < 0) 194 if (error < 0) {
195 printk(KERN_WARNING "RPC: failed to contact local rpcbind " 195 printk(KERN_WARNING "RPC: failed to contact local rpcbind "
196 "server (errno %d).\n", -error); 196 "server (errno %d).\n", -error);
197 dprintk("RPC: registration status %d/%d\n", error, *result); 197 return error;
198 }
198 199
199 return error; 200 if (!result)
201 return -EACCES;
202 return 0;
200} 203}
201 204
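With the rpcbind result folded into the return value, rpcb_register_call() now has three outcomes: 0 (rpcbind accepted the request), -EACCES (rpcbind replied but refused), or another negative errno (the request never completed). A hedged caller sketch against the new rpcb_register() signature; the program number and port are illustrative:

/* Sketch: register NFS (program 100003) v3 over TCP on port 2049. */
int error = rpcb_register(100003, 3, IPPROTO_TCP, 2049);
if (error == -EACCES)
	printk(KERN_WARNING "rpcbind refused the registration\n");
else if (error < 0)
	printk(KERN_WARNING "could not reach rpcbind: %d\n", error);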
202/** 205/**
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
205 * @vers: RPC version number to bind 208 * @vers: RPC version number to bind
206 * @prot: transport protocol to register 209 * @prot: transport protocol to register
207 * @port: port value to register 210 * @port: port value to register
208 * @okay: OUT: result code 211 *
212 * Returns zero if the registration request was dispatched successfully
213 * and the rpcbind daemon returned success. Otherwise, returns an errno
214 * value that reflects the nature of the error (request could not be
215 * dispatched, timed out, or rpcbind returned an error).
209 * 216 *
210 * RPC services invoke this function to advertise their contact 217 * RPC services invoke this function to advertise their contact
211 * information via the system's rpcbind daemon. RPC services 218 * information via the system's rpcbind daemon. RPC services
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
217 * all registered transports for [program, version] from the local 224 * all registered transports for [program, version] from the local
218 * rpcbind database. 225 * rpcbind database.
219 * 226 *
220 * Returns zero if the registration request was dispatched
221 * successfully and a reply was received. The rpcbind daemon's
222 * boolean result code is stored in *okay.
223 *
224 * Returns an errno value and sets *result to zero if there was
225 * some problem that prevented the rpcbind request from being
226 * dispatched, or if the rpcbind daemon did not respond within
227 * the timeout.
228 *
229 * This function uses rpcbind protocol version 2 to contact the 227 * This function uses rpcbind protocol version 2 to contact the
230 * local rpcbind daemon. 228 * local rpcbind daemon.
231 * 229 *
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
236 * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 234 * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
237 * addresses). 235 * addresses).
238 */ 236 */
239int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) 237int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
240{ 238{
241 struct rpcbind_args map = { 239 struct rpcbind_args map = {
242 .r_prog = prog, 240 .r_prog = prog,
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
246 }; 244 };
247 struct rpc_message msg = { 245 struct rpc_message msg = {
248 .rpc_argp = &map, 246 .rpc_argp = &map,
249 .rpc_resp = okay,
250 }; 247 };
251 248
252 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " 249 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
259 256
260 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 257 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
261 sizeof(rpcb_inaddr_loopback), 258 sizeof(rpcb_inaddr_loopback),
262 RPCBVERS_2, &msg, okay); 259 RPCBVERS_2, &msg);
263} 260}
264 261
265/* 262/*
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
290 287
291 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 288 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
292 sizeof(rpcb_inaddr_loopback), 289 sizeof(rpcb_inaddr_loopback),
293 RPCBVERS_4, msg, msg->rpc_resp); 290 RPCBVERS_4, msg);
294} 291}
295 292
296/* 293/*
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
304 char buf[64]; 301 char buf[64];
305 302
306 /* Construct AF_INET6 universal address */ 303 /* Construct AF_INET6 universal address */
307 snprintf(buf, sizeof(buf), 304 if (ipv6_addr_any(&address_to_register->sin6_addr))
308 NIP6_FMT".%u.%u", 305 snprintf(buf, sizeof(buf), "::.%u.%u",
309 NIP6(address_to_register->sin6_addr), 306 port >> 8, port & 0xff);
310 port >> 8, port & 0xff); 307 else
308 snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
309 NIP6(address_to_register->sin6_addr),
310 port >> 8, port & 0xff);
311 map->r_addr = buf; 311 map->r_addr = buf;
312 312
313 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 313 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
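The rpcbind v4 universal address is the text form of the address with the port appended as two decimal byte values. Port 2049 is 0x0801, so the unspecified IPv6 address registers as "::.8.1"; a quick standalone check of the encoding:

#include <stdio.h>

int main(void)
{
	unsigned short port = 2049;	/* 0x0801, illustrative */

	/* same computation as the snprintf() above */
	printf("::.%u.%u\n", port >> 8, port & 0xff);	/* ::.8.1 */
	return 0;
}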
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
321 321
322 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, 322 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
323 sizeof(rpcb_in6addr_loopback), 323 sizeof(rpcb_in6addr_loopback),
324 RPCBVERS_4, msg, msg->rpc_resp); 324 RPCBVERS_4, msg);
325} 325}
326 326
327/** 327/**
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
330 * @version: RPC version number of service to (un)register 330 * @version: RPC version number of service to (un)register
331 * @address: address family, IP address, and port to (un)register 331 * @address: address family, IP address, and port to (un)register
332 * @netid: netid of transport protocol to (un)register 332 * @netid: netid of transport protocol to (un)register
333 * @result: result code from rpcbind RPC call 333 *
334 * Returns zero if the registration request was dispatched successfully
335 * and the rpcbind daemon returned success. Otherwise, returns an errno
336 * value that reflects the nature of the error (request could not be
337 * dispatched, timed out, or rpcbind returned an error).
334 * 338 *
335 * RPC services invoke this function to advertise their contact 339 * RPC services invoke this function to advertise their contact
336 * information via the system's rpcbind daemon. RPC services 340 * information via the system's rpcbind daemon. RPC services
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
342 * to zero. Callers pass a netid of "" to unregister all 346 * to zero. Callers pass a netid of "" to unregister all
343 * transport netids associated with [program, version, address]. 347 * transport netids associated with [program, version, address].
344 * 348 *
345 * Returns zero if the registration request was dispatched
346 * successfully and a reply was received. The rpcbind daemon's
347 * result code is stored in *result.
348 *
349 * Returns an errno value and sets *result to zero if there was
350 * some problem that prevented the rpcbind request from being
351 * dispatched, or if the rpcbind daemon did not respond within
352 * the timeout.
353 *
354 * This function uses rpcbind protocol version 4 to contact the 349 * This function uses rpcbind protocol version 4 to contact the
355 * local rpcbind daemon. The local rpcbind daemon must support 350 * local rpcbind daemon. The local rpcbind daemon must support
356 * version 4 of the rpcbind protocol in order for these functions 351 * version 4 of the rpcbind protocol in order for these functions
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
372 * advertises the service on all IPv4 and IPv6 addresses. 367 * advertises the service on all IPv4 and IPv6 addresses.
373 */ 368 */
374int rpcb_v4_register(const u32 program, const u32 version, 369int rpcb_v4_register(const u32 program, const u32 version,
375 const struct sockaddr *address, const char *netid, 370 const struct sockaddr *address, const char *netid)
376 int *result)
377{ 371{
378 struct rpcbind_args map = { 372 struct rpcbind_args map = {
379 .r_prog = program, 373 .r_prog = program,
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
383 }; 377 };
384 struct rpc_message msg = { 378 struct rpc_message msg = {
385 .rpc_argp = &map, 379 .rpc_argp = &map,
386 .rpc_resp = result,
387 }; 380 };
388 381
389 *result = 0;
390
391 switch (address->sa_family) { 382 switch (address->sa_family) {
392 case AF_INET: 383 case AF_INET:
393 return rpcb_register_netid4((struct sockaddr_in *)address, 384 return rpcb_register_netid4((struct sockaddr_in *)address,
@@ -469,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
469 return rpc_run_task(&task_setup_data); 460 return rpc_run_task(&task_setup_data);
470} 461}
471 462
463/*
464 * In the case where rpc clients have been cloned, we want to make
465 * sure that we use the program number/version etc of the actual
466 * owner of the xprt. To do so, we walk back up the tree of parents
467 * to find whoever created the transport and/or whoever has the
468 * autobind flag set.
469 */
470static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
471{
472 struct rpc_clnt *parent = clnt->cl_parent;
473
474 while (parent != clnt) {
475 if (parent->cl_xprt != clnt->cl_xprt)
476 break;
477 if (clnt->cl_autobind)
478 break;
479 clnt = parent;
480 parent = parent->cl_parent;
481 }
482 return clnt;
483}
484
472/** 485/**
473 * rpcb_getport_async - obtain the port for a given RPC service on a given host 486 * rpcb_getport_async - obtain the port for a given RPC service on a given host
474 * @task: task that is waiting for portmapper request 487 * @task: task that is waiting for portmapper request
@@ -478,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
478 */ 491 */
479void rpcb_getport_async(struct rpc_task *task) 492void rpcb_getport_async(struct rpc_task *task)
480{ 493{
481 struct rpc_clnt *clnt = task->tk_client; 494 struct rpc_clnt *clnt;
482 struct rpc_procinfo *proc; 495 struct rpc_procinfo *proc;
483 u32 bind_version; 496 u32 bind_version;
484 struct rpc_xprt *xprt = task->tk_xprt; 497 struct rpc_xprt *xprt;
485 struct rpc_clnt *rpcb_clnt; 498 struct rpc_clnt *rpcb_clnt;
486 static struct rpcbind_args *map; 499 static struct rpcbind_args *map;
487 struct rpc_task *child; 500 struct rpc_task *child;
@@ -490,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
490 size_t salen; 503 size_t salen;
491 int status; 504 int status;
492 505
506 clnt = rpcb_find_transport_owner(task->tk_client);
507 xprt = clnt->cl_xprt;
508
493 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", 509 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
494 task->tk_pid, __func__, 510 task->tk_pid, __func__,
495 clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); 511 clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
496 512
497 /* Autobind on cloned rpc clients is discouraged */
498 BUG_ON(clnt->cl_parent != clnt);
499
500 /* Put self on the wait queue to ensure we get notified if 513 /* Put self on the wait queue to ensure we get notified if
501 * some other task is already attempting to bind the port */ 514 * some other task is already attempting to bind the port */
502 rpc_sleep_on(&xprt->binding, task, NULL); 515 rpc_sleep_on(&xprt->binding, task, NULL);
@@ -558,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
558 status = -ENOMEM; 571 status = -ENOMEM;
559 dprintk("RPC: %5u %s: no memory available\n", 572 dprintk("RPC: %5u %s: no memory available\n",
560 task->tk_pid, __func__); 573 task->tk_pid, __func__);
561 goto bailout_nofree; 574 goto bailout_release_client;
562 } 575 }
563 map->r_prog = clnt->cl_prog; 576 map->r_prog = clnt->cl_prog;
564 map->r_vers = clnt->cl_vers; 577 map->r_vers = clnt->cl_vers;
@@ -578,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
578 task->tk_pid, __func__); 591 task->tk_pid, __func__);
579 return; 592 return;
580 } 593 }
581 rpc_put_task(child);
582 594
583 task->tk_xprt->stat.bind_count++; 595 xprt->stat.bind_count++;
596 rpc_put_task(child);
584 return; 597 return;
585 598
599bailout_release_client:
600 rpc_release_client(rpcb_clnt);
586bailout_nofree: 601bailout_nofree:
587 rpcb_wake_rpcbind_waiters(xprt, status); 602 rpcb_wake_rpcbind_waiters(xprt, status);
588 task->tk_status = status; 603 task->tk_status = status;
@@ -633,7 +648,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
633static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, 648static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
634 struct rpcbind_args *rpcb) 649 struct rpcbind_args *rpcb)
635{ 650{
636 dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", 651 dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n",
637 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); 652 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
638 *p++ = htonl(rpcb->r_prog); 653 *p++ = htonl(rpcb->r_prog);
639 *p++ = htonl(rpcb->r_vers); 654 *p++ = htonl(rpcb->r_vers);
@@ -648,7 +663,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
648 unsigned short *portp) 663 unsigned short *portp)
649{ 664{
650 *portp = (unsigned short) ntohl(*p++); 665 *portp = (unsigned short) ntohl(*p++);
651 dprintk("RPC: rpcb_decode_getport result %u\n", 666 dprintk("RPC: rpcb getport result: %u\n",
652 *portp); 667 *portp);
653 return 0; 668 return 0;
654} 669}
@@ -657,7 +672,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
657 unsigned int *boolp) 672 unsigned int *boolp)
658{ 673{
659 *boolp = (unsigned int) ntohl(*p++); 674 *boolp = (unsigned int) ntohl(*p++);
660 dprintk("RPC: rpcb_decode_set: call %s\n", 675 dprintk("RPC: rpcb set/unset call %s\n",
661 (*boolp ? "succeeded" : "failed")); 676 (*boolp ? "succeeded" : "failed"));
662 return 0; 677 return 0;
663} 678}
@@ -665,7 +680,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
665static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, 680static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
666 struct rpcbind_args *rpcb) 681 struct rpcbind_args *rpcb)
667{ 682{
668 dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", 683 dprintk("RPC: encoding rpcb request (%u, %u, %s)\n",
669 rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); 684 rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
670 *p++ = htonl(rpcb->r_prog); 685 *p++ = htonl(rpcb->r_prog);
671 *p++ = htonl(rpcb->r_vers); 686 *p++ = htonl(rpcb->r_vers);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a32cb7c4bb4..54c98d876847 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -28,6 +28,8 @@
28 28
29#define RPCDBG_FACILITY RPCDBG_SVCDSP 29#define RPCDBG_FACILITY RPCDBG_SVCDSP
30 30
31static void svc_unregister(const struct svc_serv *serv);
32
31#define svc_serv_is_pooled(serv) ((serv)->sv_function) 33#define svc_serv_is_pooled(serv) ((serv)->sv_function)
32 34
33/* 35/*
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
357 */ 359 */
358static struct svc_serv * 360static struct svc_serv *
359__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
360 void (*shutdown)(struct svc_serv *serv)) 362 sa_family_t family, void (*shutdown)(struct svc_serv *serv))
361{ 363{
362 struct svc_serv *serv; 364 struct svc_serv *serv;
363 unsigned int vers; 365 unsigned int vers;
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
366 368
367 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
368 return NULL; 370 return NULL;
371 serv->sv_family = family;
369 serv->sv_name = prog->pg_name; 372 serv->sv_name = prog->pg_name;
370 serv->sv_program = prog; 373 serv->sv_program = prog;
371 serv->sv_nrthreads = 1; 374 serv->sv_nrthreads = 1;
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
416 spin_lock_init(&pool->sp_lock); 419 spin_lock_init(&pool->sp_lock);
417 } 420 }
418 421
419
420 /* Remove any stale portmap registrations */ 422 /* Remove any stale portmap registrations */
421 svc_register(serv, 0, 0); 423 svc_unregister(serv);
422 424
423 return serv; 425 return serv;
424} 426}
425 427
426struct svc_serv * 428struct svc_serv *
427svc_create(struct svc_program *prog, unsigned int bufsize, 429svc_create(struct svc_program *prog, unsigned int bufsize,
428 void (*shutdown)(struct svc_serv *serv)) 430 sa_family_t family, void (*shutdown)(struct svc_serv *serv))
429{ 431{
430 return __svc_create(prog, bufsize, /*npools*/1, shutdown); 432 return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
431} 433}
432EXPORT_SYMBOL(svc_create); 434EXPORT_SYMBOL(svc_create);
433 435
434struct svc_serv * 436struct svc_serv *
435svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 437svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
436 void (*shutdown)(struct svc_serv *serv), 438 sa_family_t family, void (*shutdown)(struct svc_serv *serv),
437 svc_thread_fn func, struct module *mod) 439 svc_thread_fn func, struct module *mod)
438{ 440{
439 struct svc_serv *serv; 441 struct svc_serv *serv;
440 unsigned int npools = svc_pool_map_get(); 442 unsigned int npools = svc_pool_map_get();
441 443
442 serv = __svc_create(prog, bufsize, npools, shutdown); 444 serv = __svc_create(prog, bufsize, npools, family, shutdown);
443 445
444 if (serv != NULL) { 446 if (serv != NULL) {
445 serv->sv_function = func; 447 serv->sv_function = func;
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
486 if (svc_serv_is_pooled(serv)) 488 if (svc_serv_is_pooled(serv))
487 svc_pool_map_put(); 489 svc_pool_map_put();
488 490
489 /* Unregister service with the portmapper */ 491 svc_unregister(serv);
490 svc_register(serv, 0, 0);
491 kfree(serv->sv_pools); 492 kfree(serv->sv_pools);
492 kfree(serv); 493 kfree(serv);
493} 494}
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp)
718} 719}
719EXPORT_SYMBOL(svc_exit_thread); 720EXPORT_SYMBOL(svc_exit_thread);
720 721
722#ifdef CONFIG_SUNRPC_REGISTER_V4
723
721/* 724/*
722 * Register an RPC service with the local portmapper. 725 * Register an "inet" protocol family netid with the local
723 * To unregister a service, call this routine with 726 * rpcbind daemon via an rpcbind v4 SET request.
724 * proto and port == 0. 727 *
728 * No netconfig infrastructure is available in the kernel, so
729 * we map IP_ protocol numbers to netids by hand.
730 *
731 * Returns zero on success; a negative errno value is returned
732 * if any error occurs.
725 */ 733 */
726int 734static int __svc_rpcb_register4(const u32 program, const u32 version,
727svc_register(struct svc_serv *serv, int proto, unsigned short port) 735 const unsigned short protocol,
736 const unsigned short port)
737{
738 struct sockaddr_in sin = {
739 .sin_family = AF_INET,
740 .sin_addr.s_addr = htonl(INADDR_ANY),
741 .sin_port = htons(port),
742 };
743 char *netid;
744
745 switch (protocol) {
746 case IPPROTO_UDP:
747 netid = RPCBIND_NETID_UDP;
748 break;
749 case IPPROTO_TCP:
750 netid = RPCBIND_NETID_TCP;
751 break;
752 default:
753 return -EPROTONOSUPPORT;
754 }
755
756 return rpcb_v4_register(program, version,
757 (struct sockaddr *)&sin, netid);
758}
759
760/*
761 * Register an "inet6" protocol family netid with the local
762 * rpcbind daemon via an rpcbind v4 SET request.
763 *
764 * No netconfig infrastructure is available in the kernel, so
765 * we map IP_ protocol numbers to netids by hand.
766 *
767 * Returns zero on success; a negative errno value is returned
768 * if any error occurs.
769 */
770static int __svc_rpcb_register6(const u32 program, const u32 version,
771 const unsigned short protocol,
772 const unsigned short port)
773{
774 struct sockaddr_in6 sin6 = {
775 .sin6_family = AF_INET6,
776 .sin6_addr = IN6ADDR_ANY_INIT,
777 .sin6_port = htons(port),
778 };
779 char *netid;
780
781 switch (protocol) {
782 case IPPROTO_UDP:
783 netid = RPCBIND_NETID_UDP6;
784 break;
785 case IPPROTO_TCP:
786 netid = RPCBIND_NETID_TCP6;
787 break;
788 default:
789 return -EPROTONOSUPPORT;
790 }
791
792 return rpcb_v4_register(program, version,
793 (struct sockaddr *)&sin6, netid);
794}
795
796/*
797 * Register a kernel RPC service via rpcbind version 4.
798 *
799 * Returns zero on success; a negative errno value is returned
800 * if any error occurs.
801 */
802static int __svc_register(const u32 program, const u32 version,
803 const sa_family_t family,
804 const unsigned short protocol,
805 const unsigned short port)
806{
807 int error;
808
809 switch (family) {
810 case AF_INET:
811 return __svc_rpcb_register4(program, version,
812 protocol, port);
813 case AF_INET6:
814 error = __svc_rpcb_register6(program, version,
815 protocol, port);
816 if (error < 0)
817 return error;
818
819 /*
820 * Work around bug in some versions of Linux rpcbind
821 * which don't allow registration of both inet and
822 * inet6 netids.
823 *
824 * Error return ignored for now.
825 */
826 __svc_rpcb_register4(program, version,
827 protocol, port);
828 return 0;
829 }
830
831 return -EAFNOSUPPORT;
832}
833
834#else /* CONFIG_SUNRPC_REGISTER_V4 */
835
836/*
837 * Register a kernel RPC service via rpcbind version 2.
838 *
839 * Returns zero on success; a negative errno value is returned
840 * if any error occurs.
841 */
842static int __svc_register(const u32 program, const u32 version,
843 sa_family_t family,
844 const unsigned short protocol,
845 const unsigned short port)
846{
847 if (family != AF_INET)
848 return -EAFNOSUPPORT;
849
850 return rpcb_register(program, version, protocol, port);
851}
852
853#endif /* CONFIG_SUNRPC_REGISTER_V4 */
854
855/**
856 * svc_register - register an RPC service with the local portmapper
857 * @serv: svc_serv struct for the service to register
858 * @proto: transport protocol number to advertise
859 * @port: port to advertise
860 *
861 * Service is registered for any address in serv's address family
862 */
863int svc_register(const struct svc_serv *serv, const unsigned short proto,
864 const unsigned short port)
728{ 865{
729 struct svc_program *progp; 866 struct svc_program *progp;
730 unsigned long flags;
731 unsigned int i; 867 unsigned int i;
732 int error = 0, dummy; 868 int error = 0;
733 869
734 if (!port) 870 BUG_ON(proto == 0 && port == 0);
735 clear_thread_flag(TIF_SIGPENDING);
736 871
737 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 872 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
738 for (i = 0; i < progp->pg_nvers; i++) { 873 for (i = 0; i < progp->pg_nvers; i++) {
739 if (progp->pg_vers[i] == NULL) 874 if (progp->pg_vers[i] == NULL)
740 continue; 875 continue;
741 876
742 dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", 877 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
743 progp->pg_name, 878 progp->pg_name,
879 i,
744 proto == IPPROTO_UDP? "udp" : "tcp", 880 proto == IPPROTO_UDP? "udp" : "tcp",
745 port, 881 port,
746 i, 882 serv->sv_family,
747 progp->pg_vers[i]->vs_hidden? 883 progp->pg_vers[i]->vs_hidden?
748 " (but not telling portmap)" : ""); 884 " (but not telling portmap)" : "");
749 885
750 if (progp->pg_vers[i]->vs_hidden) 886 if (progp->pg_vers[i]->vs_hidden)
751 continue; 887 continue;
752 888
753 error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); 889 error = __svc_register(progp->pg_prog, i,
890 serv->sv_family, proto, port);
754 if (error < 0) 891 if (error < 0)
755 break; 892 break;
756 if (port && !dummy) {
757 error = -EACCES;
758 break;
759 }
760 } 893 }
761 } 894 }
762 895
763 if (!port) { 896 return error;
764 spin_lock_irqsave(&current->sighand->siglock, flags); 897}
765 recalc_sigpending(); 898
766 spin_unlock_irqrestore(&current->sighand->siglock, flags); 899#ifdef CONFIG_SUNRPC_REGISTER_V4
900
901static void __svc_unregister(const u32 program, const u32 version,
902 const char *progname)
903{
904 struct sockaddr_in6 sin6 = {
905 .sin6_family = AF_INET6,
906 .sin6_addr = IN6ADDR_ANY_INIT,
907 .sin6_port = 0,
908 };
909 int error;
910
911 error = rpcb_v4_register(program, version,
912 (struct sockaddr *)&sin6, "");
913 dprintk("svc: %s(%sv%u), error %d\n",
914 __func__, progname, version, error);
915}
916
917#else /* CONFIG_SUNRPC_REGISTER_V4 */
918
919static void __svc_unregister(const u32 program, const u32 version,
920 const char *progname)
921{
922 int error;
923
924 error = rpcb_register(program, version, 0, 0);
925 dprintk("svc: %s(%sv%u), error %d\n",
926 __func__, progname, version, error);
927}
928
929#endif /* CONFIG_SUNRPC_REGISTER_V4 */
930
931/*
932 * All netids, bind addresses and ports registered for [program, version]
933 * are removed from the local rpcbind database (if the service is not
934 * hidden) to make way for a new instance of the service.
935 *
936 * The result of unregistration is reported via dprintk for those who want
937 * verification of the result, but is otherwise not important.
938 */
939static void svc_unregister(const struct svc_serv *serv)
940{
941 struct svc_program *progp;
942 unsigned long flags;
943 unsigned int i;
944
945 clear_thread_flag(TIF_SIGPENDING);
946
947 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
948 for (i = 0; i < progp->pg_nvers; i++) {
949 if (progp->pg_vers[i] == NULL)
950 continue;
951 if (progp->pg_vers[i]->vs_hidden)
952 continue;
953
954 __svc_unregister(progp->pg_prog, i, progp->pg_name);
955 }
767 } 956 }
768 957
769 return error; 958 spin_lock_irqsave(&current->sighand->siglock, flags);
959 recalc_sigpending();
960 spin_unlock_irqrestore(&current->sighand->siglock, flags);
770} 961}
771 962
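svc_unregister() keeps the signal dance that used to live in the !port branch of the old svc_register(): shutdown is typically signal-driven, and a pending signal would make the synchronous rpcbind calls fail immediately, so TIF_SIGPENDING is cleared for the duration and recomputed afterwards. The idiom in isolation (a sketch; the rationale is inferred from the old code path):

/* Temporarily ignore pending signals around interruptible RPCs. */
unsigned long flags;

clear_thread_flag(TIF_SIGPENDING);

/* ... issue the synchronous rpcbind calls here ... */

spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);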
772/* 963/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825f4954..bf5b5cdafebf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
159} 159}
160EXPORT_SYMBOL_GPL(svc_xprt_init); 160EXPORT_SYMBOL_GPL(svc_xprt_init);
161 161
162int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, 162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
163 int flags) 163 struct svc_serv *serv,
164 unsigned short port, int flags)
164{ 165{
165 struct svc_xprt_class *xcl;
166 struct sockaddr_in sin = { 166 struct sockaddr_in sin = {
167 .sin_family = AF_INET, 167 .sin_family = AF_INET,
168 .sin_addr.s_addr = htonl(INADDR_ANY), 168 .sin_addr.s_addr = htonl(INADDR_ANY),
169 .sin_port = htons(port), 169 .sin_port = htons(port),
170 }; 170 };
171 struct sockaddr_in6 sin6 = {
172 .sin6_family = AF_INET6,
173 .sin6_addr = IN6ADDR_ANY_INIT,
174 .sin6_port = htons(port),
175 };
176 struct sockaddr *sap;
177 size_t len;
178
179 switch (serv->sv_family) {
180 case AF_INET:
181 sap = (struct sockaddr *)&sin;
182 len = sizeof(sin);
183 break;
184 case AF_INET6:
185 sap = (struct sockaddr *)&sin6;
186 len = sizeof(sin6);
187 break;
188 default:
189 return ERR_PTR(-EAFNOSUPPORT);
190 }
191
192 return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
193}
194
195int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
196 int flags)
197{
198 struct svc_xprt_class *xcl;
199
171 dprintk("svc: creating transport %s[%d]\n", xprt_name, port); 200 dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
172 spin_lock(&svc_xprt_class_lock); 201 spin_lock(&svc_xprt_class_lock);
173 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { 202 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
180 goto err; 209 goto err;
181 210
182 spin_unlock(&svc_xprt_class_lock); 211 spin_unlock(&svc_xprt_class_lock);
183 newxprt = xcl->xcl_ops-> 212 newxprt = __svc_xpo_create(xcl, serv, port, flags);
184 xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
185 flags);
186 if (IS_ERR(newxprt)) { 213 if (IS_ERR(newxprt)) {
187 module_put(xcl->xcl_owner); 214 module_put(xcl->xcl_owner);
188 return PTR_ERR(newxprt); 215 return PTR_ERR(newxprt);
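Callers of svc_create_xprt() are unchanged; the listen address is now derived from serv->sv_family instead of being hard-wired to INADDR_ANY. A typical call, with an illustrative port, assuming the SVC_SOCK_DEFAULTS flags value:

/* Sketch: with sv_family == AF_INET6 this now listens on [::]:2049
 * rather than 0.0.0.0:2049. */
error = svc_create_xprt(serv, "tcp", 2049, SVC_SOCK_DEFAULTS);
if (error < 0)
	goto out_shutdown;	/* hypothetical error label */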
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719f1ef6..95293f549e9c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1114 struct svc_sock *svsk; 1114 struct svc_sock *svsk;
1115 struct sock *inet; 1115 struct sock *inet;
1116 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1116 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1117 int val;
1117 1118
1118 dprintk("svc: svc_setup_socket %p\n", sock); 1119 dprintk("svc: svc_setup_socket %p\n", sock);
1119 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1120 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1146 else 1147 else
1147 svc_tcp_init(svsk, serv); 1148 svc_tcp_init(svsk, serv);
1148 1149
1150 /*
1151 * We start one listener per sv_serv. We want AF_INET
1152 * requests to be automatically shunted to our AF_INET6
1153 * listener using a mapped IPv4 address. Make sure
1154 * no-one starts an equivalent IPv4 listener, which
1155 * would steal our incoming connections.
1156 */
1157 val = 0;
1158 if (serv->sv_family == AF_INET6)
1159 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1160 (char *)&val, sizeof(val));
1161
1149 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1162 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1150 svsk, svsk->sk_sk); 1163 svsk, svsk->sk_sk);
1151 1164
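Clearing IPV6_V6ONLY makes the single AF_INET6 listener dual-stack: AF_INET peers are accepted too and appear with IPv4-mapped addresses (::ffff:a.b.c.d). For comparison, the userspace equivalent of the kernel_setsockopt() call above, as a sketch with error checks elided:

#include <sys/socket.h>
#include <netinet/in.h>

int make_dual_stack_listener(void)
{
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	int off = 0;

	/* 0 = also accept IPv4; peers show up as ::ffff:a.b.c.d */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
	return fd;	/* bind()/listen() as usual */
}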
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1154 1167
1155int svc_addsock(struct svc_serv *serv, 1168int svc_addsock(struct svc_serv *serv,
1156 int fd, 1169 int fd,
1157 char *name_return, 1170 char *name_return)
1158 int *proto)
1159{ 1171{
1160 int err = 0; 1172 int err = 0;
1161 struct socket *so = sockfd_lookup(fd, &err); 1173 struct socket *so = sockfd_lookup(fd, &err);
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
1190 sockfd_put(so); 1202 sockfd_put(so);
1191 return err; 1203 return err;
1192 } 1204 }
1193 if (proto) *proto = so->sk->sk_protocol;
1194 return one_sock_name(name_return, svsk); 1205 return one_sock_name(name_return, svsk);
1195} 1206}
1196EXPORT_SYMBOL_GPL(svc_addsock); 1207EXPORT_SYMBOL_GPL(svc_addsock);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 99a52aabe332..29e401bb612e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
108 goto out; 108 goto out;
109 } 109 }
110 110
111 result = -EINVAL; 111 list_add_tail(&transport->list, &xprt_list);
112 if (try_module_get(THIS_MODULE)) { 112 printk(KERN_INFO "RPC: Registered %s transport module.\n",
113 list_add_tail(&transport->list, &xprt_list); 113 transport->name);
114 printk(KERN_INFO "RPC: Registered %s transport module.\n", 114 result = 0;
115 transport->name);
116 result = 0;
117 }
118 115
119out: 116out:
120 spin_unlock(&xprt_list_lock); 117 spin_unlock(&xprt_list_lock);
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
143 "RPC: Unregistered %s transport module.\n", 140 "RPC: Unregistered %s transport module.\n",
144 transport->name); 141 transport->name);
145 list_del_init(&transport->list); 142 list_del_init(&transport->list);
146 module_put(THIS_MODULE);
147 goto out; 143 goto out;
148 } 144 }
149 } 145 }
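The dropped try_module_get(THIS_MODULE) only pinned sunrpc itself for as long as any transport stayed registered; it never protected the transport module, whose lifetime is already tied to sunrpc through symbol dependencies. Registration is now plain list management. A sketch of a transport module using the registry; the xprt_class contents and xs_setup_tcp are illustrative:

static struct xprt_class my_transport = {
	.list	= LIST_HEAD_INIT(my_transport.list),
	.name	= "tcp",
	.owner	= THIS_MODULE,	/* pins this module, not sunrpc */
	.setup	= xs_setup_tcp,
};

static int __init my_init(void)
{
	return xprt_register_transport(&my_transport);
}

static void __exit my_exit(void)
{
	xprt_unregister_transport(&my_transport);
}
module_init(my_init);
module_exit(my_exit);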
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 5c1954d28d09..14106d26bb95 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
118 } 118 }
119 119
120 if (xdrbuf->tail[0].iov_len) { 120 if (xdrbuf->tail[0].iov_len) {
121 /* the rpcrdma protocol allows us to omit any trailing
122 * xdr pad bytes, saving the server an RDMA operation. */
123 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
124 return n;
121 if (n == nsegs) 125 if (n == nsegs)
122 return 0; 126 return 0;
123 seg[n].mr_page = NULL; 127 seg[n].mr_page = NULL;
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
508 if (hdrlen == 0) 512 if (hdrlen == 0)
509 return -1; 513 return -1;
510 514
511 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" 515 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
512 " headerp 0x%p base 0x%p lkey 0x%x\n", 516 " headerp 0x%p base 0x%p lkey 0x%x\n",
513 __func__, transfertypes[wtype], hdrlen, rpclen, padlen, 517 __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
514 headerp, base, req->rl_iov.lkey); 518 headerp, base, req->rl_iov.lkey);
515 519
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
594 * Scatter inline received data back into provided iov's. 598 * Scatter inline received data back into provided iov's.
595 */ 599 */
596static void 600static void
597rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) 601rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
598{ 602{
599 int i, npages, curlen, olen; 603 int i, npages, curlen, olen;
600 char *destp; 604 char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
660 } else 664 } else
661 rqst->rq_rcv_buf.tail[0].iov_len = 0; 665 rqst->rq_rcv_buf.tail[0].iov_len = 0;
662 666
667 if (pad) {
668 /* implicit padding on terminal chunk */
669 unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
670 while (pad--)
671 p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
672 }
673
663 if (copy_len) 674 if (copy_len)
664 dprintk("RPC: %s: %d bytes in" 675 dprintk("RPC: %s: %d bytes in"
665 " %d extra segments (%d lost)\n", 676 " %d extra segments (%d lost)\n",
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
681 struct rpc_xprt *xprt = ep->rep_xprt; 692 struct rpc_xprt *xprt = ep->rep_xprt;
682 693
683 spin_lock_bh(&xprt->transport_lock); 694 spin_lock_bh(&xprt->transport_lock);
695 if (++xprt->connect_cookie == 0) /* maintain a reserved value */
696 ++xprt->connect_cookie;
684 if (ep->rep_connected > 0) { 697 if (ep->rep_connected > 0) {
685 if (!xprt_test_and_set_connected(xprt)) 698 if (!xprt_test_and_set_connected(xprt))
686 xprt_wake_pending_tasks(xprt, 0); 699 xprt_wake_pending_tasks(xprt, 0);
687 } else { 700 } else {
688 if (xprt_test_and_clear_connected(xprt)) 701 if (xprt_test_and_clear_connected(xprt))
689 xprt_wake_pending_tasks(xprt, ep->rep_connected); 702 xprt_wake_pending_tasks(xprt, -ENOTCONN);
690 } 703 }
691 spin_unlock_bh(&xprt->transport_lock); 704 spin_unlock_bh(&xprt->transport_lock);
692} 705}
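The connect_cookie is bumped on every connect and disconnect so a request can tell that the connection it was transmitted on has since been recycled; zero is kept as a reserved never-valid value, hence the double increment on wrap. The update in isolation:

/* Sketch: a request snapshots xprt->connect_cookie at transmit time;
 * a later mismatch means the link went down and came back. */
if (++xprt->connect_cookie == 0)	/* 0 stays reserved */
	++xprt->connect_cookie;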
@@ -792,14 +805,20 @@ repost:
792 ((unsigned char *)iptr - (unsigned char *)headerp); 805 ((unsigned char *)iptr - (unsigned char *)headerp);
793 status = rep->rr_len + rdmalen; 806 status = rep->rr_len + rdmalen;
794 r_xprt->rx_stats.total_rdma_reply += rdmalen; 807 r_xprt->rx_stats.total_rdma_reply += rdmalen;
808 /* special case - last chunk may omit padding */
809 if (rdmalen &= 3) {
810 rdmalen = 4 - rdmalen;
811 status += rdmalen;
812 }
795 } else { 813 } else {
796 /* else ordinary inline */ 814 /* else ordinary inline */
815 rdmalen = 0;
797 iptr = (__be32 *)((unsigned char *)headerp + 28); 816 iptr = (__be32 *)((unsigned char *)headerp + 28);
798 rep->rr_len -= 28; /*sizeof *headerp;*/ 817 rep->rr_len -= 28; /*sizeof *headerp;*/
799 status = rep->rr_len; 818 status = rep->rr_len;
800 } 819 }
801 /* Fix up the rpc results for upper layer */ 820 /* Fix up the rpc results for upper layer */
802 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); 821 rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
803 break; 822 break;
804 823
805 case htonl(RDMA_NOMSG): 824 case htonl(RDMA_NOMSG):
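Putting the two pad changes together: the sender may omit a sub-4-byte XDR pad after the RDMA'd data, so on receive any rdmalen that is not a multiple of four means the missing 4 - (rdmalen & 3) zero bytes must be synthesized locally (the new pad argument to rpcrdma_inline_fixup()) and counted in the reply length. A standalone check of the arithmetic:

#include <stdio.h>

int main(void)
{
	int rdmalen = 1021;	/* illustrative RDMA chunk byte count */

	rdmalen &= 3;			/* 1: bytes past the last XDR quad */
	if (rdmalen)
		rdmalen = 4 - rdmalen;	/* pad bytes to synthesize */
	printf("pad = %d\n", rdmalen);	/* prints "pad = 3" */
	return 0;
}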
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a06616..a4756576d687 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
116 * 116 *
117 * Assumptions: 117 * Assumptions:
118 * - chunk[0]->position points to pages[0] at an offset of 0 118 * - chunk[0]->position points to pages[0] at an offset of 0
119 * - pages[] is not physically or virtually contigous and consists of 119 * - pages[] is not physically or virtually contiguous and consists of
120 * PAGE_SIZE elements. 120 * PAGE_SIZE elements.
121 * 121 *
122 * Output: 122 * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
125 * chunk in the read list 125 * chunk in the read list
126 * 126 *
127 */ 127 */
128static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, 128static int map_read_chunks(struct svcxprt_rdma *xprt,
129 struct svc_rqst *rqstp, 129 struct svc_rqst *rqstp,
130 struct svc_rdma_op_ctxt *head, 130 struct svc_rdma_op_ctxt *head,
131 struct rpcrdma_msg *rmsgp, 131 struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
211 return sge_no; 211 return sge_no;
212} 212}
213 213
214static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, 214/* Map a read-chunk-list to an XDR and fast register the page-list.
215 struct svc_rdma_op_ctxt *ctxt, 215 *
216 struct kvec *vec, 216 * Assumptions:
217 u64 *sgl_offset, 217 * - chunk[0] position points to pages[0] at an offset of 0
218 int count) 218 * - pages[] will be made physically contiguous by creating a one-off memory
219 * region using the fastreg verb.
220 * - byte_count is # of bytes in read-chunk-list
221 * - ch_count is # of chunks in read-chunk-list
222 *
223 * Output:
224 * - sge array pointing into pages[] array.
225 * - chunk_sge array specifying sge index and count for each
226 * chunk in the read list
227 */
228static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
229 struct svc_rqst *rqstp,
230 struct svc_rdma_op_ctxt *head,
231 struct rpcrdma_msg *rmsgp,
232 struct svc_rdma_req_map *rpl_map,
233 struct svc_rdma_req_map *chl_map,
234 int ch_count,
235 int byte_count)
236{
237 int page_no;
238 int ch_no;
239 u32 offset;
240 struct rpcrdma_read_chunk *ch;
241 struct svc_rdma_fastreg_mr *frmr;
242 int ret = 0;
243
244 frmr = svc_rdma_get_frmr(xprt);
245 if (IS_ERR(frmr))
246 return -ENOMEM;
247
248 head->frmr = frmr;
249 head->arg.head[0] = rqstp->rq_arg.head[0];
250 head->arg.tail[0] = rqstp->rq_arg.tail[0];
251 head->arg.pages = &head->pages[head->count];
252 head->hdr_count = head->count; /* save count of hdr pages */
253 head->arg.page_base = 0;
254 head->arg.page_len = byte_count;
255 head->arg.len = rqstp->rq_arg.len + byte_count;
256 head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
257
258 /* Fast register the page list */
259 frmr->kva = page_address(rqstp->rq_arg.pages[0]);
260 frmr->direction = DMA_FROM_DEVICE;
261 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
262 frmr->map_len = byte_count;
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]),
268 PAGE_SIZE, DMA_TO_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no]))
271 goto fatal_err;
272 atomic_inc(&xprt->sc_dma_used);
273 head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
274 }
275 head->count += page_no;
276
277 /* rq_respages points one past arg pages */
278 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
279
280 /* Create the reply and chunk maps */
281 offset = 0;
282 ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
283 for (ch_no = 0; ch_no < ch_count; ch_no++) {
284 rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
285 rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
286 chl_map->ch[ch_no].count = 1;
287 chl_map->ch[ch_no].start = ch_no;
288 offset += ch->rc_target.rs_length;
289 ch++;
290 }
291
292 ret = svc_rdma_fastreg(xprt, frmr);
293 if (ret)
294 goto fatal_err;
295
296 return ch_no;
297
298 fatal_err:
 299 printk(KERN_ERR "svcrdma: error fast registering xdr for xprt %p\n", xprt);
300 svc_rdma_put_frmr(xprt, frmr);
301 return -EIO;
302}
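Where map_read_chunks() builds one SGE per page, fast_reg_read_chunks() fast-registers the whole data sink as a single virtually contiguous MR, so each read chunk reduces to one (frmr->kva + offset, length) vector no matter how many pages back it. The page-list sizing is simple alignment arithmetic; a standalone check, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long byte_count = 10000;	/* illustrative */

	/* frmr->page_list_len computation from fast_reg_read_chunks() */
	printf("%lu pages\n", PAGE_ALIGN(byte_count) >> PAGE_SHIFT);
	return 0;	/* prints "3 pages" */
}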
303
304static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
305 struct svc_rdma_op_ctxt *ctxt,
306 struct svc_rdma_fastreg_mr *frmr,
307 struct kvec *vec,
308 u64 *sgl_offset,
309 int count)
219{ 310{
220 int i; 311 int i;
221 312
222 ctxt->count = count; 313 ctxt->count = count;
223 ctxt->direction = DMA_FROM_DEVICE; 314 ctxt->direction = DMA_FROM_DEVICE;
224 for (i = 0; i < count; i++) { 315 for (i = 0; i < count; i++) {
225 atomic_inc(&xprt->sc_dma_used); 316 ctxt->sge[i].length = 0; /* in case map fails */
226 ctxt->sge[i].addr = 317 if (!frmr) {
227 ib_dma_map_single(xprt->sc_cm_id->device, 318 ctxt->sge[i].addr =
228 vec[i].iov_base, vec[i].iov_len, 319 ib_dma_map_single(xprt->sc_cm_id->device,
229 DMA_FROM_DEVICE); 320 vec[i].iov_base,
321 vec[i].iov_len,
322 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr))
325 return -EINVAL;
326 ctxt->sge[i].lkey = xprt->sc_dma_lkey;
327 atomic_inc(&xprt->sc_dma_used);
328 } else {
329 ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
330 ctxt->sge[i].lkey = frmr->mr->lkey;
331 }
230 ctxt->sge[i].length = vec[i].iov_len; 332 ctxt->sge[i].length = vec[i].iov_len;
231 ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
232 *sgl_offset = *sgl_offset + vec[i].iov_len; 333 *sgl_offset = *sgl_offset + vec[i].iov_len;
233 } 334 }
335 return 0;
234} 336}
235 337
236static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) 338static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
278 struct svc_rdma_op_ctxt *hdr_ctxt) 380 struct svc_rdma_op_ctxt *hdr_ctxt)
279{ 381{
280 struct ib_send_wr read_wr; 382 struct ib_send_wr read_wr;
383 struct ib_send_wr inv_wr;
281 int err = 0; 384 int err = 0;
282 int ch_no; 385 int ch_no;
283 int ch_count; 386 int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
301 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); 404 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
302 if (ch_count > RPCSVC_MAXPAGES) 405 if (ch_count > RPCSVC_MAXPAGES)
303 return -EINVAL; 406 return -EINVAL;
304 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, 407
305 rpl_map, chl_map, 408 if (!xprt->sc_frmr_pg_list_len)
306 ch_count, byte_count); 409 sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
410 rpl_map, chl_map, ch_count,
411 byte_count);
412 else
413 sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
414 rpl_map, chl_map, ch_count,
415 byte_count);
416 if (sge_count < 0) {
417 err = -EIO;
418 goto out;
419 }
420
307 sgl_offset = 0; 421 sgl_offset = 0;
308 ch_no = 0; 422 ch_no = 0;
309 423
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
312next_sge: 426next_sge:
313 ctxt = svc_rdma_get_context(xprt); 427 ctxt = svc_rdma_get_context(xprt);
314 ctxt->direction = DMA_FROM_DEVICE; 428 ctxt->direction = DMA_FROM_DEVICE;
429 ctxt->frmr = hdr_ctxt->frmr;
430 ctxt->read_hdr = NULL;
315 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 431 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
432 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
316 433
317 /* Prepare READ WR */ 434 /* Prepare READ WR */
318 memset(&read_wr, 0, sizeof read_wr); 435 memset(&read_wr, 0, sizeof read_wr);
319 ctxt->wr_op = IB_WR_RDMA_READ;
320 read_wr.wr_id = (unsigned long)ctxt; 436 read_wr.wr_id = (unsigned long)ctxt;
321 read_wr.opcode = IB_WR_RDMA_READ; 437 read_wr.opcode = IB_WR_RDMA_READ;
438 ctxt->wr_op = read_wr.opcode;
322 read_wr.send_flags = IB_SEND_SIGNALED; 439 read_wr.send_flags = IB_SEND_SIGNALED;
323 read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; 440 read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
324 read_wr.wr.rdma.remote_addr = 441 read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
327 read_wr.sg_list = ctxt->sge; 444 read_wr.sg_list = ctxt->sge;
328 read_wr.num_sge = 445 read_wr.num_sge =
329 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); 446 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
330 rdma_set_ctxt_sge(xprt, ctxt, 447 err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
331 &rpl_map->sge[chl_map->ch[ch_no].start], 448 &rpl_map->sge[chl_map->ch[ch_no].start],
332 &sgl_offset, 449 &sgl_offset,
333 read_wr.num_sge); 450 read_wr.num_sge);
451 if (err) {
452 svc_rdma_unmap_dma(ctxt);
453 svc_rdma_put_context(ctxt, 0);
454 goto out;
455 }
334 if (((ch+1)->rc_discrim == 0) && 456 if (((ch+1)->rc_discrim == 0) &&
335 (read_wr.num_sge == chl_map->ch[ch_no].count)) { 457 (read_wr.num_sge == chl_map->ch[ch_no].count)) {
336 /* 458 /*
@@ -339,6 +461,29 @@ next_sge:
339 * the client and the RPC needs to be enqueued. 461 * the client and the RPC needs to be enqueued.
340 */ 462 */
341 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 463 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
464 if (hdr_ctxt->frmr) {
465 set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
466 /*
467 * Invalidate the local MR used to map the data
468 * sink.
469 */
470 if (xprt->sc_dev_caps &
471 SVCRDMA_DEVCAP_READ_W_INV) {
472 read_wr.opcode =
473 IB_WR_RDMA_READ_WITH_INV;
474 ctxt->wr_op = read_wr.opcode;
475 read_wr.ex.invalidate_rkey =
476 ctxt->frmr->mr->lkey;
477 } else {
478 /* Prepare INVALIDATE WR */
479 memset(&inv_wr, 0, sizeof inv_wr);
480 inv_wr.opcode = IB_WR_LOCAL_INV;
481 inv_wr.send_flags = IB_SEND_SIGNALED;
482 inv_wr.ex.invalidate_rkey =
483 hdr_ctxt->frmr->mr->lkey;
484 read_wr.next = &inv_wr;
485 }
486 }
342 ctxt->read_hdr = hdr_ctxt; 487 ctxt->read_hdr = hdr_ctxt;
343 } 488 }
344 /* Post the read */ 489 /* Post the read */
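The branch above picks between a single READ-with-invalidate and a chained LOCAL_INV work request, depending on whether the device advertises read-with-invalidate. A minimal userspace sketch of that choice follows; the struct and capability flag are simplified stand-ins for the kernel's ib_send_wr and SVCRDMA_DEVCAP_READ_W_INV, not the real definitions.

/*
 * Editor's sketch (not part of the patch): how the last RDMA_READ of a
 * chunk list either carries the invalidate itself (READ_W_INV) or gets
 * a LOCAL_INV work request chained behind it. Everything here is an
 * illustrative stand-in for the verbs structures.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

enum wr_opcode { WR_RDMA_READ, WR_RDMA_READ_WITH_INV, WR_LOCAL_INV };

struct send_wr {
	enum wr_opcode opcode;
	uint32_t invalidate_rkey;
	struct send_wr *next;
};

#define DEVCAP_READ_W_INV 0x1

/* Finish the final READ of a chunk; chain an invalidate if needed. */
static struct send_wr *finish_read_chain(struct send_wr *read_wr,
					 struct send_wr *inv_wr,
					 unsigned long dev_caps,
					 uint32_t frmr_lkey)
{
	if (dev_caps & DEVCAP_READ_W_INV) {
		/* One WR does both: read the data and invalidate the MR */
		read_wr->opcode = WR_RDMA_READ_WITH_INV;
		read_wr->invalidate_rkey = frmr_lkey;
	} else {
		/* Two WRs: the read, then a separate local invalidate */
		memset(inv_wr, 0, sizeof(*inv_wr));
		inv_wr->opcode = WR_LOCAL_INV;
		inv_wr->invalidate_rkey = frmr_lkey;
		read_wr->next = inv_wr;
	}
	return read_wr;
}

int main(void)
{
	struct send_wr read_wr = { .opcode = WR_RDMA_READ }, inv_wr;
	struct send_wr *chain = finish_read_chain(&read_wr, &inv_wr, 0, 42);
	int n = 0;
	for (; chain; chain = chain->next)
		n++;
	printf("posted %d WR(s)\n", n);	/* 2 without READ_W_INV */
	return 0;
}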
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d98..9a7a8e7ae038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
69 * array is only concerned with the reply we are assured that we have 69 * array is only concerned with the reply we are assured that we have
70 * one extra page for the RPCRDMA header. 70 * one extra page for the RPCRDMA header.
71 */ 71 */
72static void xdr_to_sge(struct svcxprt_rdma *xprt, 72int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{
76 int sge_no;
77 u32 sge_bytes;
78 u32 page_bytes;
79 u32 page_off;
80 int page_no = 0;
81 u8 *frva;
82 struct svc_rdma_fastreg_mr *frmr;
83
84 frmr = svc_rdma_get_frmr(xprt);
85 if (IS_ERR(frmr))
86 return -ENOMEM;
87 vec->frmr = frmr;
88
89 /* Skip the RPCRDMA header */
90 sge_no = 1;
91
92 /* Map the head. */
93 frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
94 vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
95 vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
96 vec->count = 2;
97 sge_no++;
98
99 /* Build the FRMR */
100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1;
105 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base,
108 PAGE_SIZE, DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no]))
111 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used);
113
114 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes)
117 goto encode_tail;
118
119 /* Map the pages */
120 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
121 vec->sge[sge_no].iov_len = page_bytes;
122 sge_no++;
123 while (page_bytes) {
124 struct page *page;
125
126 page = xdr->pages[page_no++];
127 sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
128 page_bytes -= sge_bytes;
129
130 frmr->page_list->page_list[page_no] =
131 ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
132 PAGE_SIZE, DMA_TO_DEVICE);
133 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
134 frmr->page_list->page_list[page_no]))
135 goto fatal_err;
136
137 atomic_inc(&xprt->sc_dma_used);
138 page_off = 0; /* reset for next time through loop */
139 frmr->map_len += PAGE_SIZE;
140 frmr->page_list_len++;
141 }
142 vec->count++;
143
144 encode_tail:
145 /* Map tail */
146 if (0 == xdr->tail[0].iov_len)
147 goto done;
148
149 vec->count++;
150 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
151
152 if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
153 ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
154 /*
155 * If head and tail use the same page, we don't need
156 * to map it again.
157 */
158 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
159 } else {
160 void *va;
161
162 /* Map another page for the tail */
163 page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
164 va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
165 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
166
167 frmr->page_list->page_list[page_no] =
168 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
169 DMA_TO_DEVICE);
170 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
171 frmr->page_list->page_list[page_no]))
172 goto fatal_err;
173 atomic_inc(&xprt->sc_dma_used);
174 frmr->map_len += PAGE_SIZE;
175 frmr->page_list_len++;
176 }
177
178 done:
179 if (svc_rdma_fastreg(xprt, frmr))
180 goto fatal_err;
181
182 return 0;
183
184 fatal_err:
 185	printk(KERN_ERR "svcrdma: Error fast registering memory for xprt %p\n", xprt);
186 svc_rdma_put_frmr(xprt, frmr);
187 return -EIO;
188}
189
190static int map_xdr(struct svcxprt_rdma *xprt,
191 struct xdr_buf *xdr,
192 struct svc_rdma_req_map *vec)
75{ 193{
76 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 194 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
77 int sge_no; 195 int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
83 BUG_ON(xdr->len != 201 BUG_ON(xdr->len !=
84 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); 202 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85 203
204 if (xprt->sc_frmr_pg_list_len)
205 return fast_reg_xdr(xprt, xdr, vec);
206
86 /* Skip the first sge, this is for the RPCRDMA header */ 207 /* Skip the first sge, this is for the RPCRDMA header */
87 sge_no = 1; 208 sge_no = 1;
88 209
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
116 237
117 BUG_ON(sge_no > sge_max); 238 BUG_ON(sge_no > sge_max);
118 vec->count = sge_no; 239 vec->count = sge_no;
240 return 0;
119} 241}
120 242
121/* Assumptions: 243/* Assumptions:
244 * - We are using FRMR
245 * - or -
122 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 246 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
123 */ 247 */
124static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 248static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
158 sge_no = 0; 282 sge_no = 0;
159 283
160 /* Copy the remaining SGE */ 284 /* Copy the remaining SGE */
161 while (bc != 0 && xdr_sge_no < vec->count) { 285 while (bc != 0) {
162 sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 286 sge_bytes = min_t(size_t,
163 sge_bytes = min((size_t)bc, 287 bc, vec->sge[xdr_sge_no].iov_len-sge_off);
164 (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
165 sge[sge_no].length = sge_bytes; 288 sge[sge_no].length = sge_bytes;
166 atomic_inc(&xprt->sc_dma_used); 289 if (!vec->frmr) {
167 sge[sge_no].addr = 290 sge[sge_no].addr =
168 ib_dma_map_single(xprt->sc_cm_id->device, 291 ib_dma_map_single(xprt->sc_cm_id->device,
169 (void *) 292 (void *)
170 vec->sge[xdr_sge_no].iov_base + sge_off, 293 vec->sge[xdr_sge_no].iov_base + sge_off,
171 sge_bytes, DMA_TO_DEVICE); 294 sge_bytes, DMA_TO_DEVICE);
172 if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, 295 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
173 sge[sge_no].addr)) 296 sge[sge_no].addr))
174 goto err; 297 goto err;
298 atomic_inc(&xprt->sc_dma_used);
299 sge[sge_no].lkey = xprt->sc_dma_lkey;
300 } else {
301 sge[sge_no].addr = (unsigned long)
302 vec->sge[xdr_sge_no].iov_base + sge_off;
303 sge[sge_no].lkey = vec->frmr->mr->lkey;
304 }
305 ctxt->count++;
306 ctxt->frmr = vec->frmr;
175 sge_off = 0; 307 sge_off = 0;
176 sge_no++; 308 sge_no++;
177 ctxt->count++;
178 xdr_sge_no++; 309 xdr_sge_no++;
310 BUG_ON(xdr_sge_no > vec->count);
179 bc -= sge_bytes; 311 bc -= sge_bytes;
180 } 312 }
181 313
182 BUG_ON(bc != 0);
183 BUG_ON(xdr_sge_no > vec->count);
184
185 /* Prepare WRITE WR */ 314 /* Prepare WRITE WR */
186 memset(&write_wr, 0, sizeof write_wr); 315 memset(&write_wr, 0, sizeof write_wr);
187 ctxt->wr_op = IB_WR_RDMA_WRITE; 316 ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
226 res_ary = (struct rpcrdma_write_array *) 355 res_ary = (struct rpcrdma_write_array *)
227 &rdma_resp->rm_body.rm_chunks[1]; 356 &rdma_resp->rm_body.rm_chunks[1];
228 357
229 max_write = xprt->sc_max_sge * PAGE_SIZE; 358 if (vec->frmr)
359 max_write = vec->frmr->map_len;
360 else
361 max_write = xprt->sc_max_sge * PAGE_SIZE;
230 362
231 /* Write chunks start at the pagelist */ 363 /* Write chunks start at the pagelist */
232 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 364 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
297 res_ary = (struct rpcrdma_write_array *) 429 res_ary = (struct rpcrdma_write_array *)
298 &rdma_resp->rm_body.rm_chunks[2]; 430 &rdma_resp->rm_body.rm_chunks[2];
299 431
300 max_write = xprt->sc_max_sge * PAGE_SIZE; 432 if (vec->frmr)
433 max_write = vec->frmr->map_len;
434 else
435 max_write = xprt->sc_max_sge * PAGE_SIZE;
301 436
302 /* xdr offset starts at RPC message */ 437 /* xdr offset starts at RPC message */
303 for (xdr_off = 0, chunk_no = 0; 438 for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
307 ch = &arg_ary->wc_array[chunk_no].wc_target; 442 ch = &arg_ary->wc_array[chunk_no].wc_target;
308 write_len = min(xfer_len, ch->rs_length); 443 write_len = min(xfer_len, ch->rs_length);
309 444
310
311 /* Prepare the reply chunk given the length actually 445 /* Prepare the reply chunk given the length actually
312 * written */ 446 * written */
313 rs_offset = get_unaligned(&(ch->rs_offset)); 447 rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
366 int byte_count) 500 int byte_count)
367{ 501{
368 struct ib_send_wr send_wr; 502 struct ib_send_wr send_wr;
503 struct ib_send_wr inv_wr;
369 int sge_no; 504 int sge_no;
370 int sge_bytes; 505 int sge_bytes;
371 int page_no; 506 int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
385 /* Prepare the context */ 520 /* Prepare the context */
386 ctxt->pages[0] = page; 521 ctxt->pages[0] = page;
387 ctxt->count = 1; 522 ctxt->count = 1;
523 ctxt->frmr = vec->frmr;
524 if (vec->frmr)
525 set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
526 else
527 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
388 528
389 /* Prepare the SGE for the RPCRDMA Header */ 529 /* Prepare the SGE for the RPCRDMA Header */
390 atomic_inc(&rdma->sc_dma_used);
391 ctxt->sge[0].addr = 530 ctxt->sge[0].addr =
392 ib_dma_map_page(rdma->sc_cm_id->device, 531 ib_dma_map_page(rdma->sc_cm_id->device,
393 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 532 page, 0, PAGE_SIZE, DMA_TO_DEVICE);
533 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
534 goto err;
535 atomic_inc(&rdma->sc_dma_used);
536
394 ctxt->direction = DMA_TO_DEVICE; 537 ctxt->direction = DMA_TO_DEVICE;
538
395 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 539 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
396 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 540 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
397 541
398 /* Determine how many of our SGE are to be transmitted */ 542 /* Determine how many of our SGE are to be transmitted */
399 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 543 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
400 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 544 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
401 byte_count -= sge_bytes; 545 byte_count -= sge_bytes;
402 atomic_inc(&rdma->sc_dma_used); 546 if (!vec->frmr) {
403 ctxt->sge[sge_no].addr = 547 ctxt->sge[sge_no].addr =
404 ib_dma_map_single(rdma->sc_cm_id->device, 548 ib_dma_map_single(rdma->sc_cm_id->device,
405 vec->sge[sge_no].iov_base, 549 vec->sge[sge_no].iov_base,
406 sge_bytes, DMA_TO_DEVICE); 550 sge_bytes, DMA_TO_DEVICE);
551 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
552 ctxt->sge[sge_no].addr))
553 goto err;
554 atomic_inc(&rdma->sc_dma_used);
555 ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
556 } else {
557 ctxt->sge[sge_no].addr = (unsigned long)
558 vec->sge[sge_no].iov_base;
559 ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
560 }
407 ctxt->sge[sge_no].length = sge_bytes; 561 ctxt->sge[sge_no].length = sge_bytes;
408 ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
409 } 562 }
410 BUG_ON(byte_count != 0); 563 BUG_ON(byte_count != 0);
411 564
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
417 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 570 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
418 ctxt->count++; 571 ctxt->count++;
419 rqstp->rq_respages[page_no] = NULL; 572 rqstp->rq_respages[page_no] = NULL;
420 /* If there are more pages than SGE, terminate SGE list */ 573 /*
574 * If there are more pages than SGE, terminate SGE
575 * list so that svc_rdma_unmap_dma doesn't attempt to
576 * unmap garbage.
577 */
421 if (page_no+1 >= sge_no) 578 if (page_no+1 >= sge_no)
422 ctxt->sge[page_no+1].length = 0; 579 ctxt->sge[page_no+1].length = 0;
423 } 580 }
424 BUG_ON(sge_no > rdma->sc_max_sge); 581 BUG_ON(sge_no > rdma->sc_max_sge);
582 BUG_ON(sge_no > ctxt->count);
425 memset(&send_wr, 0, sizeof send_wr); 583 memset(&send_wr, 0, sizeof send_wr);
426 ctxt->wr_op = IB_WR_SEND; 584 ctxt->wr_op = IB_WR_SEND;
427 send_wr.wr_id = (unsigned long)ctxt; 585 send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
429 send_wr.num_sge = sge_no; 587 send_wr.num_sge = sge_no;
430 send_wr.opcode = IB_WR_SEND; 588 send_wr.opcode = IB_WR_SEND;
431 send_wr.send_flags = IB_SEND_SIGNALED; 589 send_wr.send_flags = IB_SEND_SIGNALED;
590 if (vec->frmr) {
591 /* Prepare INVALIDATE WR */
592 memset(&inv_wr, 0, sizeof inv_wr);
593 inv_wr.opcode = IB_WR_LOCAL_INV;
594 inv_wr.send_flags = IB_SEND_SIGNALED;
595 inv_wr.ex.invalidate_rkey =
596 vec->frmr->mr->lkey;
597 send_wr.next = &inv_wr;
598 }
432 599
433 ret = svc_rdma_send(rdma, &send_wr); 600 ret = svc_rdma_send(rdma, &send_wr);
434 if (ret) 601 if (ret)
435 svc_rdma_put_context(ctxt, 1); 602 goto err;
436 603
437 return ret; 604 return 0;
605
606 err:
607 svc_rdma_put_frmr(rdma, vec->frmr);
608 svc_rdma_put_context(ctxt, 1);
609 return -EIO;
438} 610}
439 611
440void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) 612void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
477 ctxt = svc_rdma_get_context(rdma); 649 ctxt = svc_rdma_get_context(rdma);
478 ctxt->direction = DMA_TO_DEVICE; 650 ctxt->direction = DMA_TO_DEVICE;
479 vec = svc_rdma_get_req_map(); 651 vec = svc_rdma_get_req_map();
480 xdr_to_sge(rdma, &rqstp->rq_res, vec); 652 ret = map_xdr(rdma, &rqstp->rq_res, vec);
481 653 if (ret)
654 goto err0;
482 inline_bytes = rqstp->rq_res.len; 655 inline_bytes = rqstp->rq_res.len;
483 656
484 /* Create the RDMA response header */ 657 /* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
498 if (ret < 0) { 671 if (ret < 0) {
499 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 672 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
500 ret); 673 ret);
501 goto error; 674 goto err1;
502 } 675 }
503 inline_bytes -= ret; 676 inline_bytes -= ret;
504 677
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
508 if (ret < 0) { 681 if (ret < 0) {
509 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 682 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
510 ret); 683 ret);
511 goto error; 684 goto err1;
512 } 685 }
513 inline_bytes -= ret; 686 inline_bytes -= ret;
514 687
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
517 svc_rdma_put_req_map(vec); 690 svc_rdma_put_req_map(vec);
518 dprintk("svcrdma: send_reply returns %d\n", ret); 691 dprintk("svcrdma: send_reply returns %d\n", ret);
519 return ret; 692 return ret;
520 error: 693
694 err1:
695 put_page(res_page);
696 err0:
521 svc_rdma_put_req_map(vec); 697 svc_rdma_put_req_map(vec);
522 svc_rdma_put_context(ctxt, 0); 698 svc_rdma_put_context(ctxt, 0);
523 put_page(res_page);
524 return ret; 699 return ret;
525} 700}
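fast_reg_xdr() above lays the head, page list and tail of the reply out as one virtually contiguous region behind a single fast-register MR, so each sge address is plain offset arithmetic from the page-aligned head address (frva) rather than a per-sge DMA mapping. A small sketch of that arithmetic, assuming 4 KiB pages; names and values are illustrative, not the kernel's.

/*
 * Editor's sketch (not from the patch): the address math behind
 * fast_reg_xdr(). Each later piece of the xdr_buf lives at
 * frva + bytes-already-mapped + its offset within its page.
 */
#include <assert.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096UL
#define SKETCH_PAGE_MASK (~(SKETCH_PAGE_SIZE - 1))

/* Page-aligned base of the region covered by the fast-register MR */
static uintptr_t frmr_base(uintptr_t head_iov_base)
{
	return head_iov_base & SKETCH_PAGE_MASK;
}

/* Address of data that begins page_off bytes into the next mapped page */
static uintptr_t frmr_addr(uintptr_t frva, unsigned long map_len,
			   unsigned long page_off)
{
	return frva + map_len + page_off;
}

int main(void)
{
	uintptr_t head = 0x1000f00;	/* head starts mid-page */
	uintptr_t frva = frmr_base(head);

	assert(frva == 0x1000000);	/* rounded down to a page boundary */

	/* After mapping one page (the head), page data at offset 0x10
	 * appears exactly one page into the region: */
	assert(frmr_addr(frva, SKETCH_PAGE_SIZE, 0x10) ==
	       frva + SKETCH_PAGE_SIZE + 0x10);
	return 0;
}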
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c6..6fb493cbd29f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
100 ctxt->xprt = xprt; 100 ctxt->xprt = xprt;
101 INIT_LIST_HEAD(&ctxt->dto_q); 101 INIT_LIST_HEAD(&ctxt->dto_q);
102 ctxt->count = 0; 102 ctxt->count = 0;
103 ctxt->frmr = NULL;
103 atomic_inc(&xprt->sc_ctxt_used); 104 atomic_inc(&xprt->sc_ctxt_used);
104 return ctxt; 105 return ctxt;
105} 106}
106 107
107static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) 108void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
108{ 109{
109 struct svcxprt_rdma *xprt = ctxt->xprt; 110 struct svcxprt_rdma *xprt = ctxt->xprt;
110 int i; 111 int i;
111 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { 112 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
112 atomic_dec(&xprt->sc_dma_used); 113 /*
113 ib_dma_unmap_single(xprt->sc_cm_id->device, 114 * Unmap the DMA addr in the SGE if the lkey matches
114			ctxt->sge[i].addr, 115		 * the sc_dma_lkey; otherwise, ignore it since it is
115 ctxt->sge[i].length, 116 * an FRMR lkey and will be unmapped later when the
116 ctxt->direction); 117 * last WR that uses it completes.
118 */
119 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
120 atomic_dec(&xprt->sc_dma_used);
121 ib_dma_unmap_single(xprt->sc_cm_id->device,
122 ctxt->sge[i].addr,
123 ctxt->sge[i].length,
124 ctxt->direction);
125 }
117 } 126 }
118} 127}
119 128
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
150 schedule_timeout_uninterruptible(msecs_to_jiffies(500)); 159 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
151 } 160 }
152 map->count = 0; 161 map->count = 0;
162 map->frmr = NULL;
153 return map; 163 return map;
154} 164}
155 165
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
316} 326}
317 327
318/* 328/*
 329 * Process a completion context
330 */
331static void process_context(struct svcxprt_rdma *xprt,
332 struct svc_rdma_op_ctxt *ctxt)
333{
334 svc_rdma_unmap_dma(ctxt);
335
336 switch (ctxt->wr_op) {
337 case IB_WR_SEND:
338 if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
339 svc_rdma_put_frmr(xprt, ctxt->frmr);
340 svc_rdma_put_context(ctxt, 1);
341 break;
342
343 case IB_WR_RDMA_WRITE:
344 svc_rdma_put_context(ctxt, 0);
345 break;
346
347 case IB_WR_RDMA_READ:
348 case IB_WR_RDMA_READ_WITH_INV:
349 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
350 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
351 BUG_ON(!read_hdr);
352 if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
353 svc_rdma_put_frmr(xprt, ctxt->frmr);
354 spin_lock_bh(&xprt->sc_rq_dto_lock);
355 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
356 list_add_tail(&read_hdr->dto_q,
357 &xprt->sc_read_complete_q);
358 spin_unlock_bh(&xprt->sc_rq_dto_lock);
359 svc_xprt_enqueue(&xprt->sc_xprt);
360 }
361 svc_rdma_put_context(ctxt, 0);
362 break;
363
364 default:
365 printk(KERN_ERR "svcrdma: unexpected completion type, "
366 "opcode=%d\n",
367 ctxt->wr_op);
368 break;
369 }
370}
371
372/*
319 * Send Queue Completion Handler - potentially called on interrupt context. 373 * Send Queue Completion Handler - potentially called on interrupt context.
320 * 374 *
321 * Note that caller must hold a transport reference. 375 * Note that caller must hold a transport reference.
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
327 struct ib_cq *cq = xprt->sc_sq_cq; 381 struct ib_cq *cq = xprt->sc_sq_cq;
328 int ret; 382 int ret;
329 383
330
331 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) 384 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
332 return; 385 return;
333 386
334 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); 387 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
335 atomic_inc(&rdma_stat_sq_poll); 388 atomic_inc(&rdma_stat_sq_poll);
336 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { 389 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
337 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
338 xprt = ctxt->xprt;
339
340 svc_rdma_unmap_dma(ctxt);
341 if (wc.status != IB_WC_SUCCESS) 390 if (wc.status != IB_WC_SUCCESS)
342 /* Close the transport */ 391 /* Close the transport */
343 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 392 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
346 atomic_dec(&xprt->sc_sq_count); 395 atomic_dec(&xprt->sc_sq_count);
347 wake_up(&xprt->sc_send_wait); 396 wake_up(&xprt->sc_send_wait);
348 397
349 switch (ctxt->wr_op) { 398 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
350 case IB_WR_SEND: 399 if (ctxt)
351 svc_rdma_put_context(ctxt, 1); 400 process_context(xprt, ctxt);
352 break;
353
354 case IB_WR_RDMA_WRITE:
355 svc_rdma_put_context(ctxt, 0);
356 break;
357
358 case IB_WR_RDMA_READ:
359 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
360 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
361 BUG_ON(!read_hdr);
362 spin_lock_bh(&xprt->sc_rq_dto_lock);
363 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
364 list_add_tail(&read_hdr->dto_q,
365 &xprt->sc_read_complete_q);
366 spin_unlock_bh(&xprt->sc_rq_dto_lock);
367 svc_xprt_enqueue(&xprt->sc_xprt);
368 }
369 svc_rdma_put_context(ctxt, 0);
370 break;
371 401
372 default:
373 printk(KERN_ERR "svcrdma: unexpected completion type, "
374 "opcode=%d, status=%d\n",
375 wc.opcode, wc.status);
376 break;
377 }
378 svc_xprt_put(&xprt->sc_xprt); 402 svc_xprt_put(&xprt->sc_xprt);
379 } 403 }
380 404
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
425 INIT_LIST_HEAD(&cma_xprt->sc_dto_q); 449 INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
426 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 450 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
427 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 451 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
452 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
428 init_waitqueue_head(&cma_xprt->sc_send_wait); 453 init_waitqueue_head(&cma_xprt->sc_send_wait);
429 454
430 spin_lock_init(&cma_xprt->sc_lock); 455 spin_lock_init(&cma_xprt->sc_lock);
431 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 456 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
457 spin_lock_init(&cma_xprt->sc_frmr_q_lock);
432 458
433 cma_xprt->sc_ord = svcrdma_ord; 459 cma_xprt->sc_ord = svcrdma_ord;
434 460
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
462 struct ib_recv_wr recv_wr, *bad_recv_wr; 488 struct ib_recv_wr recv_wr, *bad_recv_wr;
463 struct svc_rdma_op_ctxt *ctxt; 489 struct svc_rdma_op_ctxt *ctxt;
464 struct page *page; 490 struct page *page;
465 unsigned long pa; 491 dma_addr_t pa;
466 int sge_no; 492 int sge_no;
467 int buflen; 493 int buflen;
468 int ret; 494 int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
474 BUG_ON(sge_no >= xprt->sc_max_sge); 500 BUG_ON(sge_no >= xprt->sc_max_sge);
475 page = svc_rdma_get_page(); 501 page = svc_rdma_get_page();
476 ctxt->pages[sge_no] = page; 502 ctxt->pages[sge_no] = page;
477 atomic_inc(&xprt->sc_dma_used);
478 pa = ib_dma_map_page(xprt->sc_cm_id->device, 503 pa = ib_dma_map_page(xprt->sc_cm_id->device,
479 page, 0, PAGE_SIZE, 504 page, 0, PAGE_SIZE,
480 DMA_FROM_DEVICE); 505 DMA_FROM_DEVICE);
506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
507 goto err_put_ctxt;
508 atomic_inc(&xprt->sc_dma_used);
481 ctxt->sge[sge_no].addr = pa; 509 ctxt->sge[sge_no].addr = pa;
482 ctxt->sge[sge_no].length = PAGE_SIZE; 510 ctxt->sge[sge_no].length = PAGE_SIZE;
483 ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 511 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
484 buflen += PAGE_SIZE; 512 buflen += PAGE_SIZE;
485 } 513 }
486 ctxt->count = sge_no; 514 ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
496 svc_rdma_put_context(ctxt, 1); 524 svc_rdma_put_context(ctxt, 1);
497 } 525 }
498 return ret; 526 return ret;
527
528 err_put_ctxt:
529 svc_rdma_put_context(ctxt, 1);
530 return -ENOMEM;
499} 531}
500 532
501/* 533/*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
566 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 598 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
567 "event=%d\n", cma_id, cma_id->context, event->event); 599 "event=%d\n", cma_id, cma_id->context, event->event);
568 handle_connect_req(cma_id, 600 handle_connect_req(cma_id,
569 event->param.conn.responder_resources); 601 event->param.conn.initiator_depth);
570 break; 602 break;
571 603
572 case RDMA_CM_EVENT_ESTABLISHED: 604 case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
686 return ERR_PTR(ret); 718 return ERR_PTR(ret);
687} 719}
688 720
721static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
722{
723 struct ib_mr *mr;
724 struct ib_fast_reg_page_list *pl;
725 struct svc_rdma_fastreg_mr *frmr;
726
727 frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
728 if (!frmr)
729 goto err;
730
731 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
 732	if (IS_ERR(mr))
733 goto err_free_frmr;
734
735 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
736 RPCSVC_MAXPAGES);
 737	if (IS_ERR(pl))
738 goto err_free_mr;
739
740 frmr->mr = mr;
741 frmr->page_list = pl;
742 INIT_LIST_HEAD(&frmr->frmr_list);
743 return frmr;
744
745 err_free_mr:
746 ib_dereg_mr(mr);
747 err_free_frmr:
748 kfree(frmr);
749 err:
750 return ERR_PTR(-ENOMEM);
751}
752
753static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
754{
755 struct svc_rdma_fastreg_mr *frmr;
756
757 while (!list_empty(&xprt->sc_frmr_q)) {
758 frmr = list_entry(xprt->sc_frmr_q.next,
759 struct svc_rdma_fastreg_mr, frmr_list);
760 list_del_init(&frmr->frmr_list);
761 ib_dereg_mr(frmr->mr);
762 ib_free_fast_reg_page_list(frmr->page_list);
763 kfree(frmr);
764 }
765}
766
767struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
768{
769 struct svc_rdma_fastreg_mr *frmr = NULL;
770
771 spin_lock_bh(&rdma->sc_frmr_q_lock);
772 if (!list_empty(&rdma->sc_frmr_q)) {
773 frmr = list_entry(rdma->sc_frmr_q.next,
774 struct svc_rdma_fastreg_mr, frmr_list);
775 list_del_init(&frmr->frmr_list);
776 frmr->map_len = 0;
777 frmr->page_list_len = 0;
778 }
779 spin_unlock_bh(&rdma->sc_frmr_q_lock);
780 if (frmr)
781 return frmr;
782
783 return rdma_alloc_frmr(rdma);
784}
785
786static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
787 struct svc_rdma_fastreg_mr *frmr)
788{
789 int page_no;
790 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
791 dma_addr_t addr = frmr->page_list->page_list[page_no];
792 if (ib_dma_mapping_error(frmr->mr->device, addr))
793 continue;
794 atomic_dec(&xprt->sc_dma_used);
795 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
796 frmr->direction);
797 }
798}
799
800void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
801 struct svc_rdma_fastreg_mr *frmr)
802{
803 if (frmr) {
804 frmr_unmap_dma(rdma, frmr);
805 spin_lock_bh(&rdma->sc_frmr_q_lock);
806 BUG_ON(!list_empty(&frmr->frmr_list));
807 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
808 spin_unlock_bh(&rdma->sc_frmr_q_lock);
809 }
810}
811
689/* 812/*
690 * This is the xpo_recvfrom function for listening endpoints. Its 813 * This is the xpo_recvfrom function for listening endpoints. Its
691 * purpose is to accept incoming connections. The CMA callback handler 814 * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
704 struct rdma_conn_param conn_param; 827 struct rdma_conn_param conn_param;
705 struct ib_qp_init_attr qp_attr; 828 struct ib_qp_init_attr qp_attr;
706 struct ib_device_attr devattr; 829 struct ib_device_attr devattr;
830 int dma_mr_acc;
831 int need_dma_mr;
707 int ret; 832 int ret;
708 int i; 833 int i;
709 834
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
819 } 944 }
820 newxprt->sc_qp = newxprt->sc_cm_id->qp; 945 newxprt->sc_qp = newxprt->sc_cm_id->qp;
821 946
822 /* Register all of physical memory */ 947 /*
823 newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, 948 * Use the most secure set of MR resources based on the
824 IB_ACCESS_LOCAL_WRITE | 949 * transport type and available memory management features in
825 IB_ACCESS_REMOTE_WRITE); 950 * the device. Here's the table implemented below:
826 if (IS_ERR(newxprt->sc_phys_mr)) { 951 *
827 dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); 952 * Fast Global DMA Remote WR
953 * Reg LKEY MR Access
954 * Sup'd Sup'd Needed Needed
955 *
956 * IWARP N N Y Y
957 * N Y Y Y
958 * Y N Y N
959 * Y Y N -
960 *
961 * IB N N Y N
962 * N Y N -
963 * Y N Y N
964 * Y Y N -
965 *
966 * NB: iWARP requires remote write access for the data sink
967 * of an RDMA_READ. IB does not.
968 */
969 if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
970 newxprt->sc_frmr_pg_list_len =
971 devattr.max_fast_reg_page_list_len;
972 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
973 }
974
975 /*
976 * Determine if a DMA MR is required and if so, what privs are required
977 */
978 switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
979 case RDMA_TRANSPORT_IWARP:
980 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
981 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
982 need_dma_mr = 1;
983 dma_mr_acc =
984 (IB_ACCESS_LOCAL_WRITE |
985 IB_ACCESS_REMOTE_WRITE);
986 } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
987 need_dma_mr = 1;
988 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
989 } else
990 need_dma_mr = 0;
991 break;
992 case RDMA_TRANSPORT_IB:
993 if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
994 need_dma_mr = 1;
995 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
996 } else
997 need_dma_mr = 0;
998 break;
999 default:
828 goto errout; 1000 goto errout;
829 } 1001 }
830 1002
1003 /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
1004 if (need_dma_mr) {
1005 /* Register all of physical memory */
1006 newxprt->sc_phys_mr =
1007 ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
1008 if (IS_ERR(newxprt->sc_phys_mr)) {
1009 dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
1010 ret);
1011 goto errout;
1012 }
1013 newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
1014 } else
1015 newxprt->sc_dma_lkey =
1016 newxprt->sc_cm_id->device->local_dma_lkey;
1017
831 /* Post receive buffers */ 1018 /* Post receive buffers */
832 for (i = 0; i < newxprt->sc_max_requests; i++) { 1019 for (i = 0; i < newxprt->sc_max_requests; i++) {
833 ret = svc_rdma_post_recv(newxprt); 1020 ret = svc_rdma_post_recv(newxprt);
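The capability table and switch above reduce to a small decision function: the transport type plus two device capabilities determine whether a DMA MR is needed and with what access rights. A sketch of that decision follows, with local stand-ins for the capability flags and access bits rather than the IB verbs constants.

/*
 * Editor's sketch of the registration-strategy table, mirroring the
 * switch in svc_rdma_accept(). Flag values are illustrative.
 */
#include <stdbool.h>

enum transport { T_IWARP, T_IB };

#define CAP_FAST_REG	   0x1	/* IB_DEVICE_MEM_MGT_EXTENSIONS analogue */
#define CAP_LOCAL_DMA_LKEY 0x2	/* device exposes a global DMA lkey */

#define ACC_LOCAL_WRITE	 0x1
#define ACC_REMOTE_WRITE 0x2

/* Returns true when a DMA MR must be created; *acc gets its privileges */
static bool need_dma_mr(enum transport t, unsigned caps, unsigned *acc)
{
	switch (t) {
	case T_IWARP:
		if (!(caps & CAP_FAST_REG)) {
			/* iWARP read sink needs remote write access */
			*acc = ACC_LOCAL_WRITE | ACC_REMOTE_WRITE;
			return true;
		}
		/* fast-reg covers the sink; fall through to lkey check */
	case T_IB:
		if (!(caps & CAP_LOCAL_DMA_LKEY)) {
			*acc = ACC_LOCAL_WRITE;
			return true;
		}
		return false;	/* device lkey suffices, no MR needed */
	}
	return true;
}

int main(void)
{
	unsigned acc = 0;
	/* iWARP without fast-reg must get a remote-write DMA MR */
	return need_dma_mr(T_IWARP, 0, &acc) &&
	       acc == (ACC_LOCAL_WRITE | ACC_REMOTE_WRITE) ? 0 : 1;
}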
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
961 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); 1148 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
962 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); 1149 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
963 1150
1151 /* De-allocate fastreg mr */
1152 rdma_dealloc_frmr_q(rdma);
1153
964 /* Destroy the QP if present (not a listener) */ 1154 /* Destroy the QP if present (not a listener) */
965 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 1155 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
966 ib_destroy_qp(rdma->sc_qp); 1156 ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
1014 return 1; 1204 return 1;
1015} 1205}
1016 1206
1207/*
1208 * Attempt to register the kvec representing the RPC memory with the
1209 * device.
1210 *
1211 * Returns:
1212 *	0    : The FAST_REG work request to register the kvec was
1213 *	       successfully posted.
1214 *	<0   : An error was encountered attempting to post the fast
1215 *	       registration work request.
1216 */
1217int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
1218 struct svc_rdma_fastreg_mr *frmr)
1219{
1220 struct ib_send_wr fastreg_wr;
1221 u8 key;
1222
1223 /* Bump the key */
1224 key = (u8)(frmr->mr->lkey & 0x000000FF);
1225 ib_update_fast_reg_key(frmr->mr, ++key);
1226
1227 /* Prepare FASTREG WR */
1228 memset(&fastreg_wr, 0, sizeof fastreg_wr);
1229 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1230 fastreg_wr.send_flags = IB_SEND_SIGNALED;
1231 fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
1232 fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
1233 fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
1234 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1235 fastreg_wr.wr.fast_reg.length = frmr->map_len;
1236 fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
1237 fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
1238 return svc_rdma_send(xprt, &fastreg_wr);
1239}
1240
1017int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) 1241int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1018{ 1242{
1019 struct ib_send_wr *bad_wr; 1243 struct ib_send_wr *bad_wr, *n_wr;
1244 int wr_count;
1245 int i;
1020 int ret; 1246 int ret;
1021 1247
1022 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1248 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1023 return -ENOTCONN; 1249 return -ENOTCONN;
1024 1250
1025 BUG_ON(wr->send_flags != IB_SEND_SIGNALED); 1251 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1026 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != 1252 wr_count = 1;
1027 wr->opcode); 1253 for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
1254 wr_count++;
1255
1028 /* If the SQ is full, wait until an SQ entry is available */ 1256 /* If the SQ is full, wait until an SQ entry is available */
1029 while (1) { 1257 while (1) {
1030 spin_lock_bh(&xprt->sc_lock); 1258 spin_lock_bh(&xprt->sc_lock);
1031 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { 1259 if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
1032 spin_unlock_bh(&xprt->sc_lock); 1260 spin_unlock_bh(&xprt->sc_lock);
1033 atomic_inc(&rdma_stat_sq_starve); 1261 atomic_inc(&rdma_stat_sq_starve);
1034 1262
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1043 return 0; 1271 return 0;
1044 continue; 1272 continue;
1045 } 1273 }
1046 /* Bumped used SQ WR count and post */ 1274 /* Take a transport ref for each WR posted */
1047 svc_xprt_get(&xprt->sc_xprt); 1275 for (i = 0; i < wr_count; i++)
1276 svc_xprt_get(&xprt->sc_xprt);
1277
1278 /* Bump used SQ WR count and post */
1279 atomic_add(wr_count, &xprt->sc_sq_count);
1048 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1280 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1049 if (!ret) 1281 if (ret) {
1050 atomic_inc(&xprt->sc_sq_count); 1282 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
1051 else { 1283 atomic_sub(wr_count, &xprt->sc_sq_count);
1052			svc_xprt_put(&xprt->sc_xprt); 1284			for (i = 0; i < wr_count; i++)
1285 svc_xprt_put(&xprt->sc_xprt);
1053 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1286 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1054 "sc_sq_count=%d, sc_sq_depth=%d\n", 1287 "sc_sq_count=%d, sc_sq_depth=%d\n",
1055 ret, atomic_read(&xprt->sc_sq_count), 1288 ret, atomic_read(&xprt->sc_sq_count),
1056 xprt->sc_sq_depth); 1289 xprt->sc_sq_depth);
1057 } 1290 }
1058 spin_unlock_bh(&xprt->sc_lock); 1291 spin_unlock_bh(&xprt->sc_lock);
1292 if (ret)
1293 wake_up(&xprt->sc_send_wait);
1059 break; 1294 break;
1060 } 1295 }
1061 return ret; 1296 return ret;
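Because a posted request can now be a WR chain (read plus invalidate, or fastreg plus send), svc_rdma_send() above sizes its send-queue reservation and its per-WR transport references by the chain length instead of assuming a single WR. A sketch of that accounting with simplified types:

/*
 * Editor's sketch (userspace stand-ins): reserve SQ slots for a whole
 * WR chain, or report that the queue is full so the caller can wait.
 */
#include <stddef.h>

struct wr { struct wr *next; };

/* Length of a WR chain as ib_post_send would consume it */
static int wr_chain_len(const struct wr *wr)
{
	int n = 0;
	for (; wr; wr = wr->next)
		n++;
	return n;
}

/* Reserve room for the whole chain, or report that the SQ is full */
static int sq_reserve(int *sq_count, int sq_depth, int wr_count)
{
	if (sq_depth < *sq_count + wr_count)
		return -1;	/* caller waits for completions, retries */
	*sq_count += wr_count;
	return 0;
}

int main(void)
{
	struct wr inv = { NULL }, rd = { &inv };
	int sq_count = 14, sq_depth = 15;

	/* A 2-WR chain must not squeeze into a single free slot */
	return sq_reserve(&sq_count, sq_depth, wr_chain_len(&rd)) == -1
		? 0 : 1;
}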
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1079 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1314 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1080 1315
1081 /* Prepare SGE for local address */ 1316 /* Prepare SGE for local address */
1082 atomic_inc(&xprt->sc_dma_used);
1083 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1317 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1084 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1318 p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1085 sge.lkey = xprt->sc_phys_mr->lkey; 1319 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
1320 put_page(p);
1321 return;
1322 }
1323 atomic_inc(&xprt->sc_dma_used);
1324 sge.lkey = xprt->sc_dma_lkey;
1086 sge.length = length; 1325 sge.length = length;
1087 1326
1088 ctxt = svc_rdma_get_context(xprt); 1327 ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1103 if (ret) { 1342 if (ret) {
1104 dprintk("svcrdma: Error %d posting send for protocol error\n", 1343 dprintk("svcrdma: Error %d posting send for protocol error\n",
1105 ret); 1344 ret);
1345 ib_dma_unmap_page(xprt->sc_cm_id->device,
1346 sge.addr, PAGE_SIZE,
1347 DMA_FROM_DEVICE);
1106 svc_rdma_put_context(ctxt, 1); 1348 svc_rdma_put_context(ctxt, 1);
1107 } 1349 }
1108} 1350}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec5..9839c3d94145 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 70static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 71static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
72static unsigned int xprt_rdma_inline_write_padding; 72static unsigned int xprt_rdma_inline_write_padding;
73#if !RPCRDMA_PERSISTENT_REGISTRATION 73static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
74static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ 74 int xprt_rdma_pad_optimize = 0;
75#else
76static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
77#endif
78 75
79#ifdef RPC_DEBUG 76#ifdef RPC_DEBUG
80 77
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = {
140 .extra2 = &max_memreg, 137 .extra2 = &max_memreg,
141 }, 138 },
142 { 139 {
140 .ctl_name = CTL_UNNUMBERED,
141 .procname = "rdma_pad_optimize",
142 .data = &xprt_rdma_pad_optimize,
143 .maxlen = sizeof(unsigned int),
144 .mode = 0644,
145 .proc_handler = &proc_dointvec,
146 },
147 {
143 .ctl_name = 0, 148 .ctl_name = 0,
144 }, 149 },
145}; 150};
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
458 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 463 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
459 464
460 dprintk("RPC: %s: closing\n", __func__); 465 dprintk("RPC: %s: closing\n", __func__);
466 if (r_xprt->rx_ep.rep_connected > 0)
467 xprt->reestablish_timeout = 0;
461 xprt_disconnect_done(xprt); 468 xprt_disconnect_done(xprt);
462 (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); 469 (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
463} 470}
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
485 /* Reconnect */ 492 /* Reconnect */
486 schedule_delayed_work(&r_xprt->rdma_connect, 493 schedule_delayed_work(&r_xprt->rdma_connect,
487 xprt->reestablish_timeout); 494 xprt->reestablish_timeout);
495 xprt->reestablish_timeout <<= 1;
496 if (xprt->reestablish_timeout > (30 * HZ))
497 xprt->reestablish_timeout = (30 * HZ);
498 else if (xprt->reestablish_timeout < (5 * HZ))
499 xprt->reestablish_timeout = (5 * HZ);
488 } else { 500 } else {
489 schedule_delayed_work(&r_xprt->rdma_connect, 0); 501 schedule_delayed_work(&r_xprt->rdma_connect, 0);
490 if (!RPC_IS_ASYNC(task)) 502 if (!RPC_IS_ASYNC(task))
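The reconnect path above doubles reestablish_timeout on each attempt and clamps it to between 5 and 30 seconds. The same backoff in miniature, using plain seconds instead of HZ-based jiffies:

/* Editor's sketch of the clamped exponential backoff added above. */
#include <assert.h>

static unsigned int next_backoff(unsigned int timeout_s)
{
	timeout_s <<= 1;
	if (timeout_s > 30)
		timeout_s = 30;
	else if (timeout_s < 5)
		timeout_s = 5;
	return timeout_s;
}

int main(void)
{
	assert(next_backoff(1) == 5);	/* floor */
	assert(next_backoff(10) == 20);	/* doubling */
	assert(next_backoff(20) == 30);	/* 40 clamps to the ceiling */
	return 0;
}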
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
591 } 603 }
592 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); 604 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
593out: 605out:
606 req->rl_connect_cookie = 0; /* our reserved value */
594 return req->rl_xdr_buf; 607 return req->rl_xdr_buf;
595 608
596outfail: 609outfail:
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
694 req->rl_reply->rr_xprt = xprt; 707 req->rl_reply->rr_xprt = xprt;
695 } 708 }
696 709
697 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { 710 /* Must suppress retransmit to maintain credits */
698 xprt_disconnect_done(xprt); 711 if (req->rl_connect_cookie == xprt->connect_cookie)
699 return -ENOTCONN; /* implies disconnect */ 712 goto drop_connection;
700 } 713 req->rl_connect_cookie = xprt->connect_cookie;
714
715 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
716 goto drop_connection;
701 717
718 task->tk_bytes_sent += rqst->rq_snd_buf.len;
702 rqst->rq_bytes_sent = 0; 719 rqst->rq_bytes_sent = 0;
703 return 0; 720 return 0;
721
722drop_connection:
723 xprt_disconnect_done(xprt);
724 return -ENOTCONN; /* implies disconnect */
704} 725}
705 726
706static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 727static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
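The connect-cookie test above suppresses retransmits within a single connection instance: replaying a request on the same connection would corrupt RPC/RDMA credit accounting, so the transport drops the connection instead and lets the reconnect carry the resend. A sketch with illustrative names:

/*
 * Editor's sketch (not the kernel code): a request records the cookie
 * of the connection it was first sent under; seeing that cookie again
 * means this send would be a retransmit on the same connection.
 */
#include <stdbool.h>

struct sketch_xprt { unsigned long connect_cookie; };
struct sketch_req  { unsigned long rl_connect_cookie; };

/* true  -> go ahead and post the request
 * false -> drop the connection; a fresh one retransmits implicitly */
static bool may_send(struct sketch_req *req, struct sketch_xprt *xprt)
{
	if (req->rl_connect_cookie == xprt->connect_cookie)
		return false;		/* same connection: suppress */
	req->rl_connect_cookie = xprt->connect_cookie;
	return true;
}

int main(void)
{
	struct sketch_xprt x = { .connect_cookie = 3 };
	struct sketch_req  r = { .rl_connect_cookie = 0 };

	if (!may_send(&r, &x))		/* first send on cookie 3: ok */
		return 1;
	if (may_send(&r, &x))		/* resend, same cookie: dropped */
		return 1;
	x.connect_cookie++;		/* reconnect bumps the cookie */
	return may_send(&r, &x) ? 0 : 1;
}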
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
770{ 791{
771 int rc; 792 int rc;
772 793
773 dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); 794 dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
774#ifdef RPC_DEBUG 795#ifdef RPC_DEBUG
775 if (sunrpc_table_header) { 796 if (sunrpc_table_header) {
776 unregister_sysctl_table(sunrpc_table_header); 797 unregister_sysctl_table(sunrpc_table_header);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8ea283ecc522..a5fef5e6c323 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
284 switch (event->event) { 284 switch (event->event) {
285 case RDMA_CM_EVENT_ADDR_RESOLVED: 285 case RDMA_CM_EVENT_ADDR_RESOLVED:
286 case RDMA_CM_EVENT_ROUTE_RESOLVED: 286 case RDMA_CM_EVENT_ROUTE_RESOLVED:
287 ia->ri_async_rc = 0;
287 complete(&ia->ri_done); 288 complete(&ia->ri_done);
288 break; 289 break;
289 case RDMA_CM_EVENT_ADDR_ERROR: 290 case RDMA_CM_EVENT_ADDR_ERROR:
@@ -338,13 +339,32 @@ connected:
338 wake_up_all(&ep->rep_connect_wait); 339 wake_up_all(&ep->rep_connect_wait);
339 break; 340 break;
340 default: 341 default:
341 ia->ri_async_rc = -EINVAL; 342 dprintk("RPC: %s: unexpected CM event %d\n",
342 dprintk("RPC: %s: unexpected CM event %X\n",
343 __func__, event->event); 343 __func__, event->event);
344 complete(&ia->ri_done);
345 break; 344 break;
346 } 345 }
347 346
347#ifdef RPC_DEBUG
348 if (connstate == 1) {
349 int ird = attr.max_dest_rd_atomic;
350 int tird = ep->rep_remote_cma.responder_resources;
351 printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
352 "on %s, memreg %d slots %d ird %d%s\n",
353 NIPQUAD(addr->sin_addr.s_addr),
354 ntohs(addr->sin_port),
355 ia->ri_id->device->name,
356 ia->ri_memreg_strategy,
357 xprt->rx_buf.rb_max_requests,
358 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
359 } else if (connstate < 0) {
360 printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
361 "closed (%d)\n",
362 NIPQUAD(addr->sin_addr.s_addr),
363 ntohs(addr->sin_port),
364 connstate);
365 }
366#endif
367
348 return 0; 368 return 0;
349} 369}
350 370
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
355 struct rdma_cm_id *id; 375 struct rdma_cm_id *id;
356 int rc; 376 int rc;
357 377
378 init_completion(&ia->ri_done);
379
358 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); 380 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
359 if (IS_ERR(id)) { 381 if (IS_ERR(id)) {
360 rc = PTR_ERR(id); 382 rc = PTR_ERR(id);
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
363 return id; 385 return id;
364 } 386 }
365 387
366 ia->ri_async_rc = 0; 388 ia->ri_async_rc = -ETIMEDOUT;
367 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); 389 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
368 if (rc) { 390 if (rc) {
369 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", 391 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
370 __func__, rc); 392 __func__, rc);
371 goto out; 393 goto out;
372 } 394 }
373 wait_for_completion(&ia->ri_done); 395 wait_for_completion_interruptible_timeout(&ia->ri_done,
396 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
374 rc = ia->ri_async_rc; 397 rc = ia->ri_async_rc;
375 if (rc) 398 if (rc)
376 goto out; 399 goto out;
377 400
378 ia->ri_async_rc = 0; 401 ia->ri_async_rc = -ETIMEDOUT;
379 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 402 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
380 if (rc) { 403 if (rc) {
381 dprintk("RPC: %s: rdma_resolve_route() failed %i\n", 404 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
382 __func__, rc); 405 __func__, rc);
383 goto out; 406 goto out;
384 } 407 }
385 wait_for_completion(&ia->ri_done); 408 wait_for_completion_interruptible_timeout(&ia->ri_done,
409 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
386 rc = ia->ri_async_rc; 410 rc = ia->ri_async_rc;
387 if (rc) 411 if (rc)
388 goto out; 412 goto out;
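rpcrdma_create_id() above now assumes -ETIMEDOUT before each asynchronous resolve step and bounds the wait, so a lost CM upcall surfaces as a timeout instead of hanging the mount. A single-threaded sketch of that pre-set-error pattern; the kernel's completion and timed wait are elided, and the names are illustrative.

/*
 * Editor's sketch: the status is pessimistically set before the async
 * call, so the caller sees -ETIMEDOUT unless the upcall overwrites it.
 */
#include <stdio.h>

#define ETIMEDOUT_RC (-110)

struct resolver { int async_rc; };

/* The upcall would run from the CM; here we can choose not to deliver it */
static void upcall(struct resolver *r, int status)
{
	r->async_rc = status;
}

static int resolve(struct resolver *r, int deliver_upcall)
{
	r->async_rc = ETIMEDOUT_RC;	/* assume the worst up front */
	if (deliver_upcall)
		upcall(r, 0);		/* success path sets rc = 0 */
	/* (kernel: wait_for_completion_interruptible_timeout here) */
	return r->async_rc;		/* still -ETIMEDOUT if no upcall */
}

int main(void)
{
	struct resolver r;
	printf("with upcall: %d\n", resolve(&r, 1));	/* 0 */
	printf("lost upcall: %d\n", resolve(&r, 0));	/* -110 */
	return 0;
}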
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
423int 447int
424rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) 448rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
425{ 449{
426 int rc; 450 int rc, mem_priv;
451 struct ib_device_attr devattr;
427 struct rpcrdma_ia *ia = &xprt->rx_ia; 452 struct rpcrdma_ia *ia = &xprt->rx_ia;
428 453
429 init_completion(&ia->ri_done);
430
431 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 454 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
432 if (IS_ERR(ia->ri_id)) { 455 if (IS_ERR(ia->ri_id)) {
433 rc = PTR_ERR(ia->ri_id); 456 rc = PTR_ERR(ia->ri_id);
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
443 } 466 }
444 467
445 /* 468 /*
469 * Query the device to determine if the requested memory
470 * registration strategy is supported. If it isn't, set the
471 * strategy to a globally supported model.
472 */
473 rc = ib_query_device(ia->ri_id->device, &devattr);
474 if (rc) {
475 dprintk("RPC: %s: ib_query_device failed %d\n",
476 __func__, rc);
477 goto out2;
478 }
479
480 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
481 ia->ri_have_dma_lkey = 1;
482 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
483 }
484
485 switch (memreg) {
486 case RPCRDMA_MEMWINDOWS:
487 case RPCRDMA_MEMWINDOWS_ASYNC:
488 if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
489 dprintk("RPC: %s: MEMWINDOWS registration "
490 "specified but not supported by adapter, "
491 "using slower RPCRDMA_REGISTER\n",
492 __func__);
493 memreg = RPCRDMA_REGISTER;
494 }
495 break;
496 case RPCRDMA_MTHCAFMR:
497 if (!ia->ri_id->device->alloc_fmr) {
498#if RPCRDMA_PERSISTENT_REGISTRATION
499 dprintk("RPC: %s: MTHCAFMR registration "
500 "specified but not supported by adapter, "
501 "using riskier RPCRDMA_ALLPHYSICAL\n",
502 __func__);
503 memreg = RPCRDMA_ALLPHYSICAL;
504#else
505 dprintk("RPC: %s: MTHCAFMR registration "
506 "specified but not supported by adapter, "
507 "using slower RPCRDMA_REGISTER\n",
508 __func__);
509 memreg = RPCRDMA_REGISTER;
510#endif
511 }
512 break;
513 case RPCRDMA_FRMR:
514 /* Requires both frmr reg and local dma lkey */
515 if ((devattr.device_cap_flags &
516 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
517 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
518#if RPCRDMA_PERSISTENT_REGISTRATION
519 dprintk("RPC: %s: FRMR registration "
520 "specified but not supported by adapter, "
521 "using riskier RPCRDMA_ALLPHYSICAL\n",
522 __func__);
523 memreg = RPCRDMA_ALLPHYSICAL;
524#else
525 dprintk("RPC: %s: FRMR registration "
526 "specified but not supported by adapter, "
527 "using slower RPCRDMA_REGISTER\n",
528 __func__);
529 memreg = RPCRDMA_REGISTER;
530#endif
531 }
532 break;
533 }
534
535 /*
446 * Optionally obtain an underlying physical identity mapping in 536 * Optionally obtain an underlying physical identity mapping in
447 * order to do a memory window-based bind. This base registration 537 * order to do a memory window-based bind. This base registration
448 * is protected from remote access - that is enabled only by binding 538 * is protected from remote access - that is enabled only by binding
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
450 * revoked after the corresponding completion similar to a storage 540 * revoked after the corresponding completion similar to a storage
451 * adapter. 541 * adapter.
452 */ 542 */
453 if (memreg > RPCRDMA_REGISTER) { 543 switch (memreg) {
454 int mem_priv = IB_ACCESS_LOCAL_WRITE; 544 case RPCRDMA_BOUNCEBUFFERS:
455 switch (memreg) { 545 case RPCRDMA_REGISTER:
546 case RPCRDMA_FRMR:
547 break;
456#if RPCRDMA_PERSISTENT_REGISTRATION 548#if RPCRDMA_PERSISTENT_REGISTRATION
457 case RPCRDMA_ALLPHYSICAL: 549 case RPCRDMA_ALLPHYSICAL:
458 mem_priv |= IB_ACCESS_REMOTE_WRITE; 550 mem_priv = IB_ACCESS_LOCAL_WRITE |
459 mem_priv |= IB_ACCESS_REMOTE_READ; 551 IB_ACCESS_REMOTE_WRITE |
460 break; 552 IB_ACCESS_REMOTE_READ;
553 goto register_setup;
461#endif 554#endif
462 case RPCRDMA_MEMWINDOWS_ASYNC: 555 case RPCRDMA_MEMWINDOWS_ASYNC:
463 case RPCRDMA_MEMWINDOWS: 556 case RPCRDMA_MEMWINDOWS:
464 mem_priv |= IB_ACCESS_MW_BIND; 557 mem_priv = IB_ACCESS_LOCAL_WRITE |
465 break; 558 IB_ACCESS_MW_BIND;
466 default: 559 goto register_setup;
560 case RPCRDMA_MTHCAFMR:
561 if (ia->ri_have_dma_lkey)
467 break; 562 break;
468 } 563 mem_priv = IB_ACCESS_LOCAL_WRITE;
564 register_setup:
469 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); 565 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
470 if (IS_ERR(ia->ri_bind_mem)) { 566 if (IS_ERR(ia->ri_bind_mem)) {
471 printk(KERN_ALERT "%s: ib_get_dma_mr for " 567 printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
475 memreg = RPCRDMA_REGISTER; 571 memreg = RPCRDMA_REGISTER;
476 ia->ri_bind_mem = NULL; 572 ia->ri_bind_mem = NULL;
477 } 573 }
574 break;
575 default:
576 printk(KERN_ERR "%s: invalid memory registration mode %d\n",
577 __func__, memreg);
578 rc = -EINVAL;
579 goto out2;
478 } 580 }
581 dprintk("RPC: %s: memory registration strategy is %d\n",
582 __func__, memreg);
479 583
480 /* Else will do memory reg/dereg for each chunk */ 584 /* Else will do memory reg/dereg for each chunk */
481 ia->ri_memreg_strategy = memreg; 585 ia->ri_memreg_strategy = memreg;
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
483 return 0; 587 return 0;
484out2: 588out2:
485 rdma_destroy_id(ia->ri_id); 589 rdma_destroy_id(ia->ri_id);
590 ia->ri_id = NULL;
486out1: 591out1:
487 return rc; 592 return rc;
488} 593}
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
503 dprintk("RPC: %s: ib_dereg_mr returned %i\n", 608 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
504 __func__, rc); 609 __func__, rc);
505 } 610 }
506 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) 611 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
507 rdma_destroy_qp(ia->ri_id); 612 if (ia->ri_id->qp)
613 rdma_destroy_qp(ia->ri_id);
614 rdma_destroy_id(ia->ri_id);
615 ia->ri_id = NULL;
616 }
508 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { 617 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
509 rc = ib_dealloc_pd(ia->ri_pd); 618 rc = ib_dealloc_pd(ia->ri_pd);
510 dprintk("RPC: %s: ib_dealloc_pd returned %i\n", 619 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
511 __func__, rc); 620 __func__, rc);
512 } 621 }
513 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
514 rdma_destroy_id(ia->ri_id);
515} 622}
516 623
517/* 624/*
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
541 ep->rep_attr.srq = NULL; 648 ep->rep_attr.srq = NULL;
542 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 649 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
543 switch (ia->ri_memreg_strategy) { 650 switch (ia->ri_memreg_strategy) {
651 case RPCRDMA_FRMR:
652 /* Add room for frmr register and invalidate WRs */
653 ep->rep_attr.cap.max_send_wr *= 3;
654 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
655 return -EINVAL;
656 break;
544 case RPCRDMA_MEMWINDOWS_ASYNC: 657 case RPCRDMA_MEMWINDOWS_ASYNC:
545 case RPCRDMA_MEMWINDOWS: 658 case RPCRDMA_MEMWINDOWS:
546 /* Add room for mw_binds+unbinds - overkill! */ 659 /* Add room for mw_binds+unbinds - overkill! */
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
617 ep->rep_remote_cma.private_data_len = 0; 730 ep->rep_remote_cma.private_data_len = 0;
618 731
619 /* Client offers RDMA Read but does not initiate */ 732 /* Client offers RDMA Read but does not initiate */
620 switch (ia->ri_memreg_strategy) { 733 ep->rep_remote_cma.initiator_depth = 0;
621 case RPCRDMA_BOUNCEBUFFERS: 734 if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
622 ep->rep_remote_cma.responder_resources = 0; 735 ep->rep_remote_cma.responder_resources = 0;
623 break; 736 else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
624 case RPCRDMA_MTHCAFMR: 737 ep->rep_remote_cma.responder_resources = 32;
625 case RPCRDMA_REGISTER: 738 else
626 ep->rep_remote_cma.responder_resources = cdata->max_requests *
627 (RPCRDMA_MAX_DATA_SEGS / 8);
628 break;
629 case RPCRDMA_MEMWINDOWS:
630 case RPCRDMA_MEMWINDOWS_ASYNC:
631#if RPCRDMA_PERSISTENT_REGISTRATION
632 case RPCRDMA_ALLPHYSICAL:
633#endif
634 ep->rep_remote_cma.responder_resources = cdata->max_requests *
635 (RPCRDMA_MAX_DATA_SEGS / 2);
636 break;
637 default:
638 break;
639 }
640 if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
641 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; 739 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
642 ep->rep_remote_cma.initiator_depth = 0;
643 740
644 ep->rep_remote_cma.retry_count = 7; 741 ep->rep_remote_cma.retry_count = 7;
645 ep->rep_remote_cma.flow_control = 0; 742 ep->rep_remote_cma.flow_control = 0;
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
679 if (rc) 776 if (rc)
680 dprintk("RPC: %s: rpcrdma_ep_disconnect" 777 dprintk("RPC: %s: rpcrdma_ep_disconnect"
681 " returned %i\n", __func__, rc); 778 " returned %i\n", __func__, rc);
779 rdma_destroy_qp(ia->ri_id);
780 ia->ri_id->qp = NULL;
682 } 781 }
683 782
684 ep->rep_func = NULL;
685
686 /* padding - could be done in rpcrdma_buffer_destroy... */ 783 /* padding - could be done in rpcrdma_buffer_destroy... */
687 if (ep->rep_pad_mr) { 784 if (ep->rep_pad_mr) {
688 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); 785 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
689 ep->rep_pad_mr = NULL; 786 ep->rep_pad_mr = NULL;
690 } 787 }
691 788
692 if (ia->ri_id->qp) {
693 rdma_destroy_qp(ia->ri_id);
694 ia->ri_id->qp = NULL;
695 }
696
697 rpcrdma_clean_cq(ep->rep_cq); 789 rpcrdma_clean_cq(ep->rep_cq);
698 rc = ib_destroy_cq(ep->rep_cq); 790 rc = ib_destroy_cq(ep->rep_cq);
699 if (rc) 791 if (rc)
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
712 struct rdma_cm_id *id; 804 struct rdma_cm_id *id;
713 int rc = 0; 805 int rc = 0;
714 int retry_count = 0; 806 int retry_count = 0;
715 int reconnect = (ep->rep_connected != 0);
716 807
717 if (reconnect) { 808 if (ep->rep_connected != 0) {
718 struct rpcrdma_xprt *xprt; 809 struct rpcrdma_xprt *xprt;
719retry: 810retry:
720 rc = rpcrdma_ep_disconnect(ep, ia); 811 rc = rpcrdma_ep_disconnect(ep, ia);
@@ -745,6 +836,7 @@ retry:
745 goto out; 836 goto out;
746 } 837 }
747 /* END TEMP */ 838 /* END TEMP */
839 rdma_destroy_qp(ia->ri_id);
748 rdma_destroy_id(ia->ri_id); 840 rdma_destroy_id(ia->ri_id);
749 ia->ri_id = id; 841 ia->ri_id = id;
750 } 842 }
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
769 } 861 }
770} 862}
771 863
772 /* Theoretically a client initiator_depth > 0 is not needed,
773 * but many peers fail to complete the connection unless they
774 * == responder_resources! */
775 if (ep->rep_remote_cma.initiator_depth !=
776 ep->rep_remote_cma.responder_resources)
777 ep->rep_remote_cma.initiator_depth =
778 ep->rep_remote_cma.responder_resources;
779
780 ep->rep_connected = 0; 864 ep->rep_connected = 0;
781 865
782 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); 866 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
786 goto out; 870 goto out;
787 } 871 }
788 872
789 if (reconnect)
790 return 0;
791
792 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); 873 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
793 874
794 /* 875 /*
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
805 if (ep->rep_connected <= 0) { 886 if (ep->rep_connected <= 0) {
806 /* Sometimes, the only way to reliably connect to remote 887 /* Sometimes, the only way to reliably connect to remote
807 * CMs is to use same nonzero values for ORD and IRD. */ 888 * CMs is to use same nonzero values for ORD and IRD. */
808 ep->rep_remote_cma.initiator_depth = 889 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
809 ep->rep_remote_cma.responder_resources; 890 (ep->rep_remote_cma.responder_resources == 0 ||
810 if (ep->rep_remote_cma.initiator_depth == 0) 891 ep->rep_remote_cma.initiator_depth !=
811 ++ep->rep_remote_cma.initiator_depth; 892 ep->rep_remote_cma.responder_resources)) {
812 if (ep->rep_remote_cma.responder_resources == 0) 893 if (ep->rep_remote_cma.responder_resources == 0)
813 ++ep->rep_remote_cma.responder_resources; 894 ep->rep_remote_cma.responder_resources = 1;
814 if (retry_count++ == 0) 895 ep->rep_remote_cma.initiator_depth =
896 ep->rep_remote_cma.responder_resources;
815 goto retry; 897 goto retry;
898 }
816 rc = ep->rep_connected; 899 rc = ep->rep_connected;
817 } else { 900 } else {
818 dprintk("RPC: %s: connected\n", __func__); 901 dprintk("RPC: %s: connected\n", __func__);
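The rewritten failure path replaces the old single retry (retry_count++ == 0): a refused connect is now retried up to RDMA_CONNECT_RETRY_MAX + 1 times, but only while there is still an ORD/IRD adjustment left to try, since some remote CMs only accept equal, nonzero values. The retry condition, restated as a hedged standalone helper (want_ord_ird_retry() is an illustrative name; RDMA_CONNECT_RETRY_MAX is added to xprt_rdma.h further down in this diff):

    #include <rdma/rdma_cm.h>

    /* Sketch: decide whether another connect attempt is worthwhile,
     * forcing ORD == IRD != 0 as the hunk above does. */
    static int want_ord_ird_retry(struct rdma_conn_param *cp, int *retries)
    {
            if ((*retries)++ > RDMA_CONNECT_RETRY_MAX + 1)
                    return 0;
            if (cp->responder_resources != 0 &&
                cp->initiator_depth == cp->responder_resources)
                    return 0;       /* nothing left to adjust */
            if (cp->responder_resources == 0)
                    cp->responder_resources = 1;
            cp->initiator_depth = cp->responder_resources;
            return 1;
    }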
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
863 char *p; 946 char *p;
864 size_t len; 947 size_t len;
865 int i, rc; 948 int i, rc;
949 struct rpcrdma_mw *r;
866 950
867 buf->rb_max_requests = cdata->max_requests; 951 buf->rb_max_requests = cdata->max_requests;
868 spin_lock_init(&buf->rb_lock); 952 spin_lock_init(&buf->rb_lock);
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
873 * 2. arrays of struct rpcrdma_req to fill in pointers 957 * 2. arrays of struct rpcrdma_req to fill in pointers
874 * 3. array of struct rpcrdma_rep for replies 958 * 3. array of struct rpcrdma_rep for replies
875 * 4. padding, if any 959 * 4. padding, if any
876 * 5. mw's, if any 960 * 5. mw's, fmr's or frmr's, if any
877 * Send/recv buffers in req/rep need to be registered 961 * Send/recv buffers in req/rep need to be registered
878 */ 962 */
879 963
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
881 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 965 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
882 len += cdata->padding; 966 len += cdata->padding;
883 switch (ia->ri_memreg_strategy) { 967 switch (ia->ri_memreg_strategy) {
968 case RPCRDMA_FRMR:
969 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
970 sizeof(struct rpcrdma_mw);
971 break;
884 case RPCRDMA_MTHCAFMR: 972 case RPCRDMA_MTHCAFMR:
885 /* TBD we are perhaps overallocating here */ 973 /* TBD we are perhaps overallocating here */
886 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * 974 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
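For the new FRMR case the sizing reserves one struct rpcrdma_mw per segment of every request, with no spare request's worth (unlike the FMR and memory-window cases, which allocate one extra for full cycling). Restated as a hedged sketch; both identifiers come from xprt_rdma.h:

    #include "xprt_rdma.h"

    /* Sketch: bytes the FRMR arm adds to the single rpcrdma_buffer
     * allocation. */
    static size_t frmr_pool_bytes(int max_requests)
    {
            return (size_t)max_requests * RPCRDMA_MAX_SEGS *
                    sizeof(struct rpcrdma_mw);
    }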
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
927 * and also reduce unbind-to-bind collision. 1015 * and also reduce unbind-to-bind collision.
928 */ 1016 */
929 INIT_LIST_HEAD(&buf->rb_mws); 1017 INIT_LIST_HEAD(&buf->rb_mws);
1018 r = (struct rpcrdma_mw *)p;
930 switch (ia->ri_memreg_strategy) { 1019 switch (ia->ri_memreg_strategy) {
1020 case RPCRDMA_FRMR:
1021 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1022 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1023 RPCRDMA_MAX_SEGS);
1024 if (IS_ERR(r->r.frmr.fr_mr)) {
1025 rc = PTR_ERR(r->r.frmr.fr_mr);
1026 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1027 " failed %i\n", __func__, rc);
1028 goto out;
1029 }
1030 r->r.frmr.fr_pgl =
1031 ib_alloc_fast_reg_page_list(ia->ri_id->device,
1032 RPCRDMA_MAX_SEGS);
1033 if (IS_ERR(r->r.frmr.fr_pgl)) {
1034 rc = PTR_ERR(r->r.frmr.fr_pgl);
1035 dprintk("RPC: %s: "
1036 "ib_alloc_fast_reg_page_list "
1037 "failed %i\n", __func__, rc);
1038 goto out;
1039 }
1040 list_add(&r->mw_list, &buf->rb_mws);
1041 ++r;
1042 }
1043 break;
931 case RPCRDMA_MTHCAFMR: 1044 case RPCRDMA_MTHCAFMR:
932 {
933 struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
934 struct ib_fmr_attr fa = {
935 RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
936 };
937 /* TBD we are perhaps overallocating here */ 1045 /* TBD we are perhaps overallocating here */
938 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1046 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
1047 static struct ib_fmr_attr fa =
1048 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
939 r->r.fmr = ib_alloc_fmr(ia->ri_pd, 1049 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
940 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, 1050 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
941 &fa); 1051 &fa);
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
948 list_add(&r->mw_list, &buf->rb_mws); 1058 list_add(&r->mw_list, &buf->rb_mws);
949 ++r; 1059 ++r;
950 } 1060 }
951 }
952 break; 1061 break;
953 case RPCRDMA_MEMWINDOWS_ASYNC: 1062 case RPCRDMA_MEMWINDOWS_ASYNC:
954 case RPCRDMA_MEMWINDOWS: 1063 case RPCRDMA_MEMWINDOWS:
955 {
956 struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
957 /* Allocate one extra request's worth, for full cycling */ 1064 /* Allocate one extra request's worth, for full cycling */
958 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1065 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
959 r->r.mw = ib_alloc_mw(ia->ri_pd); 1066 r->r.mw = ib_alloc_mw(ia->ri_pd);
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
966 list_add(&r->mw_list, &buf->rb_mws); 1073 list_add(&r->mw_list, &buf->rb_mws);
967 ++r; 1074 ++r;
968 } 1075 }
969 }
970 break; 1076 break;
971 default: 1077 default:
972 break; 1078 break;
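Each FRMR slot allocated above pairs a fast-register MR with a page list, both sized for RPCRDMA_MAX_SEGS pages; if either allocation fails, the create drops into the common unwind. A self-contained sketch of one pair using the fast-reg verbs of this era (frmr_pair_alloc() is an illustrative name):

    #include <linux/err.h>
    #include <rdma/ib_verbs.h>
    #include "xprt_rdma.h"

    /* Sketch: allocate one FRMR plus its page list on a PD,
     * unwinding the MR if the page list cannot be had. */
    static int frmr_pair_alloc(struct ib_pd *pd, struct ib_device *dev,
                               struct ib_mr **mrp,
                               struct ib_fast_reg_page_list **pglp)
    {
            *mrp = ib_alloc_fast_reg_mr(pd, RPCRDMA_MAX_SEGS);
            if (IS_ERR(*mrp))
                    return PTR_ERR(*mrp);
            *pglp = ib_alloc_fast_reg_page_list(dev, RPCRDMA_MAX_SEGS);
            if (IS_ERR(*pglp)) {
                    ib_dereg_mr(*mrp);
                    return PTR_ERR(*pglp);
            }
            return 0;
    }

The matching teardown appears in the rpcrdma_buffer_destroy() hunk below: ib_dereg_mr() on fr_mr followed by ib_free_fast_reg_page_list() on fr_pgl.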
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1046{ 1152{
1047 int rc, i; 1153 int rc, i;
1048 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1154 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1155 struct rpcrdma_mw *r;
1049 1156
1050 /* clean up in reverse order from create 1157 /* clean up in reverse order from create
1051 * 1. recv mr memory (mr free, then kfree) 1158 * 1. recv mr memory (mr free, then kfree)
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1065 } 1172 }
1066 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { 1173 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
1067 while (!list_empty(&buf->rb_mws)) { 1174 while (!list_empty(&buf->rb_mws)) {
1068 struct rpcrdma_mw *r;
1069 r = list_entry(buf->rb_mws.next, 1175 r = list_entry(buf->rb_mws.next,
1070 struct rpcrdma_mw, mw_list); 1176 struct rpcrdma_mw, mw_list);
1071 list_del(&r->mw_list); 1177 list_del(&r->mw_list);
1072 switch (ia->ri_memreg_strategy) { 1178 switch (ia->ri_memreg_strategy) {
1179 case RPCRDMA_FRMR:
1180 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1181 if (rc)
1182 dprintk("RPC: %s:"
1183 " ib_dereg_mr"
1184 " failed %i\n",
1185 __func__, rc);
1186 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1187 break;
1073 case RPCRDMA_MTHCAFMR: 1188 case RPCRDMA_MTHCAFMR:
1074 rc = ib_dealloc_fmr(r->r.fmr); 1189 rc = ib_dealloc_fmr(r->r.fmr);
1075 if (rc) 1190 if (rc)
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1115{ 1230{
1116 struct rpcrdma_req *req; 1231 struct rpcrdma_req *req;
1117 unsigned long flags; 1232 unsigned long flags;
1233 int i;
1234 struct rpcrdma_mw *r;
1118 1235
1119 spin_lock_irqsave(&buffers->rb_lock, flags); 1236 spin_lock_irqsave(&buffers->rb_lock, flags);
1120 if (buffers->rb_send_index == buffers->rb_max_requests) { 1237 if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1135 } 1252 }
1136 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; 1253 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1137 if (!list_empty(&buffers->rb_mws)) { 1254 if (!list_empty(&buffers->rb_mws)) {
1138 int i = RPCRDMA_MAX_SEGS - 1; 1255 i = RPCRDMA_MAX_SEGS - 1;
1139 do { 1256 do {
1140 struct rpcrdma_mw *r;
1141 r = list_entry(buffers->rb_mws.next, 1257 r = list_entry(buffers->rb_mws.next,
1142 struct rpcrdma_mw, mw_list); 1258 struct rpcrdma_mw, mw_list);
1143 list_del(&r->mw_list); 1259 list_del(&r->mw_list);
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1171 req->rl_reply = NULL; 1287 req->rl_reply = NULL;
1172 } 1288 }
1173 switch (ia->ri_memreg_strategy) { 1289 switch (ia->ri_memreg_strategy) {
1290 case RPCRDMA_FRMR:
1174 case RPCRDMA_MTHCAFMR: 1291 case RPCRDMA_MTHCAFMR:
1175 case RPCRDMA_MEMWINDOWS_ASYNC: 1292 case RPCRDMA_MEMWINDOWS_ASYNC:
1176 case RPCRDMA_MEMWINDOWS: 1293 case RPCRDMA_MEMWINDOWS:
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1252 va, len, DMA_BIDIRECTIONAL); 1369 va, len, DMA_BIDIRECTIONAL);
1253 iov->length = len; 1370 iov->length = len;
1254 1371
1255 if (ia->ri_bind_mem != NULL) { 1372 if (ia->ri_have_dma_lkey) {
1373 *mrp = NULL;
1374 iov->lkey = ia->ri_dma_lkey;
1375 return 0;
1376 } else if (ia->ri_bind_mem != NULL) {
1256 *mrp = NULL; 1377 *mrp = NULL;
1257 iov->lkey = ia->ri_bind_mem->lkey; 1378 iov->lkey = ia->ri_bind_mem->lkey;
1258 return 0; 1379 return 0;
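The new ri_have_dma_lkey/ri_dma_lkey pair (declared in xprt_rdma.h below) lets rpcrdma_register_internal() skip both the all-memory bind MR and a per-buffer registration when the HCA exposes a device-global DMA lkey. A sketch of the probe and choice (pick_local_lkey() is an illustrative name):

    #include <rdma/ib_verbs.h>

    /* Sketch: prefer the device's reserved DMA lkey when advertised. */
    static u32 pick_local_lkey(struct ib_device *dev,
                               const struct ib_device_attr *devattr,
                               struct ib_mr *bind_mem)
    {
            if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                    return dev->local_dma_lkey; /* covers all local memory */
            return bind_mem->lkey;              /* fall back to bound MR */
    }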
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1329 seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 1450 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1330} 1451}
1331 1452
1453static int
1454rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1455 int *nsegs, int writing, struct rpcrdma_ia *ia,
1456 struct rpcrdma_xprt *r_xprt)
1457{
1458 struct rpcrdma_mr_seg *seg1 = seg;
1459 struct ib_send_wr frmr_wr, *bad_wr;
1460 u8 key;
1461 int len, pageoff;
1462 int i, rc;
1463
1464 pageoff = offset_in_page(seg1->mr_offset);
1465 seg1->mr_offset -= pageoff; /* start of page */
1466 seg1->mr_len += pageoff;
1467 len = -pageoff;
1468 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1469 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1470 for (i = 0; i < *nsegs;) {
1471 rpcrdma_map_one(ia, seg, writing);
1472 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1473 len += seg->mr_len;
1474 ++seg;
1475 ++i;
1476 /* Check for holes */
1477 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1478 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1479 break;
1480 }
1481 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1482 __func__, seg1->mr_chunk.rl_mw, i);
1483
1484 /* Bump the key */
1485 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1486 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1487
1488 /* Prepare FRMR WR */
1489 memset(&frmr_wr, 0, sizeof frmr_wr);
1490 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1491 frmr_wr.send_flags = 0; /* unsignaled */
1492 frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
1493 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1494 frmr_wr.wr.fast_reg.page_list_len = i;
1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1497 frmr_wr.wr.fast_reg.access_flags = (writing ?
1498 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
1499 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1500 DECR_CQCOUNT(&r_xprt->rx_ep);
1501
1502 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
1503
1504 if (rc) {
1505 dprintk("RPC: %s: failed ib_post_send for register,"
1506 " status %i\n", __func__, rc);
1507 while (i--)
1508 rpcrdma_unmap_one(ia, --seg);
1509 } else {
1510 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1511 seg1->mr_base = seg1->mr_dma + pageoff;
1512 seg1->mr_nsegs = i;
1513 seg1->mr_len = len;
1514 }
1515 *nsegs = i;
1516 return rc;
1517}
1518
1519static int
1520rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1521 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1522{
1523 struct rpcrdma_mr_seg *seg1 = seg;
1524 struct ib_send_wr invalidate_wr, *bad_wr;
1525 int rc;
1526
1527 while (seg1->mr_nsegs--)
1528 rpcrdma_unmap_one(ia, seg++);
1529
1530 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1531 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1532 invalidate_wr.send_flags = 0; /* unsignaled */
1533 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1534 DECR_CQCOUNT(&r_xprt->rx_ep);
1535
1536 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1537 if (rc)
1538 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1539 " status %i\n", __func__, rc);
1540 return rc;
1541}
1542
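A note on the two functions above: registration posts an unsignaled IB_WR_FAST_REG_MR on the send queue and invalidation posts IB_WR_LOCAL_INV, so both ride the normal send path, which is why each decrements the CQ count. The key bump keeps a stale rkey from a previous use of the same MR from being honored; in isolation it looks like this (bump_frmr_key() is an illustrative wrapper):

    #include <rdma/ib_verbs.h>

    /* Sketch: advance the low 8 "key" bits of the FRMR's rkey before
     * reuse, so late peer accesses with the old rkey fail. */
    static void bump_frmr_key(struct ib_mr *mr)
    {
            u8 key = (u8)(mr->rkey & 0x000000FF);

            ib_update_fast_reg_key(mr, ++key);
    }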
1543static int
1544rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1545 int *nsegs, int writing, struct rpcrdma_ia *ia)
1546{
1547 struct rpcrdma_mr_seg *seg1 = seg;
1548 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1549 int len, pageoff, i, rc;
1550
1551 pageoff = offset_in_page(seg1->mr_offset);
1552 seg1->mr_offset -= pageoff; /* start of page */
1553 seg1->mr_len += pageoff;
1554 len = -pageoff;
1555 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1556 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1557 for (i = 0; i < *nsegs;) {
1558 rpcrdma_map_one(ia, seg, writing);
1559 physaddrs[i] = seg->mr_dma;
1560 len += seg->mr_len;
1561 ++seg;
1562 ++i;
1563 /* Check for holes */
1564 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1565 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1566 break;
1567 }
1568 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1569 physaddrs, i, seg1->mr_dma);
1570 if (rc) {
1571 dprintk("RPC: %s: failed ib_map_phys_fmr "
1572 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1573 len, (unsigned long long)seg1->mr_dma,
1574 pageoff, i, rc);
1575 while (i--)
1576 rpcrdma_unmap_one(ia, --seg);
1577 } else {
1578 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1579 seg1->mr_base = seg1->mr_dma + pageoff;
1580 seg1->mr_nsegs = i;
1581 seg1->mr_len = len;
1582 }
1583 *nsegs = i;
1584 return rc;
1585}
1586
1587static int
1588rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1589 struct rpcrdma_ia *ia)
1590{
1591 struct rpcrdma_mr_seg *seg1 = seg;
1592 LIST_HEAD(l);
1593 int rc;
1594
1595 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1596 rc = ib_unmap_fmr(&l);
1597 while (seg1->mr_nsegs--)
1598 rpcrdma_unmap_one(ia, seg++);
1599 if (rc)
1600 dprintk("RPC: %s: failed ib_unmap_fmr,"
1601 " status %i\n", __func__, rc);
1602 return rc;
1603}
1604
1605static int
1606rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
1607 int *nsegs, int writing, struct rpcrdma_ia *ia,
1608 struct rpcrdma_xprt *r_xprt)
1609{
1610 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1611 IB_ACCESS_REMOTE_READ);
1612 struct ib_mw_bind param;
1613 int rc;
1614
1615 *nsegs = 1;
1616 rpcrdma_map_one(ia, seg, writing);
1617 param.mr = ia->ri_bind_mem;
1618 param.wr_id = 0ULL; /* no send cookie */
1619 param.addr = seg->mr_dma;
1620 param.length = seg->mr_len;
1621 param.send_flags = 0;
1622 param.mw_access_flags = mem_priv;
1623
1624 DECR_CQCOUNT(&r_xprt->rx_ep);
1625 rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
1626 if (rc) {
1627 dprintk("RPC: %s: failed ib_bind_mw "
1628 "%u@0x%llx status %i\n",
1629 __func__, seg->mr_len,
1630 (unsigned long long)seg->mr_dma, rc);
1631 rpcrdma_unmap_one(ia, seg);
1632 } else {
1633 seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
1634 seg->mr_base = param.addr;
1635 seg->mr_nsegs = 1;
1636 }
1637 return rc;
1638}
1639
1640static int
1641rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
1642 struct rpcrdma_ia *ia,
1643 struct rpcrdma_xprt *r_xprt, void **r)
1644{
1645 struct ib_mw_bind param;
1646 LIST_HEAD(l);
1647 int rc;
1648
1649 BUG_ON(seg->mr_nsegs != 1);
1650 param.mr = ia->ri_bind_mem;
1651 param.addr = 0ULL; /* unbind */
1652 param.length = 0;
1653 param.mw_access_flags = 0;
1654 if (*r) {
1655 param.wr_id = (u64) (unsigned long) *r;
1656 param.send_flags = IB_SEND_SIGNALED;
1657 INIT_CQCOUNT(&r_xprt->rx_ep);
1658 } else {
1659 param.wr_id = 0ULL;
1660 param.send_flags = 0;
1661 DECR_CQCOUNT(&r_xprt->rx_ep);
1662 }
1663 rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
1664 rpcrdma_unmap_one(ia, seg);
1665 if (rc)
1666 dprintk("RPC: %s: failed ib_(un)bind_mw,"
1667 " status %i\n", __func__, rc);
1668 else
1669 *r = NULL; /* will upcall on completion */
1670 return rc;
1671}
1672
1673static int
1674rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
1675 int *nsegs, int writing, struct rpcrdma_ia *ia)
1676{
1677 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1678 IB_ACCESS_REMOTE_READ);
1679 struct rpcrdma_mr_seg *seg1 = seg;
1680 struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
1681 int len, i, rc = 0;
1682
1683 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1684 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1685 for (len = 0, i = 0; i < *nsegs;) {
1686 rpcrdma_map_one(ia, seg, writing);
1687 ipb[i].addr = seg->mr_dma;
1688 ipb[i].size = seg->mr_len;
1689 len += seg->mr_len;
1690 ++seg;
1691 ++i;
1692 /* Check for holes */
1693 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1694 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1695 break;
1696 }
1697 seg1->mr_base = seg1->mr_dma;
1698 seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
1699 ipb, i, mem_priv, &seg1->mr_base);
1700 if (IS_ERR(seg1->mr_chunk.rl_mr)) {
1701 rc = PTR_ERR(seg1->mr_chunk.rl_mr);
1702 dprintk("RPC: %s: failed ib_reg_phys_mr "
1703 "%u@0x%llx (%d)... status %i\n",
1704 __func__, len,
1705 (unsigned long long)seg1->mr_dma, i, rc);
1706 while (i--)
1707 rpcrdma_unmap_one(ia, --seg);
1708 } else {
1709 seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
1710 seg1->mr_nsegs = i;
1711 seg1->mr_len = len;
1712 }
1713 *nsegs = i;
1714 return rc;
1715}
1716
1717static int
1718rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
1719 struct rpcrdma_ia *ia)
1720{
1721 struct rpcrdma_mr_seg *seg1 = seg;
1722 int rc;
1723
1724 rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
1725 seg1->mr_chunk.rl_mr = NULL;
1726 while (seg1->mr_nsegs--)
1727 rpcrdma_unmap_one(ia, seg++);
1728 if (rc)
1729 dprintk("RPC: %s: failed ib_dereg_mr,"
1730 " status %i\n", __func__, rc);
1731 return rc;
1732}
1733
1332int 1734int
1333rpcrdma_register_external(struct rpcrdma_mr_seg *seg, 1735rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1334 int nsegs, int writing, struct rpcrdma_xprt *r_xprt) 1736 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1335{ 1737{
1336 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1738 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1337 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1338 IB_ACCESS_REMOTE_READ);
1339 struct rpcrdma_mr_seg *seg1 = seg;
1340 int i;
1341 int rc = 0; 1739 int rc = 0;
1342 1740
1343 switch (ia->ri_memreg_strategy) { 1741 switch (ia->ri_memreg_strategy) {
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1352 break; 1750 break;
1353#endif 1751#endif
1354 1752
1355 /* Registration using fast memory registration */ 1753 /* Registration using frmr registration */
1754 case RPCRDMA_FRMR:
1755 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1756 break;
1757
1758 /* Registration using fmr memory registration */
1356 case RPCRDMA_MTHCAFMR: 1759 case RPCRDMA_MTHCAFMR:
1357 { 1760 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
1358 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1359 int len, pageoff = offset_in_page(seg->mr_offset);
1360 seg1->mr_offset -= pageoff; /* start of page */
1361 seg1->mr_len += pageoff;
1362 len = -pageoff;
1363 if (nsegs > RPCRDMA_MAX_DATA_SEGS)
1364 nsegs = RPCRDMA_MAX_DATA_SEGS;
1365 for (i = 0; i < nsegs;) {
1366 rpcrdma_map_one(ia, seg, writing);
1367 physaddrs[i] = seg->mr_dma;
1368 len += seg->mr_len;
1369 ++seg;
1370 ++i;
1371 /* Check for holes */
1372 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
1373 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1374 break;
1375 }
1376 nsegs = i;
1377 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1378 physaddrs, nsegs, seg1->mr_dma);
1379 if (rc) {
1380 dprintk("RPC: %s: failed ib_map_phys_fmr "
1381 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1382 len, (unsigned long long)seg1->mr_dma,
1383 pageoff, nsegs, rc);
1384 while (nsegs--)
1385 rpcrdma_unmap_one(ia, --seg);
1386 } else {
1387 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1388 seg1->mr_base = seg1->mr_dma + pageoff;
1389 seg1->mr_nsegs = nsegs;
1390 seg1->mr_len = len;
1391 }
1392 }
1393 break; 1761 break;
1394 1762
1395 /* Registration using memory windows */ 1763 /* Registration using memory windows */
1396 case RPCRDMA_MEMWINDOWS_ASYNC: 1764 case RPCRDMA_MEMWINDOWS_ASYNC:
1397 case RPCRDMA_MEMWINDOWS: 1765 case RPCRDMA_MEMWINDOWS:
1398 { 1766 rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
1399 struct ib_mw_bind param;
1400 rpcrdma_map_one(ia, seg, writing);
1401 param.mr = ia->ri_bind_mem;
1402 param.wr_id = 0ULL; /* no send cookie */
1403 param.addr = seg->mr_dma;
1404 param.length = seg->mr_len;
1405 param.send_flags = 0;
1406 param.mw_access_flags = mem_priv;
1407
1408 DECR_CQCOUNT(&r_xprt->rx_ep);
1409 rc = ib_bind_mw(ia->ri_id->qp,
1410 seg->mr_chunk.rl_mw->r.mw, &param);
1411 if (rc) {
1412 dprintk("RPC: %s: failed ib_bind_mw "
1413 "%u@0x%llx status %i\n",
1414 __func__, seg->mr_len,
1415 (unsigned long long)seg->mr_dma, rc);
1416 rpcrdma_unmap_one(ia, seg);
1417 } else {
1418 seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
1419 seg->mr_base = param.addr;
1420 seg->mr_nsegs = 1;
1421 nsegs = 1;
1422 }
1423 }
1424 break; 1767 break;
1425 1768
1426 /* Default registration each time */ 1769 /* Default registration each time */
1427 default: 1770 default:
1428 { 1771 rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
1429 struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
1430 int len = 0;
1431 if (nsegs > RPCRDMA_MAX_DATA_SEGS)
1432 nsegs = RPCRDMA_MAX_DATA_SEGS;
1433 for (i = 0; i < nsegs;) {
1434 rpcrdma_map_one(ia, seg, writing);
1435 ipb[i].addr = seg->mr_dma;
1436 ipb[i].size = seg->mr_len;
1437 len += seg->mr_len;
1438 ++seg;
1439 ++i;
1440 /* Check for holes */
1441 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
1442 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1443 break;
1444 }
1445 nsegs = i;
1446 seg1->mr_base = seg1->mr_dma;
1447 seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
1448 ipb, nsegs, mem_priv, &seg1->mr_base);
1449 if (IS_ERR(seg1->mr_chunk.rl_mr)) {
1450 rc = PTR_ERR(seg1->mr_chunk.rl_mr);
1451 dprintk("RPC: %s: failed ib_reg_phys_mr "
1452 "%u@0x%llx (%d)... status %i\n",
1453 __func__, len,
1454 (unsigned long long)seg1->mr_dma, nsegs, rc);
1455 while (nsegs--)
1456 rpcrdma_unmap_one(ia, --seg);
1457 } else {
1458 seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
1459 seg1->mr_nsegs = nsegs;
1460 seg1->mr_len = len;
1461 }
1462 }
1463 break; 1772 break;
1464 } 1773 }
1465 if (rc) 1774 if (rc)
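After this hunk rpcrdma_register_external() is a pure dispatch on ri_memreg_strategy; the FMR, memory-window and default bodies were moved into the helpers earlier in this diff essentially verbatim. Those helpers (frmr, fmr, default) also share one coalescing idiom: gather segments until a page-alignment hole forces a stop. A hedged restatement (count_contiguous() is an illustrative name; struct rpcrdma_mr_seg is from xprt_rdma.h):

    #include <linux/mm.h>
    #include "xprt_rdma.h"

    /* Sketch: count leading segments that tile pages without holes,
     * mirroring the "Check for holes" loops in the helpers above. */
    static int count_contiguous(struct rpcrdma_mr_seg *seg, int nsegs)
    {
            int i;

            for (i = 1; i < nsegs; i++) {
                    if (offset_in_page(seg[i].mr_offset) ||
                        offset_in_page(seg[i-1].mr_offset + seg[i-1].mr_len))
                            break;
            }
            return i;
    }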
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1473 struct rpcrdma_xprt *r_xprt, void *r) 1782 struct rpcrdma_xprt *r_xprt, void *r)
1474{ 1783{
1475 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1784 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1476 struct rpcrdma_mr_seg *seg1 = seg;
1477 int nsegs = seg->mr_nsegs, rc; 1785 int nsegs = seg->mr_nsegs, rc;
1478 1786
1479 switch (ia->ri_memreg_strategy) { 1787 switch (ia->ri_memreg_strategy) {
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1486 break; 1794 break;
1487#endif 1795#endif
1488 1796
1797 case RPCRDMA_FRMR:
1798 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1799 break;
1800
1489 case RPCRDMA_MTHCAFMR: 1801 case RPCRDMA_MTHCAFMR:
1490 { 1802 rc = rpcrdma_deregister_fmr_external(seg, ia);
1491 LIST_HEAD(l);
1492 list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
1493 rc = ib_unmap_fmr(&l);
1494 while (seg1->mr_nsegs--)
1495 rpcrdma_unmap_one(ia, seg++);
1496 }
1497 if (rc)
1498 dprintk("RPC: %s: failed ib_unmap_fmr,"
1499 " status %i\n", __func__, rc);
1500 break; 1803 break;
1501 1804
1502 case RPCRDMA_MEMWINDOWS_ASYNC: 1805 case RPCRDMA_MEMWINDOWS_ASYNC:
1503 case RPCRDMA_MEMWINDOWS: 1806 case RPCRDMA_MEMWINDOWS:
1504 { 1807 rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
1505 struct ib_mw_bind param;
1506 BUG_ON(nsegs != 1);
1507 param.mr = ia->ri_bind_mem;
1508 param.addr = 0ULL; /* unbind */
1509 param.length = 0;
1510 param.mw_access_flags = 0;
1511 if (r) {
1512 param.wr_id = (u64) (unsigned long) r;
1513 param.send_flags = IB_SEND_SIGNALED;
1514 INIT_CQCOUNT(&r_xprt->rx_ep);
1515 } else {
1516 param.wr_id = 0ULL;
1517 param.send_flags = 0;
1518 DECR_CQCOUNT(&r_xprt->rx_ep);
1519 }
1520 rc = ib_bind_mw(ia->ri_id->qp,
1521 seg->mr_chunk.rl_mw->r.mw, &param);
1522 rpcrdma_unmap_one(ia, seg);
1523 }
1524 if (rc)
1525 dprintk("RPC: %s: failed ib_(un)bind_mw,"
1526 " status %i\n", __func__, rc);
1527 else
1528 r = NULL; /* will upcall on completion */
1529 break; 1808 break;
1530 1809
1531 default: 1810 default:
1532 rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); 1811 rc = rpcrdma_deregister_default_external(seg, ia);
1533 seg1->mr_chunk.rl_mr = NULL;
1534 while (seg1->mr_nsegs--)
1535 rpcrdma_unmap_one(ia, seg++);
1536 if (rc)
1537 dprintk("RPC: %s: failed ib_dereg_mr,"
1538 " status %i\n", __func__, rc);
1539 break; 1812 break;
1540 } 1813 }
1541 if (r) { 1814 if (r) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2427822f8bd4..c7a7eba991bc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,9 @@
51#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ 51#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
52#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ 52#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
53 53
54#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
55#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
56
54/* 57/*
55 * Interface Adapter -- one per transport instance 58 * Interface Adapter -- one per transport instance
56 */ 59 */
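RDMA_RESOLVE_TIMEOUT is in milliseconds, the unit the rdma_cm resolution calls take; both calls return immediately and signal completion through cm events, so the caller must wait between steps. A hedged usage sketch (resolve_peer() is an illustrative name; verbs.c does the equivalent waiting on ia->ri_done):

    #include <linux/completion.h>
    #include <rdma/rdma_cm.h>

    /* Sketch: address then route resolution, each bounded by the new
     * 5 second timeout; the cm event handler must complete() `done`
     * once per RDMA_CM_EVENT_{ADDR,ROUTE}_RESOLVED event. */
    static int resolve_peer(struct rdma_cm_id *id, struct sockaddr *remote,
                            struct completion *done)
    {
            int rc;

            rc = rdma_resolve_addr(id, NULL, remote, RDMA_RESOLVE_TIMEOUT);
            if (rc)
                    return rc;
            wait_for_completion(done);      /* addr resolved */
            rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
            if (rc)
                    return rc;
            wait_for_completion(done);      /* route resolved */
            return 0;
    }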
@@ -58,6 +61,8 @@ struct rpcrdma_ia {
58 struct rdma_cm_id *ri_id; 61 struct rdma_cm_id *ri_id;
59 struct ib_pd *ri_pd; 62 struct ib_pd *ri_pd;
60 struct ib_mr *ri_bind_mem; 63 struct ib_mr *ri_bind_mem;
64 u32 ri_dma_lkey;
65 int ri_have_dma_lkey;
61 struct completion ri_done; 66 struct completion ri_done;
62 int ri_async_rc; 67 int ri_async_rc;
63 enum rpcrdma_memreg ri_memreg_strategy; 68 enum rpcrdma_memreg ri_memreg_strategy;
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
156 union { 161 union {
157 struct ib_mw *mw; 162 struct ib_mw *mw;
158 struct ib_fmr *fmr; 163 struct ib_fmr *fmr;
164 struct {
165 struct ib_fast_reg_page_list *fr_pgl;
166 struct ib_mr *fr_mr;
167 } frmr;
159 } r; 168 } r;
160 struct list_head mw_list; 169 struct list_head mw_list;
161 } *rl_mw; 170 } *rl_mw;
@@ -175,6 +184,7 @@ struct rpcrdma_req {
175 size_t rl_size; /* actual length of buffer */ 184 size_t rl_size; /* actual length of buffer */
176 unsigned int rl_niovs; /* 0, 2 or 4 */ 185 unsigned int rl_niovs; /* 0, 2 or 4 */
177 unsigned int rl_nchunks; /* non-zero if chunks */ 186 unsigned int rl_nchunks; /* non-zero if chunks */
187 unsigned int rl_connect_cookie; /* retry detection */
178 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ 188 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
179 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ 189 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
180 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ 190 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
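rl_connect_cookie records which connection instance a request was last sent on. The consumer is the send path (cf. xprt_rdma_send_request() in transport.c): a request whose cookie still matches the transport's current connect_cookie has already gone out on this connection and must not be retransmitted on it, which would upset the credit accounting. A hedged restatement of that check (sent_on_this_connection() is an illustrative name):

    #include <linux/sunrpc/xprt.h>
    #include "xprt_rdma.h"

    /* Sketch: detect a retransmit attempt on the same connection
     * instance; a match means "drop the connection and reconnect". */
    static bool sent_on_this_connection(struct rpcrdma_req *req,
                                        struct rpc_xprt *xprt)
    {
            if (req->rl_connect_cookie == xprt->connect_cookie)
                    return true;
            req->rl_connect_cookie = xprt->connect_cookie;
            return false;
    }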
@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
198 atomic_t rb_credits; /* most recent server credits */ 208 atomic_t rb_credits; /* most recent server credits */
199 unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ 209 unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
200 int rb_max_requests;/* client max requests */ 210 int rb_max_requests;/* client max requests */
201 struct list_head rb_mws; /* optional memory windows/fmrs */ 211 struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
202 int rb_send_index; 212 int rb_send_index;
203 struct rpcrdma_req **rb_send_bufs; 213 struct rpcrdma_req **rb_send_bufs;
204 int rb_recv_index; 214 int rb_recv_index;
@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
273#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) 283#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
274#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) 284#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
275 285
286/* Setting this to 0 ensures interoperability with early servers.
287 * Setting this to 1 enhances certain unaligned read/write performance.
288 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
289extern int xprt_rdma_pad_optimize;
290
276/* 291/*
277 * Interface Adapter calls - xprtrdma/verbs.c 292 * Interface Adapter calls - xprtrdma/verbs.c
278 */ 293 */
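xprt_rdma_pad_optimize defaults to 0 for interoperability; per the comment above it is surfaced as a sysctl and consulted by rpcrdma_convert_iovs() when deciding how to describe unaligned data. The sysctl hookup would look roughly like the sketch below; the table and entry names (xr_tunables_table, rdma_pad_optimize) are assumptions based on how the other xprtrdma tunables are registered in transport.c:

    #include <linux/sysctl.h>

    /* Hedged sketch of a sunrpc sysctl entry for the new tunable. */
    static ctl_table xr_tunables_table[] = {
            {
                    .ctl_name       = CTL_UNNUMBERED,
                    .procname       = "rdma_pad_optimize",
                    .data           = &xprt_rdma_pad_optimize,
                    .maxlen         = sizeof(xprt_rdma_pad_optimize),
                    .mode           = 0644,
                    .proc_handler   = &proc_dointvec,
            },
            { .ctl_name = 0 },
    };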