diff options
Diffstat (limited to 'net')
40 files changed, 3950 insertions, 1421 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 10e320307ec0..e053e06028a5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c | |||
@@ -52,7 +52,7 @@ enum { | |||
52 | Opt_err, | 52 | Opt_err, |
53 | }; | 53 | }; |
54 | 54 | ||
55 | static match_table_t tokens = { | 55 | static const match_table_t tokens = { |
56 | {Opt_msize, "msize=%u"}, | 56 | {Opt_msize, "msize=%u"}, |
57 | {Opt_legacy, "noextend"}, | 57 | {Opt_legacy, "noextend"}, |
58 | {Opt_trans, "trans=%s"}, | 58 | {Opt_trans, "trans=%s"}, |
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d652baf5ff91..6dabbdb66651 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c | |||
@@ -86,7 +86,7 @@ enum { | |||
86 | Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, | 86 | Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static match_table_t tokens = { | 89 | static const match_table_t tokens = { |
90 | {Opt_port, "port=%u"}, | 90 | {Opt_port, "port=%u"}, |
91 | {Opt_rfdno, "rfdno=%u"}, | 91 | {Opt_rfdno, "rfdno=%u"}, |
92 | {Opt_wfdno, "wfdno=%u"}, | 92 | {Opt_wfdno, "wfdno=%u"}, |
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 96434d774c84..acdeab3d9807 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c | |||
@@ -578,7 +578,7 @@ static int hidp_session(void *arg) | |||
578 | if (session->hid) { | 578 | if (session->hid) { |
579 | if (session->hid->claimed & HID_CLAIMED_INPUT) | 579 | if (session->hid->claimed & HID_CLAIMED_INPUT) |
580 | hidinput_disconnect(session->hid); | 580 | hidinput_disconnect(session->hid); |
581 | hid_free_device(session->hid); | 581 | hid_destroy_device(session->hid); |
582 | } | 582 | } |
583 | 583 | ||
584 | /* Wakeup user-space polling for socket errors */ | 584 | /* Wakeup user-space polling for socket errors */ |
@@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session) | |||
623 | static int hidp_setup_input(struct hidp_session *session, | 623 | static int hidp_setup_input(struct hidp_session *session, |
624 | struct hidp_connadd_req *req) | 624 | struct hidp_connadd_req *req) |
625 | { | 625 | { |
626 | struct input_dev *input = session->input; | 626 | struct input_dev *input; |
627 | int i; | 627 | int i; |
628 | 628 | ||
629 | input = input_allocate_device(); | ||
630 | if (!input) | ||
631 | return -ENOMEM; | ||
632 | |||
633 | session->input = input; | ||
634 | |||
629 | input_set_drvdata(input, session); | 635 | input_set_drvdata(input, session); |
630 | 636 | ||
631 | input->name = "Bluetooth HID Boot Protocol Device"; | 637 | input->name = "Bluetooth HID Boot Protocol Device"; |
@@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid) | |||
677 | { | 683 | { |
678 | } | 684 | } |
679 | 685 | ||
680 | static const struct { | 686 | static int hidp_parse(struct hid_device *hid) |
681 | __u16 idVendor; | 687 | { |
682 | __u16 idProduct; | 688 | struct hidp_session *session = hid->driver_data; |
683 | unsigned quirks; | 689 | struct hidp_connadd_req *req = session->req; |
684 | } hidp_blacklist[] = { | 690 | unsigned char *buf; |
685 | /* Apple wireless Mighty Mouse */ | 691 | int ret; |
686 | { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL }, | ||
687 | 692 | ||
688 | { } /* Terminating entry */ | 693 | buf = kmalloc(req->rd_size, GFP_KERNEL); |
689 | }; | 694 | if (!buf) |
695 | return -ENOMEM; | ||
696 | |||
697 | if (copy_from_user(buf, req->rd_data, req->rd_size)) { | ||
698 | kfree(buf); | ||
699 | return -EFAULT; | ||
700 | } | ||
701 | |||
702 | ret = hid_parse_report(session->hid, buf, req->rd_size); | ||
703 | |||
704 | kfree(buf); | ||
705 | |||
706 | if (ret) | ||
707 | return ret; | ||
708 | |||
709 | session->req = NULL; | ||
710 | |||
711 | return 0; | ||
712 | } | ||
713 | |||
714 | static int hidp_start(struct hid_device *hid) | ||
715 | { | ||
716 | struct hidp_session *session = hid->driver_data; | ||
717 | struct hid_report *report; | ||
690 | 718 | ||
691 | static void hidp_setup_quirks(struct hid_device *hid) | 719 | list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT]. |
720 | report_list, list) | ||
721 | hidp_send_report(session, report); | ||
722 | |||
723 | list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT]. | ||
724 | report_list, list) | ||
725 | hidp_send_report(session, report); | ||
726 | |||
727 | return 0; | ||
728 | } | ||
729 | |||
730 | static void hidp_stop(struct hid_device *hid) | ||
692 | { | 731 | { |
693 | unsigned int n; | 732 | struct hidp_session *session = hid->driver_data; |
733 | |||
734 | skb_queue_purge(&session->ctrl_transmit); | ||
735 | skb_queue_purge(&session->intr_transmit); | ||
694 | 736 | ||
695 | for (n = 0; hidp_blacklist[n].idVendor; n++) | 737 | if (hid->claimed & HID_CLAIMED_INPUT) |
696 | if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) && | 738 | hidinput_disconnect(hid); |
697 | hidp_blacklist[n].idProduct == le16_to_cpu(hid->product)) | 739 | hid->claimed = 0; |
698 | hid->quirks = hidp_blacklist[n].quirks; | ||
699 | } | 740 | } |
700 | 741 | ||
701 | static void hidp_setup_hid(struct hidp_session *session, | 742 | static struct hid_ll_driver hidp_hid_driver = { |
743 | .parse = hidp_parse, | ||
744 | .start = hidp_start, | ||
745 | .stop = hidp_stop, | ||
746 | .open = hidp_open, | ||
747 | .close = hidp_close, | ||
748 | .hidinput_input_event = hidp_hidinput_event, | ||
749 | }; | ||
750 | |||
751 | static int hidp_setup_hid(struct hidp_session *session, | ||
702 | struct hidp_connadd_req *req) | 752 | struct hidp_connadd_req *req) |
703 | { | 753 | { |
704 | struct hid_device *hid = session->hid; | 754 | struct hid_device *hid; |
705 | struct hid_report *report; | ||
706 | bdaddr_t src, dst; | 755 | bdaddr_t src, dst; |
756 | int ret; | ||
707 | 757 | ||
708 | baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); | 758 | hid = hid_allocate_device(); |
709 | baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); | 759 | if (IS_ERR(hid)) { |
760 | ret = PTR_ERR(session->hid); | ||
761 | goto err; | ||
762 | } | ||
710 | 763 | ||
764 | session->hid = hid; | ||
765 | session->req = req; | ||
711 | hid->driver_data = session; | 766 | hid->driver_data = session; |
712 | 767 | ||
713 | hid->country = req->country; | 768 | baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); |
769 | baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); | ||
714 | 770 | ||
715 | hid->bus = BUS_BLUETOOTH; | 771 | hid->bus = BUS_BLUETOOTH; |
716 | hid->vendor = req->vendor; | 772 | hid->vendor = req->vendor; |
717 | hid->product = req->product; | 773 | hid->product = req->product; |
718 | hid->version = req->version; | 774 | hid->version = req->version; |
775 | hid->country = req->country; | ||
719 | 776 | ||
720 | strncpy(hid->name, req->name, 128); | 777 | strncpy(hid->name, req->name, 128); |
721 | strncpy(hid->phys, batostr(&src), 64); | 778 | strncpy(hid->phys, batostr(&src), 64); |
722 | strncpy(hid->uniq, batostr(&dst), 64); | 779 | strncpy(hid->uniq, batostr(&dst), 64); |
723 | 780 | ||
724 | hid->dev = hidp_get_device(session); | 781 | hid->dev.parent = hidp_get_device(session); |
725 | 782 | hid->ll_driver = &hidp_hid_driver; | |
726 | hid->hid_open = hidp_open; | ||
727 | hid->hid_close = hidp_close; | ||
728 | |||
729 | hid->hidinput_input_event = hidp_hidinput_event; | ||
730 | 783 | ||
731 | hidp_setup_quirks(hid); | 784 | ret = hid_add_device(hid); |
785 | if (ret) | ||
786 | goto err_hid; | ||
732 | 787 | ||
733 | list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list) | 788 | return 0; |
734 | hidp_send_report(session, report); | 789 | err_hid: |
735 | 790 | hid_destroy_device(hid); | |
736 | list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list) | 791 | session->hid = NULL; |
737 | hidp_send_report(session, report); | 792 | err: |
738 | 793 | return ret; | |
739 | if (hidinput_connect(hid) == 0) | ||
740 | hid->claimed |= HID_CLAIMED_INPUT; | ||
741 | } | 794 | } |
742 | 795 | ||
743 | int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) | 796 | int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) |
@@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, | |||
757 | 810 | ||
758 | BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); | 811 | BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); |
759 | 812 | ||
760 | if (req->rd_size > 0) { | ||
761 | unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL); | ||
762 | |||
763 | if (!buf) { | ||
764 | kfree(session); | ||
765 | return -ENOMEM; | ||
766 | } | ||
767 | |||
768 | if (copy_from_user(buf, req->rd_data, req->rd_size)) { | ||
769 | kfree(buf); | ||
770 | kfree(session); | ||
771 | return -EFAULT; | ||
772 | } | ||
773 | |||
774 | session->hid = hid_parse_report(buf, req->rd_size); | ||
775 | |||
776 | kfree(buf); | ||
777 | |||
778 | if (!session->hid) { | ||
779 | kfree(session); | ||
780 | return -EINVAL; | ||
781 | } | ||
782 | } | ||
783 | |||
784 | if (!session->hid) { | ||
785 | session->input = input_allocate_device(); | ||
786 | if (!session->input) { | ||
787 | kfree(session); | ||
788 | return -ENOMEM; | ||
789 | } | ||
790 | } | ||
791 | |||
792 | down_write(&hidp_session_sem); | 813 | down_write(&hidp_session_sem); |
793 | 814 | ||
794 | s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); | 815 | s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); |
@@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, | |||
816 | session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); | 837 | session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); |
817 | session->idle_to = req->idle_to; | 838 | session->idle_to = req->idle_to; |
818 | 839 | ||
819 | if (session->input) { | 840 | if (req->rd_size > 0) { |
841 | err = hidp_setup_hid(session, req); | ||
842 | if (err && err != -ENODEV) | ||
843 | goto err_skb; | ||
844 | } | ||
845 | |||
846 | if (!session->hid) { | ||
820 | err = hidp_setup_input(session, req); | 847 | err = hidp_setup_input(session, req); |
821 | if (err < 0) | 848 | if (err < 0) |
822 | goto failed; | 849 | goto err_skb; |
823 | } | 850 | } |
824 | 851 | ||
825 | if (session->hid) | ||
826 | hidp_setup_hid(session, req); | ||
827 | |||
828 | __hidp_link_session(session); | 852 | __hidp_link_session(session); |
829 | 853 | ||
830 | hidp_set_timer(session); | 854 | hidp_set_timer(session); |
@@ -850,17 +874,16 @@ unlink: | |||
850 | 874 | ||
851 | __hidp_unlink_session(session); | 875 | __hidp_unlink_session(session); |
852 | 876 | ||
853 | if (session->input) { | 877 | if (session->input) |
854 | input_unregister_device(session->input); | 878 | input_unregister_device(session->input); |
855 | session->input = NULL; /* don't try to free it here */ | 879 | if (session->hid) |
856 | } | 880 | hid_destroy_device(session->hid); |
857 | 881 | err_skb: | |
882 | skb_queue_purge(&session->ctrl_transmit); | ||
883 | skb_queue_purge(&session->intr_transmit); | ||
858 | failed: | 884 | failed: |
859 | up_write(&hidp_session_sem); | 885 | up_write(&hidp_session_sem); |
860 | 886 | ||
861 | if (session->hid) | ||
862 | hid_free_device(session->hid); | ||
863 | |||
864 | input_free_device(session->input); | 887 | input_free_device(session->input); |
865 | kfree(session); | 888 | kfree(session); |
866 | return err; | 889 | return err; |
@@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci) | |||
950 | return err; | 973 | return err; |
951 | } | 974 | } |
952 | 975 | ||
976 | static const struct hid_device_id hidp_table[] = { | ||
977 | { HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) }, | ||
978 | { } | ||
979 | }; | ||
980 | |||
981 | static struct hid_driver hidp_driver = { | ||
982 | .name = "generic-bluetooth", | ||
983 | .id_table = hidp_table, | ||
984 | }; | ||
985 | |||
953 | static int __init hidp_init(void) | 986 | static int __init hidp_init(void) |
954 | { | 987 | { |
988 | int ret; | ||
989 | |||
955 | l2cap_load(); | 990 | l2cap_load(); |
956 | 991 | ||
957 | BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); | 992 | BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); |
958 | 993 | ||
959 | return hidp_init_sockets(); | 994 | ret = hid_register_driver(&hidp_driver); |
995 | if (ret) | ||
996 | goto err; | ||
997 | |||
998 | ret = hidp_init_sockets(); | ||
999 | if (ret) | ||
1000 | goto err_drv; | ||
1001 | |||
1002 | return 0; | ||
1003 | err_drv: | ||
1004 | hid_unregister_driver(&hidp_driver); | ||
1005 | err: | ||
1006 | return ret; | ||
960 | } | 1007 | } |
961 | 1008 | ||
962 | static void __exit hidp_exit(void) | 1009 | static void __exit hidp_exit(void) |
963 | { | 1010 | { |
964 | hidp_cleanup_sockets(); | 1011 | hidp_cleanup_sockets(); |
1012 | hid_unregister_driver(&hidp_driver); | ||
965 | } | 1013 | } |
966 | 1014 | ||
967 | module_init(hidp_init); | 1015 | module_init(hidp_init); |
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 343fb0566b3e..e503c89057ad 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h | |||
@@ -151,6 +151,8 @@ struct hidp_session { | |||
151 | 151 | ||
152 | struct sk_buff_head ctrl_transmit; | 152 | struct sk_buff_head ctrl_transmit; |
153 | struct sk_buff_head intr_transmit; | 153 | struct sk_buff_head intr_transmit; |
154 | |||
155 | struct hidp_connadd_req *req; | ||
154 | }; | 156 | }; |
155 | 157 | ||
156 | static inline void hidp_schedule(struct hidp_session *session) | 158 | static inline void hidp_schedule(struct hidp_session *session) |
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4fd4a4f74e82..28e26bd08e24 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ | |||
166 | 166 | ||
167 | static int dn_forwarding_proc(ctl_table *, int, struct file *, | 167 | static int dn_forwarding_proc(ctl_table *, int, struct file *, |
168 | void __user *, size_t *, loff_t *); | 168 | void __user *, size_t *, loff_t *); |
169 | static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, | 169 | static int dn_forwarding_sysctl(ctl_table *table, |
170 | void __user *oldval, size_t __user *oldlenp, | 170 | void __user *oldval, size_t __user *oldlenp, |
171 | void __user *newval, size_t newlen); | 171 | void __user *newval, size_t newlen); |
172 | 172 | ||
@@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, | |||
318 | #endif | 318 | #endif |
319 | } | 319 | } |
320 | 320 | ||
321 | static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, | 321 | static int dn_forwarding_sysctl(ctl_table *table, |
322 | void __user *oldval, size_t __user *oldlenp, | 322 | void __user *oldval, size_t __user *oldlenp, |
323 | void __user *newval, size_t newlen) | 323 | void __user *newval, size_t newlen) |
324 | { | 324 | { |
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 228067c571ba..36400b266896 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c | |||
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) | |||
132 | } | 132 | } |
133 | 133 | ||
134 | 134 | ||
135 | static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, | 135 | static int dn_node_address_strategy(ctl_table *table, |
136 | void __user *oldval, size_t __user *oldlenp, | 136 | void __user *oldval, size_t __user *oldlenp, |
137 | void __user *newval, size_t newlen) | 137 | void __user *newval, size_t newlen) |
138 | { | 138 | { |
@@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write, | |||
217 | } | 217 | } |
218 | 218 | ||
219 | 219 | ||
220 | static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, | 220 | static int dn_def_dev_strategy(ctl_table *table, |
221 | void __user *oldval, size_t __user *oldlenp, | 221 | void __user *oldval, size_t __user *oldlenp, |
222 | void __user *newval, size_t newlen) | 222 | void __user *newval, size_t newlen) |
223 | { | 223 | { |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c0e4572cc90..490e035c6d90 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -13,7 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | 16 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 |
17 | * | 17 | * |
18 | * This program is free software; you can redistribute it and/or modify | 18 | * This program is free software; you can redistribute it and/or modify |
19 | * it under the terms of the GNU General Public License as published by | 19 | * it under the terms of the GNU General Public License as published by |
@@ -47,17 +47,7 @@ | |||
47 | #include <asm/bug.h> | 47 | #include <asm/bug.h> |
48 | #include <asm/unaligned.h> | 48 | #include <asm/unaligned.h> |
49 | 49 | ||
50 | struct cipso_v4_domhsh_entry { | ||
51 | char *domain; | ||
52 | u32 valid; | ||
53 | struct list_head list; | ||
54 | struct rcu_head rcu; | ||
55 | }; | ||
56 | |||
57 | /* List of available DOI definitions */ | 50 | /* List of available DOI definitions */ |
58 | /* XXX - Updates should be minimal so having a single lock for the | ||
59 | * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be | ||
60 | * okay. */ | ||
61 | /* XXX - This currently assumes a minimal number of different DOIs in use, | 51 | /* XXX - This currently assumes a minimal number of different DOIs in use, |
62 | * if in practice there are a lot of different DOIs this list should | 52 | * if in practice there are a lot of different DOIs this list should |
63 | * probably be turned into a hash table or something similar so we | 53 | * probably be turned into a hash table or something similar so we |
@@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1; | |||
119 | * be omitted. */ | 109 | * be omitted. */ |
120 | #define CIPSO_V4_TAG_RNG_CAT_MAX 8 | 110 | #define CIPSO_V4_TAG_RNG_CAT_MAX 8 |
121 | 111 | ||
112 | /* Base length of the local tag (non-standard tag). | ||
113 | * Tag definition (may change between kernel versions) | ||
114 | * | ||
115 | * 0 8 16 24 32 | ||
116 | * +----------+----------+----------+----------+ | ||
117 | * | 10000000 | 00000110 | 32-bit secid value | | ||
118 | * +----------+----------+----------+----------+ | ||
119 | * | in (host byte order)| | ||
120 | * +----------+----------+ | ||
121 | * | ||
122 | */ | ||
123 | #define CIPSO_V4_TAG_LOC_BLEN 6 | ||
124 | |||
122 | /* | 125 | /* |
123 | * Helper Functions | 126 | * Helper Functions |
124 | */ | 127 | */ |
@@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap, | |||
194 | } | 197 | } |
195 | 198 | ||
196 | /** | 199 | /** |
197 | * cipso_v4_doi_domhsh_free - Frees a domain list entry | ||
198 | * @entry: the entry's RCU field | ||
199 | * | ||
200 | * Description: | ||
201 | * This function is designed to be used as a callback to the call_rcu() | ||
202 | * function so that the memory allocated to a domain list entry can be released | ||
203 | * safely. | ||
204 | * | ||
205 | */ | ||
206 | static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) | ||
207 | { | ||
208 | struct cipso_v4_domhsh_entry *ptr; | ||
209 | |||
210 | ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); | ||
211 | kfree(ptr->domain); | ||
212 | kfree(ptr); | ||
213 | } | ||
214 | |||
215 | /** | ||
216 | * cipso_v4_cache_entry_free - Frees a cache entry | 200 | * cipso_v4_cache_entry_free - Frees a cache entry |
217 | * @entry: the entry to free | 201 | * @entry: the entry to free |
218 | * | 202 | * |
@@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) | |||
457 | struct cipso_v4_doi *iter; | 441 | struct cipso_v4_doi *iter; |
458 | 442 | ||
459 | list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) | 443 | list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) |
460 | if (iter->doi == doi && iter->valid) | 444 | if (iter->doi == doi && atomic_read(&iter->refcount)) |
461 | return iter; | 445 | return iter; |
462 | return NULL; | 446 | return NULL; |
463 | } | 447 | } |
@@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) | |||
496 | if (doi_def->type != CIPSO_V4_MAP_PASS) | 480 | if (doi_def->type != CIPSO_V4_MAP_PASS) |
497 | return -EINVAL; | 481 | return -EINVAL; |
498 | break; | 482 | break; |
483 | case CIPSO_V4_TAG_LOCAL: | ||
484 | if (doi_def->type != CIPSO_V4_MAP_LOCAL) | ||
485 | return -EINVAL; | ||
486 | break; | ||
499 | default: | 487 | default: |
500 | return -EINVAL; | 488 | return -EINVAL; |
501 | } | 489 | } |
502 | } | 490 | } |
503 | 491 | ||
504 | doi_def->valid = 1; | 492 | atomic_set(&doi_def->refcount, 1); |
505 | INIT_RCU_HEAD(&doi_def->rcu); | 493 | INIT_RCU_HEAD(&doi_def->rcu); |
506 | INIT_LIST_HEAD(&doi_def->dom_list); | ||
507 | 494 | ||
508 | spin_lock(&cipso_v4_doi_list_lock); | 495 | spin_lock(&cipso_v4_doi_list_lock); |
509 | if (cipso_v4_doi_search(doi_def->doi) != NULL) | 496 | if (cipso_v4_doi_search(doi_def->doi) != NULL) |
@@ -519,59 +506,129 @@ doi_add_failure: | |||
519 | } | 506 | } |
520 | 507 | ||
521 | /** | 508 | /** |
509 | * cipso_v4_doi_free - Frees a DOI definition | ||
510 | * @entry: the entry's RCU field | ||
511 | * | ||
512 | * Description: | ||
513 | * This function frees all of the memory associated with a DOI definition. | ||
514 | * | ||
515 | */ | ||
516 | void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) | ||
517 | { | ||
518 | if (doi_def == NULL) | ||
519 | return; | ||
520 | |||
521 | switch (doi_def->type) { | ||
522 | case CIPSO_V4_MAP_TRANS: | ||
523 | kfree(doi_def->map.std->lvl.cipso); | ||
524 | kfree(doi_def->map.std->lvl.local); | ||
525 | kfree(doi_def->map.std->cat.cipso); | ||
526 | kfree(doi_def->map.std->cat.local); | ||
527 | break; | ||
528 | } | ||
529 | kfree(doi_def); | ||
530 | } | ||
531 | |||
532 | /** | ||
533 | * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer | ||
534 | * @entry: the entry's RCU field | ||
535 | * | ||
536 | * Description: | ||
537 | * This function is designed to be used as a callback to the call_rcu() | ||
538 | * function so that the memory allocated to the DOI definition can be released | ||
539 | * safely. | ||
540 | * | ||
541 | */ | ||
542 | static void cipso_v4_doi_free_rcu(struct rcu_head *entry) | ||
543 | { | ||
544 | struct cipso_v4_doi *doi_def; | ||
545 | |||
546 | doi_def = container_of(entry, struct cipso_v4_doi, rcu); | ||
547 | cipso_v4_doi_free(doi_def); | ||
548 | } | ||
549 | |||
550 | /** | ||
522 | * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine | 551 | * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine |
523 | * @doi: the DOI value | 552 | * @doi: the DOI value |
524 | * @audit_secid: the LSM secid to use in the audit message | 553 | * @audit_secid: the LSM secid to use in the audit message |
525 | * @callback: the DOI cleanup/free callback | ||
526 | * | 554 | * |
527 | * Description: | 555 | * Description: |
528 | * Removes a DOI definition from the CIPSO engine, @callback is called to | 556 | * Removes a DOI definition from the CIPSO engine. The NetLabel routines will |
529 | * free any memory. The NetLabel routines will be called to release their own | 557 | * be called to release their own LSM domain mappings as well as our own |
530 | * LSM domain mappings as well as our own domain list. Returns zero on | 558 | * domain list. Returns zero on success and negative values on failure. |
531 | * success and negative values on failure. | ||
532 | * | 559 | * |
533 | */ | 560 | */ |
534 | int cipso_v4_doi_remove(u32 doi, | 561 | int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) |
535 | struct netlbl_audit *audit_info, | ||
536 | void (*callback) (struct rcu_head * head)) | ||
537 | { | 562 | { |
538 | struct cipso_v4_doi *doi_def; | 563 | struct cipso_v4_doi *doi_def; |
539 | struct cipso_v4_domhsh_entry *dom_iter; | ||
540 | 564 | ||
541 | spin_lock(&cipso_v4_doi_list_lock); | 565 | spin_lock(&cipso_v4_doi_list_lock); |
542 | doi_def = cipso_v4_doi_search(doi); | 566 | doi_def = cipso_v4_doi_search(doi); |
543 | if (doi_def != NULL) { | 567 | if (doi_def == NULL) { |
544 | doi_def->valid = 0; | ||
545 | list_del_rcu(&doi_def->list); | ||
546 | spin_unlock(&cipso_v4_doi_list_lock); | 568 | spin_unlock(&cipso_v4_doi_list_lock); |
547 | rcu_read_lock(); | 569 | return -ENOENT; |
548 | list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) | 570 | } |
549 | if (dom_iter->valid) | 571 | if (!atomic_dec_and_test(&doi_def->refcount)) { |
550 | netlbl_cfg_map_del(dom_iter->domain, | 572 | spin_unlock(&cipso_v4_doi_list_lock); |
551 | audit_info); | 573 | return -EBUSY; |
552 | rcu_read_unlock(); | ||
553 | cipso_v4_cache_invalidate(); | ||
554 | call_rcu(&doi_def->rcu, callback); | ||
555 | return 0; | ||
556 | } | 574 | } |
575 | list_del_rcu(&doi_def->list); | ||
557 | spin_unlock(&cipso_v4_doi_list_lock); | 576 | spin_unlock(&cipso_v4_doi_list_lock); |
558 | 577 | ||
559 | return -ENOENT; | 578 | cipso_v4_cache_invalidate(); |
579 | call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); | ||
580 | |||
581 | return 0; | ||
560 | } | 582 | } |
561 | 583 | ||
562 | /** | 584 | /** |
563 | * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition | 585 | * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition |
564 | * @doi: the DOI value | 586 | * @doi: the DOI value |
565 | * | 587 | * |
566 | * Description: | 588 | * Description: |
567 | * Searches for a valid DOI definition and if one is found it is returned to | 589 | * Searches for a valid DOI definition and if one is found it is returned to |
568 | * the caller. Otherwise NULL is returned. The caller must ensure that | 590 | * the caller. Otherwise NULL is returned. The caller must ensure that |
569 | * rcu_read_lock() is held while accessing the returned definition. | 591 | * rcu_read_lock() is held while accessing the returned definition and the DOI |
592 | * definition reference count is decremented when the caller is done. | ||
570 | * | 593 | * |
571 | */ | 594 | */ |
572 | struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) | 595 | struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) |
573 | { | 596 | { |
574 | return cipso_v4_doi_search(doi); | 597 | struct cipso_v4_doi *doi_def; |
598 | |||
599 | rcu_read_lock(); | ||
600 | doi_def = cipso_v4_doi_search(doi); | ||
601 | if (doi_def == NULL) | ||
602 | goto doi_getdef_return; | ||
603 | if (!atomic_inc_not_zero(&doi_def->refcount)) | ||
604 | doi_def = NULL; | ||
605 | |||
606 | doi_getdef_return: | ||
607 | rcu_read_unlock(); | ||
608 | return doi_def; | ||
609 | } | ||
610 | |||
611 | /** | ||
612 | * cipso_v4_doi_putdef - Releases a reference for the given DOI definition | ||
613 | * @doi_def: the DOI definition | ||
614 | * | ||
615 | * Description: | ||
616 | * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). | ||
617 | * | ||
618 | */ | ||
619 | void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) | ||
620 | { | ||
621 | if (doi_def == NULL) | ||
622 | return; | ||
623 | |||
624 | if (!atomic_dec_and_test(&doi_def->refcount)) | ||
625 | return; | ||
626 | spin_lock(&cipso_v4_doi_list_lock); | ||
627 | list_del_rcu(&doi_def->list); | ||
628 | spin_unlock(&cipso_v4_doi_list_lock); | ||
629 | |||
630 | cipso_v4_cache_invalidate(); | ||
631 | call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); | ||
575 | } | 632 | } |
576 | 633 | ||
577 | /** | 634 | /** |
@@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, | |||
597 | 654 | ||
598 | rcu_read_lock(); | 655 | rcu_read_lock(); |
599 | list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) | 656 | list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) |
600 | if (iter_doi->valid) { | 657 | if (atomic_read(&iter_doi->refcount) > 0) { |
601 | if (doi_cnt++ < *skip_cnt) | 658 | if (doi_cnt++ < *skip_cnt) |
602 | continue; | 659 | continue; |
603 | ret_val = callback(iter_doi, cb_arg); | 660 | ret_val = callback(iter_doi, cb_arg); |
@@ -613,85 +670,6 @@ doi_walk_return: | |||
613 | return ret_val; | 670 | return ret_val; |
614 | } | 671 | } |
615 | 672 | ||
616 | /** | ||
617 | * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition | ||
618 | * @doi_def: the DOI definition | ||
619 | * @domain: the domain to add | ||
620 | * | ||
621 | * Description: | ||
622 | * Adds the @domain to the DOI specified by @doi_def, this function | ||
623 | * should only be called by external functions (i.e. NetLabel). This function | ||
624 | * does allocate memory. Returns zero on success, negative values on failure. | ||
625 | * | ||
626 | */ | ||
627 | int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) | ||
628 | { | ||
629 | struct cipso_v4_domhsh_entry *iter; | ||
630 | struct cipso_v4_domhsh_entry *new_dom; | ||
631 | |||
632 | new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); | ||
633 | if (new_dom == NULL) | ||
634 | return -ENOMEM; | ||
635 | if (domain) { | ||
636 | new_dom->domain = kstrdup(domain, GFP_KERNEL); | ||
637 | if (new_dom->domain == NULL) { | ||
638 | kfree(new_dom); | ||
639 | return -ENOMEM; | ||
640 | } | ||
641 | } | ||
642 | new_dom->valid = 1; | ||
643 | INIT_RCU_HEAD(&new_dom->rcu); | ||
644 | |||
645 | spin_lock(&cipso_v4_doi_list_lock); | ||
646 | list_for_each_entry(iter, &doi_def->dom_list, list) | ||
647 | if (iter->valid && | ||
648 | ((domain != NULL && iter->domain != NULL && | ||
649 | strcmp(iter->domain, domain) == 0) || | ||
650 | (domain == NULL && iter->domain == NULL))) { | ||
651 | spin_unlock(&cipso_v4_doi_list_lock); | ||
652 | kfree(new_dom->domain); | ||
653 | kfree(new_dom); | ||
654 | return -EEXIST; | ||
655 | } | ||
656 | list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); | ||
657 | spin_unlock(&cipso_v4_doi_list_lock); | ||
658 | |||
659 | return 0; | ||
660 | } | ||
661 | |||
662 | /** | ||
663 | * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition | ||
664 | * @doi_def: the DOI definition | ||
665 | * @domain: the domain to remove | ||
666 | * | ||
667 | * Description: | ||
668 | * Removes the @domain from the DOI specified by @doi_def, this function | ||
669 | * should only be called by external functions (i.e. NetLabel). Returns zero | ||
670 | * on success and negative values on error. | ||
671 | * | ||
672 | */ | ||
673 | int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, | ||
674 | const char *domain) | ||
675 | { | ||
676 | struct cipso_v4_domhsh_entry *iter; | ||
677 | |||
678 | spin_lock(&cipso_v4_doi_list_lock); | ||
679 | list_for_each_entry(iter, &doi_def->dom_list, list) | ||
680 | if (iter->valid && | ||
681 | ((domain != NULL && iter->domain != NULL && | ||
682 | strcmp(iter->domain, domain) == 0) || | ||
683 | (domain == NULL && iter->domain == NULL))) { | ||
684 | iter->valid = 0; | ||
685 | list_del_rcu(&iter->list); | ||
686 | spin_unlock(&cipso_v4_doi_list_lock); | ||
687 | call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); | ||
688 | return 0; | ||
689 | } | ||
690 | spin_unlock(&cipso_v4_doi_list_lock); | ||
691 | |||
692 | return -ENOENT; | ||
693 | } | ||
694 | |||
695 | /* | 673 | /* |
696 | * Label Mapping Functions | 674 | * Label Mapping Functions |
697 | */ | 675 | */ |
@@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) | |||
712 | switch (doi_def->type) { | 690 | switch (doi_def->type) { |
713 | case CIPSO_V4_MAP_PASS: | 691 | case CIPSO_V4_MAP_PASS: |
714 | return 0; | 692 | return 0; |
715 | case CIPSO_V4_MAP_STD: | 693 | case CIPSO_V4_MAP_TRANS: |
716 | if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) | 694 | if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) |
717 | return 0; | 695 | return 0; |
718 | break; | 696 | break; |
@@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, | |||
741 | case CIPSO_V4_MAP_PASS: | 719 | case CIPSO_V4_MAP_PASS: |
742 | *net_lvl = host_lvl; | 720 | *net_lvl = host_lvl; |
743 | return 0; | 721 | return 0; |
744 | case CIPSO_V4_MAP_STD: | 722 | case CIPSO_V4_MAP_TRANS: |
745 | if (host_lvl < doi_def->map.std->lvl.local_size && | 723 | if (host_lvl < doi_def->map.std->lvl.local_size && |
746 | doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { | 724 | doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { |
747 | *net_lvl = doi_def->map.std->lvl.local[host_lvl]; | 725 | *net_lvl = doi_def->map.std->lvl.local[host_lvl]; |
@@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, | |||
775 | case CIPSO_V4_MAP_PASS: | 753 | case CIPSO_V4_MAP_PASS: |
776 | *host_lvl = net_lvl; | 754 | *host_lvl = net_lvl; |
777 | return 0; | 755 | return 0; |
778 | case CIPSO_V4_MAP_STD: | 756 | case CIPSO_V4_MAP_TRANS: |
779 | map_tbl = doi_def->map.std; | 757 | map_tbl = doi_def->map.std; |
780 | if (net_lvl < map_tbl->lvl.cipso_size && | 758 | if (net_lvl < map_tbl->lvl.cipso_size && |
781 | map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { | 759 | map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { |
@@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, | |||
812 | switch (doi_def->type) { | 790 | switch (doi_def->type) { |
813 | case CIPSO_V4_MAP_PASS: | 791 | case CIPSO_V4_MAP_PASS: |
814 | return 0; | 792 | return 0; |
815 | case CIPSO_V4_MAP_STD: | 793 | case CIPSO_V4_MAP_TRANS: |
816 | cipso_cat_size = doi_def->map.std->cat.cipso_size; | 794 | cipso_cat_size = doi_def->map.std->cat.cipso_size; |
817 | cipso_array = doi_def->map.std->cat.cipso; | 795 | cipso_array = doi_def->map.std->cat.cipso; |
818 | for (;;) { | 796 | for (;;) { |
@@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, | |||
860 | u32 host_cat_size = 0; | 838 | u32 host_cat_size = 0; |
861 | u32 *host_cat_array = NULL; | 839 | u32 *host_cat_array = NULL; |
862 | 840 | ||
863 | if (doi_def->type == CIPSO_V4_MAP_STD) { | 841 | if (doi_def->type == CIPSO_V4_MAP_TRANS) { |
864 | host_cat_size = doi_def->map.std->cat.local_size; | 842 | host_cat_size = doi_def->map.std->cat.local_size; |
865 | host_cat_array = doi_def->map.std->cat.local; | 843 | host_cat_array = doi_def->map.std->cat.local; |
866 | } | 844 | } |
@@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, | |||
875 | case CIPSO_V4_MAP_PASS: | 853 | case CIPSO_V4_MAP_PASS: |
876 | net_spot = host_spot; | 854 | net_spot = host_spot; |
877 | break; | 855 | break; |
878 | case CIPSO_V4_MAP_STD: | 856 | case CIPSO_V4_MAP_TRANS: |
879 | if (host_spot >= host_cat_size) | 857 | if (host_spot >= host_cat_size) |
880 | return -EPERM; | 858 | return -EPERM; |
881 | net_spot = host_cat_array[host_spot]; | 859 | net_spot = host_cat_array[host_spot]; |
@@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, | |||
921 | u32 net_cat_size = 0; | 899 | u32 net_cat_size = 0; |
922 | u32 *net_cat_array = NULL; | 900 | u32 *net_cat_array = NULL; |
923 | 901 | ||
924 | if (doi_def->type == CIPSO_V4_MAP_STD) { | 902 | if (doi_def->type == CIPSO_V4_MAP_TRANS) { |
925 | net_cat_size = doi_def->map.std->cat.cipso_size; | 903 | net_cat_size = doi_def->map.std->cat.cipso_size; |
926 | net_cat_array = doi_def->map.std->cat.cipso; | 904 | net_cat_array = doi_def->map.std->cat.cipso; |
927 | } | 905 | } |
@@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, | |||
941 | case CIPSO_V4_MAP_PASS: | 919 | case CIPSO_V4_MAP_PASS: |
942 | host_spot = net_spot; | 920 | host_spot = net_spot; |
943 | break; | 921 | break; |
944 | case CIPSO_V4_MAP_STD: | 922 | case CIPSO_V4_MAP_TRANS: |
945 | if (net_spot >= net_cat_size) | 923 | if (net_spot >= net_cat_size) |
946 | return -EPERM; | 924 | return -EPERM; |
947 | host_spot = net_cat_array[net_spot]; | 925 | host_spot = net_cat_array[net_spot]; |
@@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, | |||
1277 | } else | 1255 | } else |
1278 | tag_len = 4; | 1256 | tag_len = 4; |
1279 | 1257 | ||
1280 | buffer[0] = 0x01; | 1258 | buffer[0] = CIPSO_V4_TAG_RBITMAP; |
1281 | buffer[1] = tag_len; | 1259 | buffer[1] = tag_len; |
1282 | buffer[3] = level; | 1260 | buffer[3] = level; |
1283 | 1261 | ||
@@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, | |||
1373 | } else | 1351 | } else |
1374 | tag_len = 4; | 1352 | tag_len = 4; |
1375 | 1353 | ||
1376 | buffer[0] = 0x02; | 1354 | buffer[0] = CIPSO_V4_TAG_ENUM; |
1377 | buffer[1] = tag_len; | 1355 | buffer[1] = tag_len; |
1378 | buffer[3] = level; | 1356 | buffer[3] = level; |
1379 | 1357 | ||
@@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, | |||
1469 | } else | 1447 | } else |
1470 | tag_len = 4; | 1448 | tag_len = 4; |
1471 | 1449 | ||
1472 | buffer[0] = 0x05; | 1450 | buffer[0] = CIPSO_V4_TAG_RANGE; |
1473 | buffer[1] = tag_len; | 1451 | buffer[1] = tag_len; |
1474 | buffer[3] = level; | 1452 | buffer[3] = level; |
1475 | 1453 | ||
@@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, | |||
1523 | } | 1501 | } |
1524 | 1502 | ||
1525 | /** | 1503 | /** |
1504 | * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) | ||
1505 | * @doi_def: the DOI definition | ||
1506 | * @secattr: the security attributes | ||
1507 | * @buffer: the option buffer | ||
1508 | * @buffer_len: length of buffer in bytes | ||
1509 | * | ||
1510 | * Description: | ||
1511 | * Generate a CIPSO option using the local tag. Returns the size of the tag | ||
1512 | * on success, negative values on failure. | ||
1513 | * | ||
1514 | */ | ||
1515 | static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, | ||
1516 | const struct netlbl_lsm_secattr *secattr, | ||
1517 | unsigned char *buffer, | ||
1518 | u32 buffer_len) | ||
1519 | { | ||
1520 | if (!(secattr->flags & NETLBL_SECATTR_SECID)) | ||
1521 | return -EPERM; | ||
1522 | |||
1523 | buffer[0] = CIPSO_V4_TAG_LOCAL; | ||
1524 | buffer[1] = CIPSO_V4_TAG_LOC_BLEN; | ||
1525 | *(u32 *)&buffer[2] = secattr->attr.secid; | ||
1526 | |||
1527 | return CIPSO_V4_TAG_LOC_BLEN; | ||
1528 | } | ||
1529 | |||
1530 | /** | ||
1531 | * cipso_v4_parsetag_loc - Parse a CIPSO local tag | ||
1532 | * @doi_def: the DOI definition | ||
1533 | * @tag: the CIPSO tag | ||
1534 | * @secattr: the security attributes | ||
1535 | * | ||
1536 | * Description: | ||
1537 | * Parse a CIPSO local tag and return the security attributes in @secattr. | ||
1538 | * Return zero on success, negatives values on failure. | ||
1539 | * | ||
1540 | */ | ||
1541 | static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, | ||
1542 | const unsigned char *tag, | ||
1543 | struct netlbl_lsm_secattr *secattr) | ||
1544 | { | ||
1545 | secattr->attr.secid = *(u32 *)&tag[2]; | ||
1546 | secattr->flags |= NETLBL_SECATTR_SECID; | ||
1547 | |||
1548 | return 0; | ||
1549 | } | ||
1550 | |||
1551 | /** | ||
1526 | * cipso_v4_validate - Validate a CIPSO option | 1552 | * cipso_v4_validate - Validate a CIPSO option |
1527 | * @option: the start of the option, on error it is set to point to the error | 1553 | * @option: the start of the option, on error it is set to point to the error |
1528 | * | 1554 | * |
@@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, | |||
1541 | * that is unrecognized." | 1567 | * that is unrecognized." |
1542 | * | 1568 | * |
1543 | */ | 1569 | */ |
1544 | int cipso_v4_validate(unsigned char **option) | 1570 | int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) |
1545 | { | 1571 | { |
1546 | unsigned char *opt = *option; | 1572 | unsigned char *opt = *option; |
1547 | unsigned char *tag; | 1573 | unsigned char *tag; |
@@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1566 | goto validate_return_locked; | 1592 | goto validate_return_locked; |
1567 | } | 1593 | } |
1568 | 1594 | ||
1569 | opt_iter = 6; | 1595 | opt_iter = CIPSO_V4_HDR_LEN; |
1570 | tag = opt + opt_iter; | 1596 | tag = opt + opt_iter; |
1571 | while (opt_iter < opt_len) { | 1597 | while (opt_iter < opt_len) { |
1572 | for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) | 1598 | for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) |
@@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1584 | 1610 | ||
1585 | switch (tag[0]) { | 1611 | switch (tag[0]) { |
1586 | case CIPSO_V4_TAG_RBITMAP: | 1612 | case CIPSO_V4_TAG_RBITMAP: |
1587 | if (tag_len < 4) { | 1613 | if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { |
1588 | err_offset = opt_iter + 1; | 1614 | err_offset = opt_iter + 1; |
1589 | goto validate_return_locked; | 1615 | goto validate_return_locked; |
1590 | } | 1616 | } |
@@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1602 | err_offset = opt_iter + 3; | 1628 | err_offset = opt_iter + 3; |
1603 | goto validate_return_locked; | 1629 | goto validate_return_locked; |
1604 | } | 1630 | } |
1605 | if (tag_len > 4 && | 1631 | if (tag_len > CIPSO_V4_TAG_RBM_BLEN && |
1606 | cipso_v4_map_cat_rbm_valid(doi_def, | 1632 | cipso_v4_map_cat_rbm_valid(doi_def, |
1607 | &tag[4], | 1633 | &tag[4], |
1608 | tag_len - 4) < 0) { | 1634 | tag_len - 4) < 0) { |
@@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1612 | } | 1638 | } |
1613 | break; | 1639 | break; |
1614 | case CIPSO_V4_TAG_ENUM: | 1640 | case CIPSO_V4_TAG_ENUM: |
1615 | if (tag_len < 4) { | 1641 | if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { |
1616 | err_offset = opt_iter + 1; | 1642 | err_offset = opt_iter + 1; |
1617 | goto validate_return_locked; | 1643 | goto validate_return_locked; |
1618 | } | 1644 | } |
@@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1622 | err_offset = opt_iter + 3; | 1648 | err_offset = opt_iter + 3; |
1623 | goto validate_return_locked; | 1649 | goto validate_return_locked; |
1624 | } | 1650 | } |
1625 | if (tag_len > 4 && | 1651 | if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && |
1626 | cipso_v4_map_cat_enum_valid(doi_def, | 1652 | cipso_v4_map_cat_enum_valid(doi_def, |
1627 | &tag[4], | 1653 | &tag[4], |
1628 | tag_len - 4) < 0) { | 1654 | tag_len - 4) < 0) { |
@@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1631 | } | 1657 | } |
1632 | break; | 1658 | break; |
1633 | case CIPSO_V4_TAG_RANGE: | 1659 | case CIPSO_V4_TAG_RANGE: |
1634 | if (tag_len < 4) { | 1660 | if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { |
1635 | err_offset = opt_iter + 1; | 1661 | err_offset = opt_iter + 1; |
1636 | goto validate_return_locked; | 1662 | goto validate_return_locked; |
1637 | } | 1663 | } |
@@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option) | |||
1641 | err_offset = opt_iter + 3; | 1667 | err_offset = opt_iter + 3; |
1642 | goto validate_return_locked; | 1668 | goto validate_return_locked; |
1643 | } | 1669 | } |
1644 | if (tag_len > 4 && | 1670 | if (tag_len > CIPSO_V4_TAG_RNG_BLEN && |
1645 | cipso_v4_map_cat_rng_valid(doi_def, | 1671 | cipso_v4_map_cat_rng_valid(doi_def, |
1646 | &tag[4], | 1672 | &tag[4], |
1647 | tag_len - 4) < 0) { | 1673 | tag_len - 4) < 0) { |
@@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option) | |||
1649 | goto validate_return_locked; | 1675 | goto validate_return_locked; |
1650 | } | 1676 | } |
1651 | break; | 1677 | break; |
1678 | case CIPSO_V4_TAG_LOCAL: | ||
1679 | /* This is a non-standard tag that we only allow for | ||
1680 | * local connections, so if the incoming interface is | ||
1681 | * not the loopback device drop the packet. */ | ||
1682 | if (!(skb->dev->flags & IFF_LOOPBACK)) { | ||
1683 | err_offset = opt_iter; | ||
1684 | goto validate_return_locked; | ||
1685 | } | ||
1686 | if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { | ||
1687 | err_offset = opt_iter + 1; | ||
1688 | goto validate_return_locked; | ||
1689 | } | ||
1690 | break; | ||
1652 | default: | 1691 | default: |
1653 | err_offset = opt_iter; | 1692 | err_offset = opt_iter; |
1654 | goto validate_return_locked; | 1693 | goto validate_return_locked; |
@@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) | |||
1704 | } | 1743 | } |
1705 | 1744 | ||
1706 | /** | 1745 | /** |
1707 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket | 1746 | * cipso_v4_genopt - Generate a CIPSO option |
1708 | * @sk: the socket | 1747 | * @buf: the option buffer |
1748 | * @buf_len: the size of @buf in bytes | ||
1709 | * @doi_def: the CIPSO DOI to use | 1749 | * @doi_def: the CIPSO DOI to use |
1710 | * @secattr: the specific security attributes of the socket | 1750 | * @secattr: the security attributes |
1711 | * | 1751 | * |
1712 | * Description: | 1752 | * Description: |
1713 | * Set the CIPSO option on the given socket using the DOI definition and | 1753 | * Generate a CIPSO option using the DOI definition and security attributes |
1714 | * security attributes passed to the function. This function requires | 1754 | * passed to the function. Returns the length of the option on success and |
1715 | * exclusive access to @sk, which means it either needs to be in the | 1755 | * negative values on failure. |
1716 | * process of being created or locked. Returns zero on success and negative | ||
1717 | * values on failure. | ||
1718 | * | 1756 | * |
1719 | */ | 1757 | */ |
1720 | int cipso_v4_sock_setattr(struct sock *sk, | 1758 | static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, |
1721 | const struct cipso_v4_doi *doi_def, | 1759 | const struct cipso_v4_doi *doi_def, |
1722 | const struct netlbl_lsm_secattr *secattr) | 1760 | const struct netlbl_lsm_secattr *secattr) |
1723 | { | 1761 | { |
1724 | int ret_val = -EPERM; | 1762 | int ret_val; |
1725 | u32 iter; | 1763 | u32 iter; |
1726 | unsigned char *buf; | ||
1727 | u32 buf_len = 0; | ||
1728 | u32 opt_len; | ||
1729 | struct ip_options *opt = NULL; | ||
1730 | struct inet_sock *sk_inet; | ||
1731 | struct inet_connection_sock *sk_conn; | ||
1732 | 1764 | ||
1733 | /* In the case of sock_create_lite(), the sock->sk field is not | 1765 | if (buf_len <= CIPSO_V4_HDR_LEN) |
1734 | * defined yet but it is not a problem as the only users of these | 1766 | return -ENOSPC; |
1735 | * "lite" PF_INET sockets are functions which do an accept() call | ||
1736 | * afterwards so we will label the socket as part of the accept(). */ | ||
1737 | if (sk == NULL) | ||
1738 | return 0; | ||
1739 | |||
1740 | /* We allocate the maximum CIPSO option size here so we are probably | ||
1741 | * being a little wasteful, but it makes our life _much_ easier later | ||
1742 | * on and after all we are only talking about 40 bytes. */ | ||
1743 | buf_len = CIPSO_V4_OPT_LEN_MAX; | ||
1744 | buf = kmalloc(buf_len, GFP_ATOMIC); | ||
1745 | if (buf == NULL) { | ||
1746 | ret_val = -ENOMEM; | ||
1747 | goto socket_setattr_failure; | ||
1748 | } | ||
1749 | 1767 | ||
1750 | /* XXX - This code assumes only one tag per CIPSO option which isn't | 1768 | /* XXX - This code assumes only one tag per CIPSO option which isn't |
1751 | * really a good assumption to make but since we only support the MAC | 1769 | * really a good assumption to make but since we only support the MAC |
@@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1772 | &buf[CIPSO_V4_HDR_LEN], | 1790 | &buf[CIPSO_V4_HDR_LEN], |
1773 | buf_len - CIPSO_V4_HDR_LEN); | 1791 | buf_len - CIPSO_V4_HDR_LEN); |
1774 | break; | 1792 | break; |
1793 | case CIPSO_V4_TAG_LOCAL: | ||
1794 | ret_val = cipso_v4_gentag_loc(doi_def, | ||
1795 | secattr, | ||
1796 | &buf[CIPSO_V4_HDR_LEN], | ||
1797 | buf_len - CIPSO_V4_HDR_LEN); | ||
1798 | break; | ||
1775 | default: | 1799 | default: |
1776 | ret_val = -EPERM; | 1800 | return -EPERM; |
1777 | goto socket_setattr_failure; | ||
1778 | } | 1801 | } |
1779 | 1802 | ||
1780 | iter++; | 1803 | iter++; |
@@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1782 | iter < CIPSO_V4_TAG_MAXCNT && | 1805 | iter < CIPSO_V4_TAG_MAXCNT && |
1783 | doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); | 1806 | doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); |
1784 | if (ret_val < 0) | 1807 | if (ret_val < 0) |
1785 | goto socket_setattr_failure; | 1808 | return ret_val; |
1786 | cipso_v4_gentag_hdr(doi_def, buf, ret_val); | 1809 | cipso_v4_gentag_hdr(doi_def, buf, ret_val); |
1787 | buf_len = CIPSO_V4_HDR_LEN + ret_val; | 1810 | return CIPSO_V4_HDR_LEN + ret_val; |
1811 | } | ||
1812 | |||
1813 | /** | ||
1814 | * cipso_v4_sock_setattr - Add a CIPSO option to a socket | ||
1815 | * @sk: the socket | ||
1816 | * @doi_def: the CIPSO DOI to use | ||
1817 | * @secattr: the specific security attributes of the socket | ||
1818 | * | ||
1819 | * Description: | ||
1820 | * Set the CIPSO option on the given socket using the DOI definition and | ||
1821 | * security attributes passed to the function. This function requires | ||
1822 | * exclusive access to @sk, which means it either needs to be in the | ||
1823 | * process of being created or locked. Returns zero on success and negative | ||
1824 | * values on failure. | ||
1825 | * | ||
1826 | */ | ||
1827 | int cipso_v4_sock_setattr(struct sock *sk, | ||
1828 | const struct cipso_v4_doi *doi_def, | ||
1829 | const struct netlbl_lsm_secattr *secattr) | ||
1830 | { | ||
1831 | int ret_val = -EPERM; | ||
1832 | unsigned char *buf = NULL; | ||
1833 | u32 buf_len; | ||
1834 | u32 opt_len; | ||
1835 | struct ip_options *opt = NULL; | ||
1836 | struct inet_sock *sk_inet; | ||
1837 | struct inet_connection_sock *sk_conn; | ||
1838 | |||
1839 | /* In the case of sock_create_lite(), the sock->sk field is not | ||
1840 | * defined yet but it is not a problem as the only users of these | ||
1841 | * "lite" PF_INET sockets are functions which do an accept() call | ||
1842 | * afterwards so we will label the socket as part of the accept(). */ | ||
1843 | if (sk == NULL) | ||
1844 | return 0; | ||
1845 | |||
1846 | /* We allocate the maximum CIPSO option size here so we are probably | ||
1847 | * being a little wasteful, but it makes our life _much_ easier later | ||
1848 | * on and after all we are only talking about 40 bytes. */ | ||
1849 | buf_len = CIPSO_V4_OPT_LEN_MAX; | ||
1850 | buf = kmalloc(buf_len, GFP_ATOMIC); | ||
1851 | if (buf == NULL) { | ||
1852 | ret_val = -ENOMEM; | ||
1853 | goto socket_setattr_failure; | ||
1854 | } | ||
1855 | |||
1856 | ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); | ||
1857 | if (ret_val < 0) | ||
1858 | goto socket_setattr_failure; | ||
1859 | buf_len = ret_val; | ||
1788 | 1860 | ||
1789 | /* We can't use ip_options_get() directly because it makes a call to | 1861 | /* We can't use ip_options_get() directly because it makes a call to |
1790 | * ip_options_get_alloc() which allocates memory with GFP_KERNEL and | 1862 | * ip_options_get_alloc() which allocates memory with GFP_KERNEL and |
@@ -1822,6 +1894,80 @@ socket_setattr_failure: | |||
1822 | } | 1894 | } |
1823 | 1895 | ||
1824 | /** | 1896 | /** |
1897 | * cipso_v4_sock_delattr - Delete the CIPSO option from a socket | ||
1898 | * @sk: the socket | ||
1899 | * | ||
1900 | * Description: | ||
1901 | * Removes the CIPSO option from a socket, if present. | ||
1902 | * | ||
1903 | */ | ||
1904 | void cipso_v4_sock_delattr(struct sock *sk) | ||
1905 | { | ||
1906 | u8 hdr_delta; | ||
1907 | struct ip_options *opt; | ||
1908 | struct inet_sock *sk_inet; | ||
1909 | |||
1910 | sk_inet = inet_sk(sk); | ||
1911 | opt = sk_inet->opt; | ||
1912 | if (opt == NULL || opt->cipso == 0) | ||
1913 | return; | ||
1914 | |||
1915 | if (opt->srr || opt->rr || opt->ts || opt->router_alert) { | ||
1916 | u8 cipso_len; | ||
1917 | u8 cipso_off; | ||
1918 | unsigned char *cipso_ptr; | ||
1919 | int iter; | ||
1920 | int optlen_new; | ||
1921 | |||
1922 | cipso_off = opt->cipso - sizeof(struct iphdr); | ||
1923 | cipso_ptr = &opt->__data[cipso_off]; | ||
1924 | cipso_len = cipso_ptr[1]; | ||
1925 | |||
1926 | if (opt->srr > opt->cipso) | ||
1927 | opt->srr -= cipso_len; | ||
1928 | if (opt->rr > opt->cipso) | ||
1929 | opt->rr -= cipso_len; | ||
1930 | if (opt->ts > opt->cipso) | ||
1931 | opt->ts -= cipso_len; | ||
1932 | if (opt->router_alert > opt->cipso) | ||
1933 | opt->router_alert -= cipso_len; | ||
1934 | opt->cipso = 0; | ||
1935 | |||
1936 | memmove(cipso_ptr, cipso_ptr + cipso_len, | ||
1937 | opt->optlen - cipso_off - cipso_len); | ||
1938 | |||
1939 | /* determining the new total option length is tricky because of | ||
1940 | * the padding necessary, the only thing i can think to do at | ||
1941 | * this point is walk the options one-by-one, skipping the | ||
1942 | * padding at the end to determine the actual option size and | ||
1943 | * from there we can determine the new total option length */ | ||
1944 | iter = 0; | ||
1945 | optlen_new = 0; | ||
1946 | while (iter < opt->optlen) | ||
1947 | if (opt->__data[iter] != IPOPT_NOP) { | ||
1948 | iter += opt->__data[iter + 1]; | ||
1949 | optlen_new = iter; | ||
1950 | } else | ||
1951 | iter++; | ||
1952 | hdr_delta = opt->optlen; | ||
1953 | opt->optlen = (optlen_new + 3) & ~3; | ||
1954 | hdr_delta -= opt->optlen; | ||
1955 | } else { | ||
1956 | /* only the cipso option was present on the socket so we can | ||
1957 | * remove the entire option struct */ | ||
1958 | sk_inet->opt = NULL; | ||
1959 | hdr_delta = opt->optlen; | ||
1960 | kfree(opt); | ||
1961 | } | ||
1962 | |||
1963 | if (sk_inet->is_icsk && hdr_delta > 0) { | ||
1964 | struct inet_connection_sock *sk_conn = inet_csk(sk); | ||
1965 | sk_conn->icsk_ext_hdr_len -= hdr_delta; | ||
1966 | sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); | ||
1967 | } | ||
1968 | } | ||
1969 | |||
1970 | /** | ||
1825 | * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions | 1971 | * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions |
1826 | * @cipso: the CIPSO v4 option | 1972 | * @cipso: the CIPSO v4 option |
1827 | * @secattr: the security attributes | 1973 | * @secattr: the security attributes |
@@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso, | |||
1859 | case CIPSO_V4_TAG_RANGE: | 2005 | case CIPSO_V4_TAG_RANGE: |
1860 | ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); | 2006 | ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); |
1861 | break; | 2007 | break; |
2008 | case CIPSO_V4_TAG_LOCAL: | ||
2009 | ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); | ||
2010 | break; | ||
1862 | } | 2011 | } |
1863 | if (ret_val == 0) | 2012 | if (ret_val == 0) |
1864 | secattr->type = NETLBL_NLTYPE_CIPSOV4; | 2013 | secattr->type = NETLBL_NLTYPE_CIPSOV4; |
@@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) | |||
1893 | } | 2042 | } |
1894 | 2043 | ||
1895 | /** | 2044 | /** |
2045 | * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet | ||
2046 | * @skb: the packet | ||
2047 | * @secattr: the security attributes | ||
2048 | * | ||
2049 | * Description: | ||
2050 | * Set the CIPSO option on the given packet based on the security attributes. | ||
2051 | * Returns a pointer to the IP header on success and NULL on failure. | ||
2052 | * | ||
2053 | */ | ||
2054 | int cipso_v4_skbuff_setattr(struct sk_buff *skb, | ||
2055 | const struct cipso_v4_doi *doi_def, | ||
2056 | const struct netlbl_lsm_secattr *secattr) | ||
2057 | { | ||
2058 | int ret_val; | ||
2059 | struct iphdr *iph; | ||
2060 | struct ip_options *opt = &IPCB(skb)->opt; | ||
2061 | unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; | ||
2062 | u32 buf_len = CIPSO_V4_OPT_LEN_MAX; | ||
2063 | u32 opt_len; | ||
2064 | int len_delta; | ||
2065 | |||
2066 | buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); | ||
2067 | if (buf_len < 0) | ||
2068 | return buf_len; | ||
2069 | opt_len = (buf_len + 3) & ~3; | ||
2070 | |||
2071 | /* we overwrite any existing options to ensure that we have enough | ||
2072 | * room for the CIPSO option, the reason is that we _need_ to guarantee | ||
2073 | * that the security label is applied to the packet - we do the same | ||
2074 | * thing when using the socket options and it hasn't caused a problem, | ||
2075 | * if we need to we can always revisit this choice later */ | ||
2076 | |||
2077 | len_delta = opt_len - opt->optlen; | ||
2078 | /* if we don't ensure enough headroom we could panic on the skb_push() | ||
2079 | * call below so make sure we have enough, we are also "mangling" the | ||
2080 | * packet so we should probably do a copy-on-write call anyway */ | ||
2081 | ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); | ||
2082 | if (ret_val < 0) | ||
2083 | return ret_val; | ||
2084 | |||
2085 | if (len_delta > 0) { | ||
2086 | /* we assume that the header + opt->optlen have already been | ||
2087 | * "pushed" in ip_options_build() or similar */ | ||
2088 | iph = ip_hdr(skb); | ||
2089 | skb_push(skb, len_delta); | ||
2090 | memmove((char *)iph - len_delta, iph, iph->ihl << 2); | ||
2091 | skb_reset_network_header(skb); | ||
2092 | iph = ip_hdr(skb); | ||
2093 | } else if (len_delta < 0) { | ||
2094 | iph = ip_hdr(skb); | ||
2095 | memset(iph + 1, IPOPT_NOP, opt->optlen); | ||
2096 | } else | ||
2097 | iph = ip_hdr(skb); | ||
2098 | |||
2099 | if (opt->optlen > 0) | ||
2100 | memset(opt, 0, sizeof(*opt)); | ||
2101 | opt->optlen = opt_len; | ||
2102 | opt->cipso = sizeof(struct iphdr); | ||
2103 | opt->is_changed = 1; | ||
2104 | |||
2105 | /* we have to do the following because we are being called from a | ||
2106 | * netfilter hook which means the packet already has had the header | ||
2107 | * fields populated and the checksum calculated - yes this means we | ||
2108 | * are doing more work than needed but we do it to keep the core | ||
2109 | * stack clean and tidy */ | ||
2110 | memcpy(iph + 1, buf, buf_len); | ||
2111 | if (opt_len > buf_len) | ||
2112 | memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); | ||
2113 | if (len_delta != 0) { | ||
2114 | iph->ihl = 5 + (opt_len >> 2); | ||
2115 | iph->tot_len = htons(skb->len); | ||
2116 | } | ||
2117 | ip_send_check(iph); | ||
2118 | |||
2119 | return 0; | ||
2120 | } | ||
2121 | |||
2122 | /** | ||
2123 | * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet | ||
2124 | * @skb: the packet | ||
2125 | * | ||
2126 | * Description: | ||
2127 | * Removes any and all CIPSO options from the given packet. Returns zero on | ||
2128 | * success, negative values on failure. | ||
2129 | * | ||
2130 | */ | ||
2131 | int cipso_v4_skbuff_delattr(struct sk_buff *skb) | ||
2132 | { | ||
2133 | int ret_val; | ||
2134 | struct iphdr *iph; | ||
2135 | struct ip_options *opt = &IPCB(skb)->opt; | ||
2136 | unsigned char *cipso_ptr; | ||
2137 | |||
2138 | if (opt->cipso == 0) | ||
2139 | return 0; | ||
2140 | |||
2141 | /* since we are changing the packet we should make a copy */ | ||
2142 | ret_val = skb_cow(skb, skb_headroom(skb)); | ||
2143 | if (ret_val < 0) | ||
2144 | return ret_val; | ||
2145 | |||
2146 | /* the easiest thing to do is just replace the cipso option with noop | ||
2147 | * options since we don't change the size of the packet, although we | ||
2148 | * still need to recalculate the checksum */ | ||
2149 | |||
2150 | iph = ip_hdr(skb); | ||
2151 | cipso_ptr = (unsigned char *)iph + opt->cipso; | ||
2152 | memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); | ||
2153 | opt->cipso = 0; | ||
2154 | opt->is_changed = 1; | ||
2155 | |||
2156 | ip_send_check(iph); | ||
2157 | |||
2158 | return 0; | ||
2159 | } | ||
2160 | |||
2161 | /** | ||
1896 | * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option | 2162 | * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option |
1897 | * @skb: the packet | 2163 | * @skb: the packet |
1898 | * @secattr: the security attributes | 2164 | * @secattr: the security attributes |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index abef49376ac8..56fce3ab6c55 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1281,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, | |||
1281 | return ret; | 1281 | return ret; |
1282 | } | 1282 | } |
1283 | 1283 | ||
1284 | static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, | 1284 | static int devinet_conf_sysctl(ctl_table *table, |
1285 | void __user *oldval, size_t __user *oldlenp, | 1285 | void __user *oldval, size_t __user *oldlenp, |
1286 | void __user *newval, size_t newlen) | 1286 | void __user *newval, size_t newlen) |
1287 | { | 1287 | { |
@@ -1377,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, | |||
1377 | return ret; | 1377 | return ret; |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, | 1380 | int ipv4_doint_and_flush_strategy(ctl_table *table, |
1381 | void __user *oldval, size_t __user *oldlenp, | 1381 | void __user *oldval, size_t __user *oldlenp, |
1382 | void __user *newval, size_t newlen) | 1382 | void __user *newval, size_t newlen) |
1383 | { | 1383 | { |
1384 | int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, | 1384 | int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); |
1385 | newval, newlen); | ||
1386 | struct net *net = table->extra2; | 1385 | struct net *net = table->extra2; |
1387 | 1386 | ||
1388 | if (ret == 1) | 1387 | if (ret == 1) |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index be3f18a7a40e..2c88da6e7862 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -438,7 +438,7 @@ int ip_options_compile(struct net *net, | |||
438 | goto error; | 438 | goto error; |
439 | } | 439 | } |
440 | opt->cipso = optptr - iph; | 440 | opt->cipso = optptr - iph; |
441 | if (cipso_v4_validate(&optptr)) { | 441 | if (cipso_v4_validate(skb, &optptr)) { |
442 | pp_ptr = optptr; | 442 | pp_ptr = optptr; |
443 | goto error; | 443 | goto error; |
444 | } | 444 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8d23cc7efbad..2ea6dcc3e2cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -2913,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | |||
2913 | } | 2913 | } |
2914 | 2914 | ||
2915 | static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, | 2915 | static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, |
2916 | int __user *name, | ||
2917 | int nlen, | ||
2918 | void __user *oldval, | 2916 | void __user *oldval, |
2919 | size_t __user *oldlenp, | 2917 | size_t __user *oldlenp, |
2920 | void __user *newval, | 2918 | void __user *newval, |
@@ -2977,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, | |||
2977 | } | 2975 | } |
2978 | 2976 | ||
2979 | static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, | 2977 | static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, |
2980 | int __user *name, | ||
2981 | int nlen, | ||
2982 | void __user *oldval, | 2978 | void __user *oldval, |
2983 | size_t __user *oldlenp, | 2979 | size_t __user *oldlenp, |
2984 | void __user *newval, | 2980 | void __user *newval, |
2985 | size_t newlen) | 2981 | size_t newlen) |
2986 | { | 2982 | { |
2987 | int old = ip_rt_secret_interval; | 2983 | int old = ip_rt_secret_interval; |
2988 | int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, | 2984 | int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); |
2989 | newlen); | ||
2990 | 2985 | ||
2991 | rt_secret_reschedule(old); | 2986 | rt_secret_reschedule(old); |
2992 | 2987 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 276d047fb85a..1bb10df8ce7d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, | |||
64 | } | 64 | } |
65 | 65 | ||
66 | /* Validate changes from sysctl interface. */ | 66 | /* Validate changes from sysctl interface. */ |
67 | static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, | 67 | static int ipv4_sysctl_local_port_range(ctl_table *table, |
68 | int nlen, void __user *oldval, | 68 | void __user *oldval, |
69 | size_t __user *oldlenp, | 69 | size_t __user *oldlenp, |
70 | void __user *newval, size_t newlen) | 70 | void __user *newval, size_t newlen) |
71 | { | 71 | { |
@@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, | |||
80 | }; | 80 | }; |
81 | 81 | ||
82 | inet_get_local_port_range(range, range + 1); | 82 | inet_get_local_port_range(range, range + 1); |
83 | ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); | 83 | ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); |
84 | if (ret == 0 && newval && newlen) { | 84 | if (ret == 0 && newval && newlen) { |
85 | if (range[1] < range[0]) | 85 | if (range[1] < range[0]) |
86 | ret = -EINVAL; | 86 | ret = -EINVAL; |
@@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * | |||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
111 | 111 | ||
112 | static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, | 112 | static int sysctl_tcp_congestion_control(ctl_table *table, |
113 | int nlen, void __user *oldval, | 113 | void __user *oldval, |
114 | size_t __user *oldlenp, | 114 | size_t __user *oldlenp, |
115 | void __user *newval, size_t newlen) | 115 | void __user *newval, size_t newlen) |
116 | { | 116 | { |
@@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, | |||
122 | int ret; | 122 | int ret; |
123 | 123 | ||
124 | tcp_get_default_congestion_control(val); | 124 | tcp_get_default_congestion_control(val); |
125 | ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); | 125 | ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); |
126 | if (ret == 1 && newval && newlen) | 126 | if (ret == 1 && newval && newlen) |
127 | ret = tcp_set_default_congestion_control(val); | 127 | ret = tcp_set_default_congestion_control(val); |
128 | return ret; | 128 | return ret; |
@@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, | |||
165 | return ret; | 165 | return ret; |
166 | } | 166 | } |
167 | 167 | ||
168 | static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, | 168 | static int strategy_allowed_congestion_control(ctl_table *table, |
169 | int nlen, void __user *oldval, | 169 | void __user *oldval, |
170 | size_t __user *oldlenp, | 170 | size_t __user *oldlenp, |
171 | void __user *newval, | 171 | void __user *newval, |
172 | size_t newlen) | 172 | size_t newlen) |
@@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam | |||
179 | return -ENOMEM; | 179 | return -ENOMEM; |
180 | 180 | ||
181 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); | 181 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); |
182 | ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); | 182 | ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); |
183 | if (ret == 1 && newval && newlen) | 183 | if (ret == 1 && newval && newlen) |
184 | ret = tcp_set_allowed_congestion_control(tbl.data); | 184 | ret = tcp_set_allowed_congestion_control(tbl.data); |
185 | kfree(tbl.data); | 185 | kfree(tbl.data); |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7b6a584b62dd..eea9542728ca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, | |||
3982 | } | 3982 | } |
3983 | 3983 | ||
3984 | static int addrconf_sysctl_forward_strategy(ctl_table *table, | 3984 | static int addrconf_sysctl_forward_strategy(ctl_table *table, |
3985 | int __user *name, int nlen, | ||
3986 | void __user *oldval, | 3985 | void __user *oldval, |
3987 | size_t __user *oldlenp, | 3986 | size_t __user *oldlenp, |
3988 | void __user *newval, size_t newlen) | 3987 | void __user *newval, size_t newlen) |
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index aae7ddcc8a2e..172438320eec 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f | |||
1730 | return ret; | 1730 | return ret; |
1731 | } | 1731 | } |
1732 | 1732 | ||
1733 | int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, | 1733 | int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, |
1734 | int nlen, void __user *oldval, | 1734 | void __user *oldval, size_t __user *oldlenp, |
1735 | size_t __user *oldlenp, | ||
1736 | void __user *newval, size_t newlen) | 1735 | void __user *newval, size_t newlen) |
1737 | { | 1736 | { |
1738 | struct net_device *dev = ctl->extra1; | 1737 | struct net_device *dev = ctl->extra1; |
@@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, | |||
1745 | 1744 | ||
1746 | switch (ctl->ctl_name) { | 1745 | switch (ctl->ctl_name) { |
1747 | case NET_NEIGH_REACHABLE_TIME: | 1746 | case NET_NEIGH_REACHABLE_TIME: |
1748 | ret = sysctl_jiffies(ctl, name, nlen, | 1747 | ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); |
1749 | oldval, oldlenp, newval, newlen); | ||
1750 | break; | 1748 | break; |
1751 | case NET_NEIGH_RETRANS_TIME_MS: | 1749 | case NET_NEIGH_RETRANS_TIME_MS: |
1752 | case NET_NEIGH_REACHABLE_TIME_MS: | 1750 | case NET_NEIGH_REACHABLE_TIME_MS: |
1753 | ret = sysctl_ms_jiffies(ctl, name, nlen, | 1751 | ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); |
1754 | oldval, oldlenp, newval, newlen); | ||
1755 | break; | 1752 | break; |
1756 | default: | 1753 | default: |
1757 | ret = 0; | 1754 | ret = 0; |
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 03591d37b9cc..b92df5c1dfcf 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c | |||
@@ -115,7 +115,7 @@ int nf_conntrack_acct_init(struct net *net) | |||
115 | 115 | ||
116 | if (net_eq(net, &init_net)) { | 116 | if (net_eq(net, &init_net)) { |
117 | #ifdef CONFIG_NF_CT_ACCT | 117 | #ifdef CONFIG_NF_CT_ACCT |
118 | printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); | 118 | printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n"); |
119 | printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); | 119 | printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); |
120 | printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); | 120 | printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); |
121 | #endif | 121 | #endif |
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 373e51e91ce5..1bc3001d1827 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c | |||
@@ -65,7 +65,7 @@ void | |||
65 | struct nf_conntrack_expect *exp) __read_mostly; | 65 | struct nf_conntrack_expect *exp) __read_mostly; |
66 | EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); | 66 | EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); |
67 | 67 | ||
68 | #ifdef DEBUG | 68 | #if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) |
69 | /* PptpControlMessageType names */ | 69 | /* PptpControlMessageType names */ |
70 | const char *const pptp_msg_name[] = { | 70 | const char *const pptp_msg_name[] = { |
71 | "UNKNOWN_MESSAGE", | 71 | "UNKNOWN_MESSAGE", |
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index 8af18c0a47d9..ea750e9df65f 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile | |||
@@ -5,7 +5,8 @@ | |||
5 | # | 5 | # |
6 | 6 | ||
7 | # base objects | 7 | # base objects |
8 | obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o | 8 | obj-y := netlabel_user.o netlabel_kapi.o |
9 | obj-y += netlabel_domainhash.o netlabel_addrlist.o | ||
9 | 10 | ||
10 | # management objects | 11 | # management objects |
11 | obj-y += netlabel_mgmt.o | 12 | obj-y += netlabel_mgmt.o |
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c new file mode 100644 index 000000000000..b0925a303353 --- /dev/null +++ b/net/netlabel/netlabel_addrlist.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * NetLabel Network Address Lists | ||
3 | * | ||
4 | * This file contains network address list functions used to manage ordered | ||
5 | * lists of network addresses for use by the NetLabel subsystem. The NetLabel | ||
6 | * system manages static and dynamic label mappings for network protocols such | ||
7 | * as CIPSO and RIPSO. | ||
8 | * | ||
9 | * Author: Paul Moore <paul.moore@hp.com> | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or modify | ||
17 | * it under the terms of the GNU General Public License as published by | ||
18 | * the Free Software Foundation; either version 2 of the License, or | ||
19 | * (at your option) any later version. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
24 | * the GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/types.h> | ||
33 | #include <linux/rcupdate.h> | ||
34 | #include <linux/list.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/in.h> | ||
37 | #include <linux/in6.h> | ||
38 | #include <linux/ip.h> | ||
39 | #include <linux/ipv6.h> | ||
40 | #include <net/ip.h> | ||
41 | #include <net/ipv6.h> | ||
42 | #include <linux/audit.h> | ||
43 | |||
44 | #include "netlabel_addrlist.h" | ||
45 | |||
46 | /* | ||
47 | * Address List Functions | ||
48 | */ | ||
49 | |||
50 | /** | ||
51 | * netlbl_af4list_search - Search for a matching IPv4 address entry | ||
52 | * @addr: IPv4 address | ||
53 | * @head: the list head | ||
54 | * | ||
55 | * Description: | ||
56 | * Searches the IPv4 address list given by @head. If a matching address entry | ||
57 | * is found it is returned, otherwise NULL is returned. The caller is | ||
58 | * responsible for calling the rcu_read_[un]lock() functions. | ||
59 | * | ||
60 | */ | ||
61 | struct netlbl_af4list *netlbl_af4list_search(__be32 addr, | ||
62 | struct list_head *head) | ||
63 | { | ||
64 | struct netlbl_af4list *iter; | ||
65 | |||
66 | list_for_each_entry_rcu(iter, head, list) | ||
67 | if (iter->valid && (addr & iter->mask) == iter->addr) | ||
68 | return iter; | ||
69 | |||
70 | return NULL; | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * netlbl_af4list_search_exact - Search for an exact IPv4 address entry | ||
75 | * @addr: IPv4 address | ||
76 | * @mask: IPv4 address mask | ||
77 | * @head: the list head | ||
78 | * | ||
79 | * Description: | ||
80 | * Searches the IPv4 address list given by @head. If an exact match is found | |
81 | * it is returned, otherwise NULL is returned. The caller is responsible for | ||
82 | * calling the rcu_read_[un]lock() functions. | ||
83 | * | ||
84 | */ | ||
85 | struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, | ||
86 | __be32 mask, | ||
87 | struct list_head *head) | ||
88 | { | ||
89 | struct netlbl_af4list *iter; | ||
90 | |||
91 | list_for_each_entry_rcu(iter, head, list) | ||
92 | if (iter->valid && iter->addr == addr && iter->mask == mask) | ||
93 | return iter; | ||
94 | |||
95 | return NULL; | ||
96 | } | ||
97 | |||
98 | |||
99 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
100 | /** | ||
101 | * netlbl_af6list_search - Search for a matching IPv6 address entry | ||
102 | * @addr: IPv6 address | ||
103 | * @head: the list head | ||
104 | * | ||
105 | * Description: | ||
106 | * Searches the IPv6 address list given by @head. If a matching address entry | ||
107 | * is found it is returned, otherwise NULL is returned. The caller is | ||
108 | * responsible for calling the rcu_read_[un]lock() functions. | ||
109 | * | ||
110 | */ | ||
111 | struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, | ||
112 | struct list_head *head) | ||
113 | { | ||
114 | struct netlbl_af6list *iter; | ||
115 | |||
116 | list_for_each_entry_rcu(iter, head, list) | ||
117 | if (iter->valid && | ||
118 | ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) | ||
119 | return iter; | ||
120 | |||
121 | return NULL; | ||
122 | } | ||
123 | |||
124 | /** | ||
125 | * netlbl_af6list_search_exact - Search for an exact IPv6 address entry | ||
126 | * @addr: IPv6 address | ||
127 | * @mask: IPv6 address mask | ||
128 | * @head: the list head | ||
129 | * | ||
130 | * Description: | ||
131 | * Searches the IPv6 address list given by @head. If an exact match is found | |
132 | * it is returned, otherwise NULL is returned. The caller is responsible for | ||
133 | * calling the rcu_read_[un]lock() functions. | ||
134 | * | ||
135 | */ | ||
136 | struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, | ||
137 | const struct in6_addr *mask, | ||
138 | struct list_head *head) | ||
139 | { | ||
140 | struct netlbl_af6list *iter; | ||
141 | |||
142 | list_for_each_entry_rcu(iter, head, list) | ||
143 | if (iter->valid && | ||
144 | ipv6_addr_equal(&iter->addr, addr) && | ||
145 | ipv6_addr_equal(&iter->mask, mask)) | ||
146 | return iter; | ||
147 | |||
148 | return NULL; | ||
149 | } | ||
150 | #endif /* IPv6 */ | ||
151 | |||
152 | /** | ||
153 | * netlbl_af4list_add - Add a new IPv4 address entry to a list | ||
154 | * @entry: address entry | ||
155 | * @head: the list head | ||
156 | * | ||
157 | * Description: | ||
158 | * Add a new address entry to the list pointed to by @head. On success zero is | ||
159 | * returned, otherwise a negative value is returned. The caller is responsible | ||
160 | * for calling the necessary locking functions. | ||
161 | * | ||
162 | */ | ||
163 | int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head) | ||
164 | { | ||
165 | struct netlbl_af4list *iter; | ||
166 | |||
167 | iter = netlbl_af4list_search(entry->addr, head); | ||
168 | if (iter != NULL && | ||
169 | iter->addr == entry->addr && iter->mask == entry->mask) | ||
170 | return -EEXIST; | ||
171 | |||
172 | /* in order to speed up address searches through the list (the common | ||
173 | * case) we need to keep the list in order based on the size of the | ||
174 | * address mask such that the entry with the longest mask (largest | |
175 | * numerical value) appears first in the list */ | |
176 | list_for_each_entry_rcu(iter, head, list) | ||
177 | if (iter->valid && | ||
178 | ntohl(entry->mask) > ntohl(iter->mask)) { | ||
179 | __list_add_rcu(&entry->list, | ||
180 | iter->list.prev, | ||
181 | &iter->list); | ||
182 | return 0; | ||
183 | } | ||
184 | list_add_tail_rcu(&entry->list, head); | ||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
189 | /** | ||
190 | * netlbl_af6list_add - Add a new IPv6 address entry to a list | ||
191 | * @entry: address entry | ||
192 | * @head: the list head | ||
193 | * | ||
194 | * Description: | ||
195 | * Add a new address entry to the list pointed to by @head. On success zero is | ||
196 | * returned, otherwise a negative value is returned. The caller is responsible | ||
197 | * for calling the necessary locking functions. | ||
198 | * | ||
199 | */ | ||
200 | int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head) | ||
201 | { | ||
202 | struct netlbl_af6list *iter; | ||
203 | |||
204 | iter = netlbl_af6list_search(&entry->addr, head); | ||
205 | if (iter != NULL && | ||
206 | ipv6_addr_equal(&iter->addr, &entry->addr) && | ||
207 | ipv6_addr_equal(&iter->mask, &entry->mask)) | ||
208 | return -EEXIST; | ||
209 | |||
210 | /* in order to speed up address searches through the list (the common | ||
211 | * case) we need to keep the list in order based on the size of the | ||
212 | * address mask such that the entry with the longest mask (largest | |
213 | * numerical value) appears first in the list */ | |
214 | list_for_each_entry_rcu(iter, head, list) | ||
215 | if (iter->valid && | ||
216 | ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { | ||
217 | __list_add_rcu(&entry->list, | ||
218 | iter->list.prev, | ||
219 | &iter->list); | ||
220 | return 0; | ||
221 | } | ||
222 | list_add_tail_rcu(&entry->list, head); | ||
223 | return 0; | ||
224 | } | ||
225 | #endif /* IPv6 */ | ||
226 | |||
227 | /** | ||
228 | * netlbl_af4list_remove_entry - Remove an IPv4 address entry | ||
229 | * @entry: address entry | ||
230 | * | ||
231 | * Description: | ||
232 | * Remove the specified IP address entry. The caller is responsible for | ||
233 | * calling the necessary locking functions. | ||
234 | * | ||
235 | */ | ||
236 | void netlbl_af4list_remove_entry(struct netlbl_af4list *entry) | ||
237 | { | ||
238 | entry->valid = 0; | ||
239 | list_del_rcu(&entry->list); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * netlbl_af4list_remove - Remove an IPv4 address entry | ||
244 | * @addr: IP address | ||
245 | * @mask: IP address mask | ||
246 | * @head: the list head | ||
247 | * | ||
248 | * Description: | ||
249 | * Remove an IP address entry from the list pointed to by @head. Returns the | ||
250 | * entry on success, NULL on failure. The caller is responsible for calling | ||
251 | * the necessary locking functions. | ||
252 | * | ||
253 | */ | ||
254 | struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, | ||
255 | struct list_head *head) | ||
256 | { | ||
257 | struct netlbl_af4list *entry; | ||
258 | |||
259 | entry = netlbl_af4list_search(addr, head); | ||
260 | if (entry != NULL && entry->addr == addr && entry->mask == mask) { | ||
261 | netlbl_af4list_remove_entry(entry); | ||
262 | return entry; | ||
263 | } | ||
264 | |||
265 | return NULL; | ||
266 | } | ||
267 | |||
268 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
269 | /** | ||
270 | * netlbl_af6list_remove_entry - Remove an IPv6 address entry | ||
271 | * @entry: address entry | ||
272 | * | ||
273 | * Description: | ||
274 | * Remove the specified IP address entry. The caller is responsible for | ||
275 | * calling the necessary locking functions. | ||
276 | * | ||
277 | */ | ||
278 | void netlbl_af6list_remove_entry(struct netlbl_af6list *entry) | ||
279 | { | ||
280 | entry->valid = 0; | ||
281 | list_del_rcu(&entry->list); | ||
282 | } | ||
283 | |||
284 | /** | ||
285 | * netlbl_af6list_remove - Remove an IPv6 address entry | ||
286 | * @addr: IP address | ||
287 | * @mask: IP address mask | ||
288 | * @head: the list head | ||
289 | * | ||
290 | * Description: | ||
291 | * Remove an IP address entry from the list pointed to by @head. Returns the | ||
292 | * entry on success, NULL on failure. The caller is responsible for calling | ||
293 | * the necessary locking functions. | ||
294 | * | ||
295 | */ | ||
296 | struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, | ||
297 | const struct in6_addr *mask, | ||
298 | struct list_head *head) | ||
299 | { | ||
300 | struct netlbl_af6list *entry; | ||
301 | |||
302 | entry = netlbl_af6list_search(addr, head); | ||
303 | if (entry != NULL && | ||
304 | ipv6_addr_equal(&entry->addr, addr) && | ||
305 | ipv6_addr_equal(&entry->mask, mask)) { | ||
306 | netlbl_af6list_remove_entry(entry); | ||
307 | return entry; | ||
308 | } | ||
309 | |||
310 | return NULL; | ||
311 | } | ||
312 | #endif /* IPv6 */ | ||
313 | |||
314 | /* | ||
315 | * Audit Helper Functions | ||
316 | */ | ||
317 | |||
318 | /** | ||
319 | * netlbl_af4list_audit_addr - Audit an IPv4 address | ||
320 | * @audit_buf: audit buffer | ||
321 | * @src: true if source address, false if destination | ||
322 | * @dev: network interface | ||
323 | * @addr: IP address | ||
324 | * @mask: IP address mask | ||
325 | * | ||
326 | * Description: | ||
327 | * Write the IPv4 address and address mask, if necessary, to @audit_buf. | ||
328 | * | ||
329 | */ | ||
330 | void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, | ||
331 | int src, const char *dev, | ||
332 | __be32 addr, __be32 mask) | ||
333 | { | ||
334 | u32 mask_val = ntohl(mask); | ||
335 | char *dir = (src ? "src" : "dst"); | ||
336 | |||
337 | if (dev != NULL) | ||
338 | audit_log_format(audit_buf, " netif=%s", dev); | ||
339 | audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr)); | ||
340 | if (mask_val != 0xffffffff) { | ||
341 | u32 mask_len = 0; | ||
342 | while (mask_val > 0) { | ||
343 | mask_val <<= 1; | ||
344 | mask_len++; | ||
345 | } | ||
346 | audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
351 | /** | ||
352 | * netlbl_af6list_audit_addr - Audit an IPv6 address | ||
353 | * @audit_buf: audit buffer | ||
354 | * @src: true if source address, false if destination | ||
355 | * @dev: network interface | ||
356 | * @addr: IP address | ||
357 | * @mask: IP address mask | ||
358 | * | ||
359 | * Description: | ||
360 | * Write the IPv6 address and address mask, if necessary, to @audit_buf. | ||
361 | * | ||
362 | */ | ||
363 | void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, | ||
364 | int src, | ||
365 | const char *dev, | ||
366 | const struct in6_addr *addr, | ||
367 | const struct in6_addr *mask) | ||
368 | { | ||
369 | char *dir = (src ? "src" : "dst"); | ||
370 | |||
371 | if (dev != NULL) | ||
372 | audit_log_format(audit_buf, " netif=%s", dev); | ||
373 | audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr)); | ||
374 | if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { | ||
375 | u32 mask_len = 0; | ||
376 | u32 mask_val; | ||
377 | int iter = -1; | ||
378 | while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) | ||
379 | mask_len += 32; | ||
380 | mask_val = ntohl(mask->s6_addr32[iter]); | ||
381 | while (mask_val > 0) { | ||
382 | mask_val <<= 1; | ||
383 | mask_len++; | ||
384 | } | ||
385 | audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); | ||
386 | } | ||
387 | } | ||
388 | #endif /* IPv6 */ | ||
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h new file mode 100644 index 000000000000..0242bead405f --- /dev/null +++ b/net/netlabel/netlabel_addrlist.h | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * NetLabel Network Address Lists | ||
3 | * | ||
4 | * This file contains network address list functions used to manage ordered | ||
5 | * lists of network addresses for use by the NetLabel subsystem. The NetLabel | ||
6 | * system manages static and dynamic label mappings for network protocols such | ||
7 | * as CIPSO and RIPSO. | ||
8 | * | ||
9 | * Author: Paul Moore <paul.moore@hp.com> | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or modify | ||
17 | * it under the terms of the GNU General Public License as published by | ||
18 | * the Free Software Foundation; either version 2 of the License, or | ||
19 | * (at your option) any later version. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
24 | * the GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #ifndef _NETLABEL_ADDRLIST_H | ||
33 | #define _NETLABEL_ADDRLIST_H | ||
34 | |||
35 | #include <linux/types.h> | ||
36 | #include <linux/rcupdate.h> | ||
37 | #include <linux/list.h> | ||
38 | #include <linux/in6.h> | ||
39 | #include <linux/audit.h> | ||
40 | |||
41 | /** | ||
42 | * struct netlbl_af4list - NetLabel IPv4 address list | ||
43 | * @addr: IPv4 address | ||
44 | * @mask: IPv4 address mask | ||
45 | * @valid: valid flag | ||
46 | * @list: list structure, used internally | ||
47 | */ | ||
48 | struct netlbl_af4list { | ||
49 | __be32 addr; | ||
50 | __be32 mask; | ||
51 | |||
52 | u32 valid; | ||
53 | struct list_head list; | ||
54 | }; | ||
55 | |||
56 | /** | ||
57 | * struct netlbl_af6list - NetLabel IPv6 address list | ||
58 | * @addr: IPv6 address | ||
59 | * @mask: IPv6 address mask | ||
60 | * @valid: valid flag | ||
61 | * @list: list structure, used internally | ||
62 | */ | ||
63 | struct netlbl_af6list { | ||
64 | struct in6_addr addr; | ||
65 | struct in6_addr mask; | ||
66 | |||
67 | u32 valid; | ||
68 | struct list_head list; | ||
69 | }; | ||
70 | |||
71 | #define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list) | ||
72 | |||
73 | static inline struct netlbl_af4list *__af4list_valid(struct list_head *s, | ||
74 | struct list_head *h) | ||
75 | { | ||
76 | struct list_head *i = s; | ||
77 | struct netlbl_af4list *n = __af4list_entry(s); | ||
78 | while (i != h && !n->valid) { | ||
79 | i = i->next; | ||
80 | n = __af4list_entry(i); | ||
81 | } | ||
82 | return n; | ||
83 | } | ||
84 | |||
85 | static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, | ||
86 | struct list_head *h) | ||
87 | { | ||
88 | struct list_head *i = s; | ||
89 | struct netlbl_af4list *n = __af4list_entry(s); | ||
90 | while (i != h && !n->valid) { | ||
91 | i = rcu_dereference(i->next); | ||
92 | n = __af4list_entry(i); | ||
93 | } | ||
94 | return n; | ||
95 | } | ||
96 | |||
97 | #define netlbl_af4list_foreach(iter, head) \ | ||
98 | for (iter = __af4list_valid((head)->next, head); \ | ||
99 | prefetch(iter->list.next), &iter->list != (head); \ | ||
100 | iter = __af4list_valid(iter->list.next, head)) | ||
101 | |||
102 | #define netlbl_af4list_foreach_rcu(iter, head) \ | ||
103 | for (iter = __af4list_valid_rcu((head)->next, head); \ | ||
104 | prefetch(iter->list.next), &iter->list != (head); \ | ||
105 | iter = __af4list_valid_rcu(iter->list.next, head)) | ||
106 | |||
107 | #define netlbl_af4list_foreach_safe(iter, tmp, head) \ | ||
108 | for (iter = __af4list_valid((head)->next, head), \ | ||
109 | tmp = __af4list_valid(iter->list.next, head); \ | ||
110 | &iter->list != (head); \ | ||
111 | iter = tmp, tmp = __af4list_valid(iter->list.next, head)) | ||
112 | |||
113 | int netlbl_af4list_add(struct netlbl_af4list *entry, | ||
114 | struct list_head *head); | ||
115 | struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, | ||
116 | struct list_head *head); | ||
117 | void netlbl_af4list_remove_entry(struct netlbl_af4list *entry); | ||
118 | struct netlbl_af4list *netlbl_af4list_search(__be32 addr, | ||
119 | struct list_head *head); | ||
120 | struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, | ||
121 | __be32 mask, | ||
122 | struct list_head *head); | ||
123 | void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, | ||
124 | int src, const char *dev, | ||
125 | __be32 addr, __be32 mask); | ||
126 | |||
127 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
128 | |||
129 | #define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list) | ||
130 | |||
131 | static inline struct netlbl_af6list *__af6list_valid(struct list_head *s, | ||
132 | struct list_head *h) | ||
133 | { | ||
134 | struct list_head *i = s; | ||
135 | struct netlbl_af6list *n = __af6list_entry(s); | ||
136 | while (i != h && !n->valid) { | ||
137 | i = i->next; | ||
138 | n = __af6list_entry(i); | ||
139 | } | ||
140 | return n; | ||
141 | } | ||
142 | |||
143 | static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, | ||
144 | struct list_head *h) | ||
145 | { | ||
146 | struct list_head *i = s; | ||
147 | struct netlbl_af6list *n = __af6list_entry(s); | ||
148 | while (i != h && !n->valid) { | ||
149 | i = rcu_dereference(i->next); | ||
150 | n = __af6list_entry(i); | ||
151 | } | ||
152 | return n; | ||
153 | } | ||
154 | |||
155 | #define netlbl_af6list_foreach(iter, head) \ | ||
156 | for (iter = __af6list_valid((head)->next, head); \ | ||
157 | prefetch(iter->list.next), &iter->list != (head); \ | ||
158 | iter = __af6list_valid(iter->list.next, head)) | ||
159 | |||
160 | #define netlbl_af6list_foreach_rcu(iter, head) \ | ||
161 | for (iter = __af6list_valid_rcu((head)->next, head); \ | ||
162 | prefetch(iter->list.next), &iter->list != (head); \ | ||
163 | iter = __af6list_valid_rcu(iter->list.next, head)) | ||
164 | |||
165 | #define netlbl_af6list_foreach_safe(iter, tmp, head) \ | ||
166 | for (iter = __af6list_valid((head)->next, head), \ | ||
167 | tmp = __af6list_valid(iter->list.next, head); \ | ||
168 | &iter->list != (head); \ | ||
169 | iter = tmp, tmp = __af6list_valid(iter->list.next, head)) | ||
170 | |||
171 | int netlbl_af6list_add(struct netlbl_af6list *entry, | ||
172 | struct list_head *head); | ||
173 | struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, | ||
174 | const struct in6_addr *mask, | ||
175 | struct list_head *head); | ||
176 | void netlbl_af6list_remove_entry(struct netlbl_af6list *entry); | ||
177 | struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, | ||
178 | struct list_head *head); | ||
179 | struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, | ||
180 | const struct in6_addr *mask, | ||
181 | struct list_head *head); | ||
182 | void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, | ||
183 | int src, | ||
184 | const char *dev, | ||
185 | const struct in6_addr *addr, | ||
186 | const struct in6_addr *mask); | ||
187 | #endif /* IPV6 */ | ||
188 | |||
189 | #endif | ||
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 0aec318bf0ef..fff32b70efa9 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "netlabel_user.h" | 43 | #include "netlabel_user.h" |
44 | #include "netlabel_cipso_v4.h" | 44 | #include "netlabel_cipso_v4.h" |
45 | #include "netlabel_mgmt.h" | 45 | #include "netlabel_mgmt.h" |
46 | #include "netlabel_domainhash.h" | ||
46 | 47 | ||
47 | /* Argument struct for cipso_v4_doi_walk() */ | 48 | /* Argument struct for cipso_v4_doi_walk() */ |
48 | struct netlbl_cipsov4_doiwalk_arg { | 49 | struct netlbl_cipsov4_doiwalk_arg { |
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg { | |||
51 | u32 seq; | 52 | u32 seq; |
52 | }; | 53 | }; |
53 | 54 | ||
55 | /* Argument struct for netlbl_domhsh_walk() */ | ||
56 | struct netlbl_domhsh_walk_arg { | ||
57 | struct netlbl_audit *audit_info; | ||
58 | u32 doi; | ||
59 | }; | ||
60 | |||
54 | /* NetLabel Generic NETLINK CIPSOv4 family */ | 61 | /* NetLabel Generic NETLINK CIPSOv4 family */ |
55 | static struct genl_family netlbl_cipsov4_gnl_family = { | 62 | static struct genl_family netlbl_cipsov4_gnl_family = { |
56 | .id = GENL_ID_GENERATE, | 63 | .id = GENL_ID_GENERATE, |
@@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 | |||
81 | */ | 88 | */ |
82 | 89 | ||
83 | /** | 90 | /** |
84 | * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition | ||
85 | * @entry: the entry's RCU field | ||
86 | * | ||
87 | * Description: | ||
88 | * This function is designed to be used as a callback to the call_rcu() | ||
89 | * function so that the memory allocated to the DOI definition can be released | ||
90 | * safely. | ||
91 | * | ||
92 | */ | ||
93 | void netlbl_cipsov4_doi_free(struct rcu_head *entry) | ||
94 | { | ||
95 | struct cipso_v4_doi *ptr; | ||
96 | |||
97 | ptr = container_of(entry, struct cipso_v4_doi, rcu); | ||
98 | switch (ptr->type) { | ||
99 | case CIPSO_V4_MAP_STD: | ||
100 | kfree(ptr->map.std->lvl.cipso); | ||
101 | kfree(ptr->map.std->lvl.local); | ||
102 | kfree(ptr->map.std->cat.cipso); | ||
103 | kfree(ptr->map.std->cat.local); | ||
104 | break; | ||
105 | } | ||
106 | kfree(ptr); | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * netlbl_cipsov4_add_common - Parse the common sections of an ADD message | 91 | * netlbl_cipsov4_add_common - Parse the common sections of an ADD message |
111 | * @info: the Generic NETLINK info block | 92 | * @info: the Generic NETLINK info block |
112 | * @doi_def: the CIPSO V4 DOI definition | 93 | * @doi_def: the CIPSO V4 DOI definition |
@@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, | |||
151 | * @info: the Generic NETLINK info block | 132 | * @info: the Generic NETLINK info block |
152 | * | 133 | * |
153 | * Description: | 134 | * Description: |
154 | * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message | 135 | * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD |
155 | * and add it to the CIPSO V4 engine. Return zero on success and non-zero on | 136 | * message and add it to the CIPSO V4 engine. Return zero on success and |
156 | * error. | 137 | * non-zero on error. |
157 | * | 138 | * |
158 | */ | 139 | */ |
159 | static int netlbl_cipsov4_add_std(struct genl_info *info) | 140 | static int netlbl_cipsov4_add_std(struct genl_info *info) |
@@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) | |||
183 | ret_val = -ENOMEM; | 164 | ret_val = -ENOMEM; |
184 | goto add_std_failure; | 165 | goto add_std_failure; |
185 | } | 166 | } |
186 | doi_def->type = CIPSO_V4_MAP_STD; | 167 | doi_def->type = CIPSO_V4_MAP_TRANS; |
187 | 168 | ||
188 | ret_val = netlbl_cipsov4_add_common(info, doi_def); | 169 | ret_val = netlbl_cipsov4_add_common(info, doi_def); |
189 | if (ret_val != 0) | 170 | if (ret_val != 0) |
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) | |||
342 | 323 | ||
343 | add_std_failure: | 324 | add_std_failure: |
344 | if (doi_def) | 325 | if (doi_def) |
345 | netlbl_cipsov4_doi_free(&doi_def->rcu); | 326 | cipso_v4_doi_free(doi_def); |
346 | return ret_val; | 327 | return ret_val; |
347 | } | 328 | } |
348 | 329 | ||
@@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) | |||
379 | return 0; | 360 | return 0; |
380 | 361 | ||
381 | add_pass_failure: | 362 | add_pass_failure: |
382 | netlbl_cipsov4_doi_free(&doi_def->rcu); | 363 | cipso_v4_doi_free(doi_def); |
364 | return ret_val; | ||
365 | } | ||
366 | |||
367 | /** | ||
368 | * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition | ||
369 | * @info: the Generic NETLINK info block | ||
370 | * | ||
371 | * Description: | ||
372 | * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD | ||
373 | * message and add it to the CIPSO V4 engine. Return zero on success and | ||
374 | * non-zero on error. | ||
375 | * | ||
376 | */ | ||
377 | static int netlbl_cipsov4_add_local(struct genl_info *info) | ||
378 | { | ||
379 | int ret_val; | ||
380 | struct cipso_v4_doi *doi_def = NULL; | ||
381 | |||
382 | if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) | ||
383 | return -EINVAL; | ||
384 | |||
385 | doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); | ||
386 | if (doi_def == NULL) | ||
387 | return -ENOMEM; | ||
388 | doi_def->type = CIPSO_V4_MAP_LOCAL; | ||
389 | |||
390 | ret_val = netlbl_cipsov4_add_common(info, doi_def); | ||
391 | if (ret_val != 0) | ||
392 | goto add_local_failure; | ||
393 | |||
394 | ret_val = cipso_v4_doi_add(doi_def); | ||
395 | if (ret_val != 0) | ||
396 | goto add_local_failure; | ||
397 | return 0; | ||
398 | |||
399 | add_local_failure: | ||
400 | cipso_v4_doi_free(doi_def); | ||
383 | return ret_val; | 401 | return ret_val; |
384 | } | 402 | } |
385 | 403 | ||
@@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) | |||
412 | 430 | ||
413 | type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); | 431 | type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); |
414 | switch (type) { | 432 | switch (type) { |
415 | case CIPSO_V4_MAP_STD: | 433 | case CIPSO_V4_MAP_TRANS: |
416 | type_str = "std"; | 434 | type_str = "trans"; |
417 | ret_val = netlbl_cipsov4_add_std(info); | 435 | ret_val = netlbl_cipsov4_add_std(info); |
418 | break; | 436 | break; |
419 | case CIPSO_V4_MAP_PASS: | 437 | case CIPSO_V4_MAP_PASS: |
420 | type_str = "pass"; | 438 | type_str = "pass"; |
421 | ret_val = netlbl_cipsov4_add_pass(info); | 439 | ret_val = netlbl_cipsov4_add_pass(info); |
422 | break; | 440 | break; |
441 | case CIPSO_V4_MAP_LOCAL: | ||
442 | type_str = "local"; | ||
443 | ret_val = netlbl_cipsov4_add_local(info); | ||
444 | break; | ||
423 | } | 445 | } |
424 | if (ret_val == 0) | 446 | if (ret_val == 0) |
425 | atomic_inc(&netlabel_mgmt_protocount); | 447 | atomic_inc(&netlabel_mgmt_protocount); |
@@ -491,7 +513,7 @@ list_start: | |||
491 | doi_def = cipso_v4_doi_getdef(doi); | 513 | doi_def = cipso_v4_doi_getdef(doi); |
492 | if (doi_def == NULL) { | 514 | if (doi_def == NULL) { |
493 | ret_val = -EINVAL; | 515 | ret_val = -EINVAL; |
494 | goto list_failure; | 516 | goto list_failure_lock; |
495 | } | 517 | } |
496 | 518 | ||
497 | ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); | 519 | ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); |
@@ -516,7 +538,7 @@ list_start: | |||
516 | nla_nest_end(ans_skb, nla_a); | 538 | nla_nest_end(ans_skb, nla_a); |
517 | 539 | ||
518 | switch (doi_def->type) { | 540 | switch (doi_def->type) { |
519 | case CIPSO_V4_MAP_STD: | 541 | case CIPSO_V4_MAP_TRANS: |
520 | nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); | 542 | nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); |
521 | if (nla_a == NULL) { | 543 | if (nla_a == NULL) { |
522 | ret_val = -ENOMEM; | 544 | ret_val = -ENOMEM; |
@@ -655,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, | |||
655 | struct netlink_callback *cb) | 677 | struct netlink_callback *cb) |
656 | { | 678 | { |
657 | struct netlbl_cipsov4_doiwalk_arg cb_arg; | 679 | struct netlbl_cipsov4_doiwalk_arg cb_arg; |
658 | int doi_skip = cb->args[0]; | 680 | u32 doi_skip = cb->args[0]; |
659 | 681 | ||
660 | cb_arg.nl_cb = cb; | 682 | cb_arg.nl_cb = cb; |
661 | cb_arg.skb = skb; | 683 | cb_arg.skb = skb; |
@@ -668,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, | |||
668 | } | 690 | } |
669 | 691 | ||
670 | /** | 692 | /** |
693 | * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE | ||
694 | * @entry: LSM domain mapping entry | ||
695 | * @arg: the netlbl_domhsh_walk_arg structure | ||
696 | * | ||
697 | * Description: | ||
698 | * This function is intended for use by netlbl_cipsov4_remove() as the callback | ||
699 | * for the netlbl_domhsh_walk() function; it removes LSM domain map entries | ||
700 | * which are associated with the CIPSO DOI specified in @arg. Returns zero on | ||
701 | * success, negative values on failure. | ||
702 | * | ||
703 | */ | ||
704 | static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) | ||
705 | { | ||
706 | struct netlbl_domhsh_walk_arg *cb_arg = arg; | ||
707 | |||
708 | if (entry->type == NETLBL_NLTYPE_CIPSOV4 && | ||
709 | entry->type_def.cipsov4->doi == cb_arg->doi) | ||
710 | return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); | ||
711 | |||
712 | return 0; | ||
713 | } | ||
714 | |||
715 | /** | ||
671 | * netlbl_cipsov4_remove - Handle a REMOVE message | 716 | * netlbl_cipsov4_remove - Handle a REMOVE message |
672 | * @skb: the NETLINK buffer | 717 | * @skb: the NETLINK buffer |
673 | * @info: the Generic NETLINK info block | 718 | * @info: the Generic NETLINK info block |
@@ -681,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) | |||
681 | { | 726 | { |
682 | int ret_val = -EINVAL; | 727 | int ret_val = -EINVAL; |
683 | u32 doi = 0; | 728 | u32 doi = 0; |
729 | struct netlbl_domhsh_walk_arg cb_arg; | ||
684 | struct audit_buffer *audit_buf; | 730 | struct audit_buffer *audit_buf; |
685 | struct netlbl_audit audit_info; | 731 | struct netlbl_audit audit_info; |
732 | u32 skip_bkt = 0; | ||
733 | u32 skip_chain = 0; | ||
686 | 734 | ||
687 | if (!info->attrs[NLBL_CIPSOV4_A_DOI]) | 735 | if (!info->attrs[NLBL_CIPSOV4_A_DOI]) |
688 | return -EINVAL; | 736 | return -EINVAL; |
@@ -690,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) | |||
690 | doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); | 738 | doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); |
691 | netlbl_netlink_auditinfo(skb, &audit_info); | 739 | netlbl_netlink_auditinfo(skb, &audit_info); |
692 | 740 | ||
693 | ret_val = cipso_v4_doi_remove(doi, | 741 | cb_arg.doi = doi; |
694 | &audit_info, | 742 | cb_arg.audit_info = &audit_info; |
695 | netlbl_cipsov4_doi_free); | 743 | ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, |
696 | if (ret_val == 0) | 744 | netlbl_cipsov4_remove_cb, &cb_arg); |
697 | atomic_dec(&netlabel_mgmt_protocount); | 745 | if (ret_val == 0 || ret_val == -ENOENT) { |
746 | ret_val = cipso_v4_doi_remove(doi, &audit_info); | ||
747 | if (ret_val == 0) | ||
748 | atomic_dec(&netlabel_mgmt_protocount); | ||
749 | } | ||
698 | 750 | ||
699 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, | 751 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, |
700 | &audit_info); | 752 | &audit_info); |
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 220cb9d06b49..c8a4079261f0 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h | |||
@@ -45,12 +45,13 @@ | |||
45 | * NLBL_CIPSOV4_A_MTYPE | 45 | * NLBL_CIPSOV4_A_MTYPE |
46 | * NLBL_CIPSOV4_A_TAGLST | 46 | * NLBL_CIPSOV4_A_TAGLST |
47 | * | 47 | * |
48 | * If using CIPSO_V4_MAP_STD the following attributes are required: | 48 | * If using CIPSO_V4_MAP_TRANS the following attributes are required: |
49 | * | 49 | * |
50 | * NLBL_CIPSOV4_A_MLSLVLLST | 50 | * NLBL_CIPSOV4_A_MLSLVLLST |
51 | * NLBL_CIPSOV4_A_MLSCATLST | 51 | * NLBL_CIPSOV4_A_MLSCATLST |
52 | * | 52 | * |
53 | * If using CIPSO_V4_MAP_PASS no additional attributes are required. | 53 | * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes |
54 | * are required. | ||
54 | * | 55 | * |
55 | * o REMOVE: | 56 | * o REMOVE: |
56 | * Sent by an application to remove a specific DOI mapping table from the | 57 | * Sent by an application to remove a specific DOI mapping table from the |
@@ -76,12 +77,13 @@ | |||
76 | * NLBL_CIPSOV4_A_MTYPE | 77 | * NLBL_CIPSOV4_A_MTYPE |
77 | * NLBL_CIPSOV4_A_TAGLST | 78 | * NLBL_CIPSOV4_A_TAGLST |
78 | * | 79 | * |
79 | * If using CIPSO_V4_MAP_STD the following attributes are required: | 80 | * If using CIPSO_V4_MAP_TRANS the following attributes are required: |
80 | * | 81 | * |
81 | * NLBL_CIPSOV4_A_MLSLVLLST | 82 | * NLBL_CIPSOV4_A_MLSLVLLST |
82 | * NLBL_CIPSOV4_A_MLSCATLST | 83 | * NLBL_CIPSOV4_A_MLSCATLST |
83 | * | 84 | * |
84 | * If using CIPSO_V4_MAP_PASS no additional attributes are required. | 85 | * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes |
86 | * are required. | ||
85 | * | 87 | * |
86 | * o LISTALL: | 88 | * o LISTALL: |
87 | * This message is sent by an application to list the valid DOIs on the | 89 | * This message is sent by an application to list the valid DOIs on the |
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 643c032a3a57..5fadf10e5ddf 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c | |||
@@ -11,7 +11,7 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | 14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 |
15 | * | 15 | * |
16 | * This program is free software; you can redistribute it and/or modify | 16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by | 17 | * it under the terms of the GNU General Public License as published by |
@@ -40,6 +40,7 @@ | |||
40 | #include <asm/bug.h> | 40 | #include <asm/bug.h> |
41 | 41 | ||
42 | #include "netlabel_mgmt.h" | 42 | #include "netlabel_mgmt.h" |
43 | #include "netlabel_addrlist.h" | ||
43 | #include "netlabel_domainhash.h" | 44 | #include "netlabel_domainhash.h" |
44 | #include "netlabel_user.h" | 45 | #include "netlabel_user.h" |
45 | 46 | ||
@@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL; | |||
72 | static void netlbl_domhsh_free_entry(struct rcu_head *entry) | 73 | static void netlbl_domhsh_free_entry(struct rcu_head *entry) |
73 | { | 74 | { |
74 | struct netlbl_dom_map *ptr; | 75 | struct netlbl_dom_map *ptr; |
76 | struct netlbl_af4list *iter4; | ||
77 | struct netlbl_af4list *tmp4; | ||
78 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
79 | struct netlbl_af6list *iter6; | ||
80 | struct netlbl_af6list *tmp6; | ||
81 | #endif /* IPv6 */ | ||
75 | 82 | ||
76 | ptr = container_of(entry, struct netlbl_dom_map, rcu); | 83 | ptr = container_of(entry, struct netlbl_dom_map, rcu); |
84 | if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { | ||
85 | netlbl_af4list_foreach_safe(iter4, tmp4, | ||
86 | &ptr->type_def.addrsel->list4) { | ||
87 | netlbl_af4list_remove_entry(iter4); | ||
88 | kfree(netlbl_domhsh_addr4_entry(iter4)); | ||
89 | } | ||
90 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
91 | netlbl_af6list_foreach_safe(iter6, tmp6, | ||
92 | &ptr->type_def.addrsel->list6) { | ||
93 | netlbl_af6list_remove_entry(iter6); | ||
94 | kfree(netlbl_domhsh_addr6_entry(iter6)); | ||
95 | } | ||
96 | #endif /* IPv6 */ | ||
97 | } | ||
77 | kfree(ptr->domain); | 98 | kfree(ptr->domain); |
78 | kfree(ptr); | 99 | kfree(ptr); |
79 | } | 100 | } |
@@ -115,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key) | |||
115 | static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) | 136 | static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) |
116 | { | 137 | { |
117 | u32 bkt; | 138 | u32 bkt; |
139 | struct list_head *bkt_list; | ||
118 | struct netlbl_dom_map *iter; | 140 | struct netlbl_dom_map *iter; |
119 | 141 | ||
120 | if (domain != NULL) { | 142 | if (domain != NULL) { |
121 | bkt = netlbl_domhsh_hash(domain); | 143 | bkt = netlbl_domhsh_hash(domain); |
122 | list_for_each_entry_rcu(iter, | 144 | bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt]; |
123 | &rcu_dereference(netlbl_domhsh)->tbl[bkt], | 145 | list_for_each_entry_rcu(iter, bkt_list, list) |
124 | list) | ||
125 | if (iter->valid && strcmp(iter->domain, domain) == 0) | 146 | if (iter->valid && strcmp(iter->domain, domain) == 0) |
126 | return iter; | 147 | return iter; |
127 | } | 148 | } |
@@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) | |||
156 | return entry; | 177 | return entry; |
157 | } | 178 | } |
158 | 179 | ||
180 | /** | ||
181 | * netlbl_domhsh_audit_add - Generate an audit entry for an add event | ||
182 | * @entry: the entry being added | ||
183 | * @addr4: the IPv4 address information | ||
184 | * @addr6: the IPv6 address information | ||
185 | * @result: the result code | ||
186 | * @audit_info: NetLabel audit information | ||
187 | * | ||
188 | * Description: | ||
189 | * Generate an audit record for adding a new NetLabel/LSM mapping entry with | ||
190 | * the given information. Caller is responsible for holding the necessary | ||
191 | * locks. | ||
192 | * | ||
193 | */ | ||
194 | static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, | ||
195 | struct netlbl_af4list *addr4, | ||
196 | struct netlbl_af6list *addr6, | ||
197 | int result, | ||
198 | struct netlbl_audit *audit_info) | ||
199 | { | ||
200 | struct audit_buffer *audit_buf; | ||
201 | struct cipso_v4_doi *cipsov4 = NULL; | ||
202 | u32 type; | ||
203 | |||
204 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); | ||
205 | if (audit_buf != NULL) { | ||
206 | audit_log_format(audit_buf, " nlbl_domain=%s", | ||
207 | entry->domain ? entry->domain : "(default)"); | ||
208 | if (addr4 != NULL) { | ||
209 | struct netlbl_domaddr4_map *map4; | ||
210 | map4 = netlbl_domhsh_addr4_entry(addr4); | ||
211 | type = map4->type; | ||
212 | cipsov4 = map4->type_def.cipsov4; | ||
213 | netlbl_af4list_audit_addr(audit_buf, 0, NULL, | ||
214 | addr4->addr, addr4->mask); | ||
215 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
216 | } else if (addr6 != NULL) { | ||
217 | struct netlbl_domaddr6_map *map6; | ||
218 | map6 = netlbl_domhsh_addr6_entry(addr6); | ||
219 | type = map6->type; | ||
220 | netlbl_af6list_audit_addr(audit_buf, 0, NULL, | ||
221 | &addr6->addr, &addr6->mask); | ||
222 | #endif /* IPv6 */ | ||
223 | } else { | ||
224 | type = entry->type; | ||
225 | cipsov4 = entry->type_def.cipsov4; | ||
226 | } | ||
227 | switch (type) { | ||
228 | case NETLBL_NLTYPE_UNLABELED: | ||
229 | audit_log_format(audit_buf, " nlbl_protocol=unlbl"); | ||
230 | break; | ||
231 | case NETLBL_NLTYPE_CIPSOV4: | ||
232 | BUG_ON(cipsov4 == NULL); | ||
233 | audit_log_format(audit_buf, | ||
234 | " nlbl_protocol=cipsov4 cipso_doi=%u", | ||
235 | cipsov4->doi); | ||
236 | break; | ||
237 | } | ||
238 | audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0); | ||
239 | audit_log_end(audit_buf); | ||
240 | } | ||
241 | } | ||
242 | |||
159 | /* | 243 | /* |
160 | * Domain Hash Table Functions | 244 | * Domain Hash Table Functions |
161 | */ | 245 | */ |
@@ -213,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size) | |||
213 | int netlbl_domhsh_add(struct netlbl_dom_map *entry, | 297 | int netlbl_domhsh_add(struct netlbl_dom_map *entry, |
214 | struct netlbl_audit *audit_info) | 298 | struct netlbl_audit *audit_info) |
215 | { | 299 | { |
216 | int ret_val; | 300 | int ret_val = 0; |
217 | u32 bkt; | 301 | struct netlbl_dom_map *entry_old; |
218 | struct audit_buffer *audit_buf; | 302 | struct netlbl_af4list *iter4; |
219 | 303 | struct netlbl_af4list *tmp4; | |
220 | switch (entry->type) { | 304 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
221 | case NETLBL_NLTYPE_UNLABELED: | 305 | struct netlbl_af6list *iter6; |
222 | ret_val = 0; | 306 | struct netlbl_af6list *tmp6; |
223 | break; | 307 | #endif /* IPv6 */ |
224 | case NETLBL_NLTYPE_CIPSOV4: | ||
225 | ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, | ||
226 | entry->domain); | ||
227 | break; | ||
228 | default: | ||
229 | return -EINVAL; | ||
230 | } | ||
231 | if (ret_val != 0) | ||
232 | return ret_val; | ||
233 | |||
234 | entry->valid = 1; | ||
235 | INIT_RCU_HEAD(&entry->rcu); | ||
236 | 308 | ||
237 | rcu_read_lock(); | 309 | rcu_read_lock(); |
310 | |||
238 | spin_lock(&netlbl_domhsh_lock); | 311 | spin_lock(&netlbl_domhsh_lock); |
239 | if (entry->domain != NULL) { | 312 | if (entry->domain != NULL) |
240 | bkt = netlbl_domhsh_hash(entry->domain); | 313 | entry_old = netlbl_domhsh_search(entry->domain); |
241 | if (netlbl_domhsh_search(entry->domain) == NULL) | 314 | else |
315 | entry_old = netlbl_domhsh_search_def(entry->domain); | ||
316 | if (entry_old == NULL) { | ||
317 | entry->valid = 1; | ||
318 | INIT_RCU_HEAD(&entry->rcu); | ||
319 | |||
320 | if (entry->domain != NULL) { | ||
321 | u32 bkt = netlbl_domhsh_hash(entry->domain); | ||
242 | list_add_tail_rcu(&entry->list, | 322 | list_add_tail_rcu(&entry->list, |
243 | &rcu_dereference(netlbl_domhsh)->tbl[bkt]); | 323 | &rcu_dereference(netlbl_domhsh)->tbl[bkt]); |
244 | else | 324 | } else { |
245 | ret_val = -EEXIST; | 325 | INIT_LIST_HEAD(&entry->list); |
246 | } else { | ||
247 | INIT_LIST_HEAD(&entry->list); | ||
248 | if (rcu_dereference(netlbl_domhsh_def) == NULL) | ||
249 | rcu_assign_pointer(netlbl_domhsh_def, entry); | 326 | rcu_assign_pointer(netlbl_domhsh_def, entry); |
250 | else | ||
251 | ret_val = -EEXIST; | ||
252 | } | ||
253 | spin_unlock(&netlbl_domhsh_lock); | ||
254 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); | ||
255 | if (audit_buf != NULL) { | ||
256 | audit_log_format(audit_buf, | ||
257 | " nlbl_domain=%s", | ||
258 | entry->domain ? entry->domain : "(default)"); | ||
259 | switch (entry->type) { | ||
260 | case NETLBL_NLTYPE_UNLABELED: | ||
261 | audit_log_format(audit_buf, " nlbl_protocol=unlbl"); | ||
262 | break; | ||
263 | case NETLBL_NLTYPE_CIPSOV4: | ||
264 | audit_log_format(audit_buf, | ||
265 | " nlbl_protocol=cipsov4 cipso_doi=%u", | ||
266 | entry->type_def.cipsov4->doi); | ||
267 | break; | ||
268 | } | 327 | } |
269 | audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); | ||
270 | audit_log_end(audit_buf); | ||
271 | } | ||
272 | rcu_read_unlock(); | ||
273 | 328 | ||
274 | if (ret_val != 0) { | 329 | if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { |
275 | switch (entry->type) { | 330 | netlbl_af4list_foreach_rcu(iter4, |
276 | case NETLBL_NLTYPE_CIPSOV4: | 331 | &entry->type_def.addrsel->list4) |
277 | if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, | 332 | netlbl_domhsh_audit_add(entry, iter4, NULL, |
278 | entry->domain) != 0) | 333 | ret_val, audit_info); |
279 | BUG(); | 334 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
280 | break; | 335 | netlbl_af6list_foreach_rcu(iter6, |
336 | &entry->type_def.addrsel->list6) | ||
337 | netlbl_domhsh_audit_add(entry, NULL, iter6, | ||
338 | ret_val, audit_info); | ||
339 | #endif /* IPv6 */ | ||
340 | } else | ||
341 | netlbl_domhsh_audit_add(entry, NULL, NULL, | ||
342 | ret_val, audit_info); | ||
343 | } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && | ||
344 | entry->type == NETLBL_NLTYPE_ADDRSELECT) { | ||
345 | struct list_head *old_list4; | ||
346 | struct list_head *old_list6; | ||
347 | |||
348 | old_list4 = &entry_old->type_def.addrsel->list4; | ||
349 | old_list6 = &entry_old->type_def.addrsel->list6; | ||
350 | |||
351 | /* we only allow the addition of address selectors if all of | ||
352 | * the selectors do not exist in the existing domain map */ | ||
353 | netlbl_af4list_foreach_rcu(iter4, | ||
354 | &entry->type_def.addrsel->list4) | ||
355 | if (netlbl_af4list_search_exact(iter4->addr, | ||
356 | iter4->mask, | ||
357 | old_list4)) { | ||
358 | ret_val = -EEXIST; | ||
359 | goto add_return; | ||
360 | } | ||
361 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
362 | netlbl_af6list_foreach_rcu(iter6, | ||
363 | &entry->type_def.addrsel->list6) | ||
364 | if (netlbl_af6list_search_exact(&iter6->addr, | ||
365 | &iter6->mask, | ||
366 | old_list6)) { | ||
367 | ret_val = -EEXIST; | ||
368 | goto add_return; | ||
369 | } | ||
370 | #endif /* IPv6 */ | ||
371 | |||
372 | netlbl_af4list_foreach_safe(iter4, tmp4, | ||
373 | &entry->type_def.addrsel->list4) { | ||
374 | netlbl_af4list_remove_entry(iter4); | ||
375 | iter4->valid = 1; | ||
376 | ret_val = netlbl_af4list_add(iter4, old_list4); | ||
377 | netlbl_domhsh_audit_add(entry_old, iter4, NULL, | ||
378 | ret_val, audit_info); | ||
379 | if (ret_val != 0) | ||
380 | goto add_return; | ||
281 | } | 381 | } |
282 | } | 382 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
383 | netlbl_af6list_foreach_safe(iter6, tmp6, | ||
384 | &entry->type_def.addrsel->list6) { | ||
385 | netlbl_af6list_remove_entry(iter6); | ||
386 | iter6->valid = 1; | ||
387 | ret_val = netlbl_af6list_add(iter6, old_list6); | ||
388 | netlbl_domhsh_audit_add(entry_old, NULL, iter6, | ||
389 | ret_val, audit_info); | ||
390 | if (ret_val != 0) | ||
391 | goto add_return; | ||
392 | } | ||
393 | #endif /* IPv6 */ | ||
394 | } else | ||
395 | ret_val = -EINVAL; | ||
283 | 396 | ||
397 | add_return: | ||
398 | spin_unlock(&netlbl_domhsh_lock); | ||
399 | rcu_read_unlock(); | ||
284 | return ret_val; | 400 | return ret_val; |
285 | } | 401 | } |
286 | 402 | ||
@@ -302,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, | |||
302 | } | 418 | } |
303 | 419 | ||
304 | /** | 420 | /** |
305 | * netlbl_domhsh_remove - Removes an entry from the domain hash table | 421 | * netlbl_domhsh_remove_entry - Removes a given entry from the domain table |
306 | * @domain: the domain to remove | 422 | * @entry: the entry to remove |
307 | * @audit_info: NetLabel audit information | 423 | * @audit_info: NetLabel audit information |
308 | * | 424 | * |
309 | * Description: | 425 | * Description: |
310 | * Removes an entry from the domain hash table and handles any updates to the | 426 | * Removes an entry from the domain hash table and handles any updates to the |
311 | * lower level protocol handler (i.e. CIPSO). Returns zero on success, | 427 | * lower level protocol handler (i.e. CIPSO). Caller is responsible for |
312 | * negative on failure. | 428 | * ensuring that the RCU read lock is held. Returns zero on success, negative |
429 | * on failure. | ||
313 | * | 430 | * |
314 | */ | 431 | */ |
315 | int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) | 432 | int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, |
433 | struct netlbl_audit *audit_info) | ||
316 | { | 434 | { |
317 | int ret_val = -ENOENT; | 435 | int ret_val = 0; |
318 | struct netlbl_dom_map *entry; | ||
319 | struct audit_buffer *audit_buf; | 436 | struct audit_buffer *audit_buf; |
320 | 437 | ||
321 | rcu_read_lock(); | ||
322 | if (domain) | ||
323 | entry = netlbl_domhsh_search(domain); | ||
324 | else | ||
325 | entry = netlbl_domhsh_search_def(domain); | ||
326 | if (entry == NULL) | 438 | if (entry == NULL) |
327 | goto remove_return; | 439 | return -ENOENT; |
328 | switch (entry->type) { | 440 | |
329 | case NETLBL_NLTYPE_CIPSOV4: | ||
330 | cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, | ||
331 | entry->domain); | ||
332 | break; | ||
333 | } | ||
334 | spin_lock(&netlbl_domhsh_lock); | 441 | spin_lock(&netlbl_domhsh_lock); |
335 | if (entry->valid) { | 442 | if (entry->valid) { |
336 | entry->valid = 0; | 443 | entry->valid = 0; |
@@ -338,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) | |||
338 | list_del_rcu(&entry->list); | 445 | list_del_rcu(&entry->list); |
339 | else | 446 | else |
340 | rcu_assign_pointer(netlbl_domhsh_def, NULL); | 447 | rcu_assign_pointer(netlbl_domhsh_def, NULL); |
341 | ret_val = 0; | 448 | } else |
342 | } | 449 | ret_val = -ENOENT; |
343 | spin_unlock(&netlbl_domhsh_lock); | 450 | spin_unlock(&netlbl_domhsh_lock); |
344 | 451 | ||
345 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); | 452 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); |
@@ -351,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) | |||
351 | audit_log_end(audit_buf); | 458 | audit_log_end(audit_buf); |
352 | } | 459 | } |
353 | 460 | ||
354 | remove_return: | 461 | if (ret_val == 0) { |
355 | rcu_read_unlock(); | 462 | struct netlbl_af4list *iter4; |
356 | if (ret_val == 0) | 463 | struct netlbl_domaddr4_map *map4; |
464 | |||
465 | switch (entry->type) { | ||
466 | case NETLBL_NLTYPE_ADDRSELECT: | ||
467 | netlbl_af4list_foreach_rcu(iter4, | ||
468 | &entry->type_def.addrsel->list4) { | ||
469 | map4 = netlbl_domhsh_addr4_entry(iter4); | ||
470 | cipso_v4_doi_putdef(map4->type_def.cipsov4); | ||
471 | } | ||
472 | /* no need to check the IPv6 list since we currently | ||
473 | * support only unlabeled protocols for IPv6 */ | ||
474 | break; | ||
475 | case NETLBL_NLTYPE_CIPSOV4: | ||
476 | cipso_v4_doi_putdef(entry->type_def.cipsov4); | ||
477 | break; | ||
478 | } | ||
357 | call_rcu(&entry->rcu, netlbl_domhsh_free_entry); | 479 | call_rcu(&entry->rcu, netlbl_domhsh_free_entry); |
480 | } | ||
481 | |||
482 | return ret_val; | ||
483 | } | ||
484 | |||
485 | /** | ||
486 | * netlbl_domhsh_remove - Removes an entry from the domain hash table | ||
487 | * @domain: the domain to remove | ||
488 | * @audit_info: NetLabel audit information | ||
489 | * | ||
490 | * Description: | ||
491 | * Removes an entry from the domain hash table and handles any updates to the | ||
492 | * lower level protocol handler (i.e. CIPSO). Returns zero on success, | ||
493 | * negative on failure. | ||
494 | * | ||
495 | */ | ||
496 | int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) | ||
497 | { | ||
498 | int ret_val; | ||
499 | struct netlbl_dom_map *entry; | ||
500 | |||
501 | rcu_read_lock(); | ||
502 | if (domain) | ||
503 | entry = netlbl_domhsh_search(domain); | ||
504 | else | ||
505 | entry = netlbl_domhsh_search_def(domain); | ||
506 | ret_val = netlbl_domhsh_remove_entry(entry, audit_info); | ||
507 | rcu_read_unlock(); | ||
508 | |||
358 | return ret_val; | 509 | return ret_val; |
359 | } | 510 | } |
360 | 511 | ||
@@ -389,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) | |||
389 | } | 540 | } |
390 | 541 | ||
391 | /** | 542 | /** |
543 | * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table | ||
544 | * @domain: the domain name to search for | ||
545 | * @addr: the IP address to search for | ||
546 | * | ||
547 | * Description: | ||
548 | * Look through the domain hash table searching for an entry to match @domain | ||
549 | * and @addr, return a pointer to a copy of the entry or NULL. The caller is | ||
550 | * responsible for ensuring that rcu_read_[un]lock() is called. | ||
551 | * | ||
552 | */ | ||
553 | struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, | ||
554 | __be32 addr) | ||
555 | { | ||
556 | struct netlbl_dom_map *dom_iter; | ||
557 | struct netlbl_af4list *addr_iter; | ||
558 | |||
559 | dom_iter = netlbl_domhsh_search_def(domain); | ||
560 | if (dom_iter == NULL) | ||
561 | return NULL; | ||
562 | if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) | ||
563 | return NULL; | ||
564 | |||
565 | addr_iter = netlbl_af4list_search(addr, | ||
566 | &dom_iter->type_def.addrsel->list4); | ||
567 | if (addr_iter == NULL) | ||
568 | return NULL; | ||
569 | |||
570 | return netlbl_domhsh_addr4_entry(addr_iter); | ||
571 | } | ||
572 | |||
573 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
574 | /** | ||
575 | * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table | ||
576 | * @domain: the domain name to search for | ||
577 | * @addr: the IP address to search for | ||
578 | * | ||
579 | * Description: | ||
580 | * Look through the domain hash table searching for an entry to match @domain | ||
581 | * and @addr, return a pointer to a copy of the entry or NULL. The caller is | ||
582 | * responsible for ensuring that rcu_read_[un]lock() is called. | ||
583 | * | ||
584 | */ | ||
585 | struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, | ||
586 | const struct in6_addr *addr) | ||
587 | { | ||
588 | struct netlbl_dom_map *dom_iter; | ||
589 | struct netlbl_af6list *addr_iter; | ||
590 | |||
591 | dom_iter = netlbl_domhsh_search_def(domain); | ||
592 | if (dom_iter == NULL) | ||
593 | return NULL; | ||
594 | if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) | ||
595 | return NULL; | ||
596 | |||
597 | addr_iter = netlbl_af6list_search(addr, | ||
598 | &dom_iter->type_def.addrsel->list6); | ||
599 | if (addr_iter == NULL) | ||
600 | return NULL; | ||
601 | |||
602 | return netlbl_domhsh_addr6_entry(addr_iter); | ||
603 | } | ||
604 | #endif /* IPv6 */ | ||
605 | |||
606 | /** | ||
392 | * netlbl_domhsh_walk - Iterate through the domain mapping hash table | 607 | * netlbl_domhsh_walk - Iterate through the domain mapping hash table |
393 | * @skip_bkt: the number of buckets to skip at the start | 608 | * @skip_bkt: the number of buckets to skip at the start |
394 | * @skip_chain: the number of entries to skip in the first iterated bucket | 609 | * @skip_chain: the number of entries to skip in the first iterated bucket |
@@ -410,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt, | |||
410 | { | 625 | { |
411 | int ret_val = -ENOENT; | 626 | int ret_val = -ENOENT; |
412 | u32 iter_bkt; | 627 | u32 iter_bkt; |
628 | struct list_head *iter_list; | ||
413 | struct netlbl_dom_map *iter_entry; | 629 | struct netlbl_dom_map *iter_entry; |
414 | u32 chain_cnt = 0; | 630 | u32 chain_cnt = 0; |
415 | 631 | ||
@@ -417,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt, | |||
417 | for (iter_bkt = *skip_bkt; | 633 | for (iter_bkt = *skip_bkt; |
418 | iter_bkt < rcu_dereference(netlbl_domhsh)->size; | 634 | iter_bkt < rcu_dereference(netlbl_domhsh)->size; |
419 | iter_bkt++, chain_cnt = 0) { | 635 | iter_bkt++, chain_cnt = 0) { |
420 | list_for_each_entry_rcu(iter_entry, | 636 | iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt]; |
421 | &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt], | 637 | list_for_each_entry_rcu(iter_entry, iter_list, list) |
422 | list) | ||
423 | if (iter_entry->valid) { | 638 | if (iter_entry->valid) { |
424 | if (chain_cnt++ < *skip_chain) | 639 | if (chain_cnt++ < *skip_chain) |
425 | continue; | 640 | continue; |
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 8220990ceb96..bfcb6763a1a1 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h | |||
@@ -11,7 +11,7 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | 14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 |
15 | * | 15 | * |
16 | * This program is free software; you can redistribute it and/or modify | 16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by | 17 | * it under the terms of the GNU General Public License as published by |
@@ -36,16 +36,43 @@ | |||
36 | #include <linux/rcupdate.h> | 36 | #include <linux/rcupdate.h> |
37 | #include <linux/list.h> | 37 | #include <linux/list.h> |
38 | 38 | ||
39 | #include "netlabel_addrlist.h" | ||
40 | |||
39 | /* Domain hash table size */ | 41 | /* Domain hash table size */ |
40 | /* XXX - currently this number is an uneducated guess */ | 42 | /* XXX - currently this number is an uneducated guess */ |
41 | #define NETLBL_DOMHSH_BITSIZE 7 | 43 | #define NETLBL_DOMHSH_BITSIZE 7 |
42 | 44 | ||
43 | /* Domain mapping definition struct */ | 45 | /* Domain mapping definition structures */ |
46 | #define netlbl_domhsh_addr4_entry(iter) \ | ||
47 | container_of(iter, struct netlbl_domaddr4_map, list) | ||
48 | struct netlbl_domaddr4_map { | ||
49 | u32 type; | ||
50 | union { | ||
51 | struct cipso_v4_doi *cipsov4; | ||
52 | } type_def; | ||
53 | |||
54 | struct netlbl_af4list list; | ||
55 | }; | ||
56 | #define netlbl_domhsh_addr6_entry(iter) \ | ||
57 | container_of(iter, struct netlbl_domaddr6_map, list) | ||
58 | struct netlbl_domaddr6_map { | ||
59 | u32 type; | ||
60 | |||
61 | /* NOTE: no 'type_def' union needed at present since we don't currently | ||
62 | * support any IPv6 labeling protocols */ | ||
63 | |||
64 | struct netlbl_af6list list; | ||
65 | }; | ||
66 | struct netlbl_domaddr_map { | ||
67 | struct list_head list4; | ||
68 | struct list_head list6; | ||
69 | }; | ||
44 | struct netlbl_dom_map { | 70 | struct netlbl_dom_map { |
45 | char *domain; | 71 | char *domain; |
46 | u32 type; | 72 | u32 type; |
47 | union { | 73 | union { |
48 | struct cipso_v4_doi *cipsov4; | 74 | struct cipso_v4_doi *cipsov4; |
75 | struct netlbl_domaddr_map *addrsel; | ||
49 | } type_def; | 76 | } type_def; |
50 | 77 | ||
51 | u32 valid; | 78 | u32 valid; |
@@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, | |||
61 | struct netlbl_audit *audit_info); | 88 | struct netlbl_audit *audit_info); |
62 | int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, | 89 | int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, |
63 | struct netlbl_audit *audit_info); | 90 | struct netlbl_audit *audit_info); |
91 | int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, | ||
92 | struct netlbl_audit *audit_info); | ||
64 | int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); | 93 | int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); |
65 | int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); | 94 | int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); |
66 | struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); | 95 | struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); |
96 | struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, | ||
97 | __be32 addr); | ||
67 | int netlbl_domhsh_walk(u32 *skip_bkt, | 98 | int netlbl_domhsh_walk(u32 *skip_bkt, |
68 | u32 *skip_chain, | 99 | u32 *skip_chain, |
69 | int (*callback) (struct netlbl_dom_map *entry, void *arg), | 100 | int (*callback) (struct netlbl_dom_map *entry, void *arg), |
70 | void *cb_arg); | 101 | void *cb_arg); |
71 | 102 | ||
103 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
104 | struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, | ||
105 | const struct in6_addr *addr); | ||
106 | #endif /* IPv6 */ | ||
107 | |||
72 | #endif | 108 | #endif |
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 39793a1a93aa..b32eceb3ab0d 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c | |||
@@ -10,7 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | 13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 |
14 | * | 14 | * |
15 | * This program is free software; you can redistribute it and/or modify | 15 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 16 | * it under the terms of the GNU General Public License as published by |
@@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain, | |||
82 | 82 | ||
83 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | 83 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); |
84 | if (entry == NULL) | 84 | if (entry == NULL) |
85 | goto cfg_unlbl_add_map_failure; | 85 | return -ENOMEM; |
86 | if (domain != NULL) { | 86 | if (domain != NULL) { |
87 | entry->domain = kstrdup(domain, GFP_ATOMIC); | 87 | entry->domain = kstrdup(domain, GFP_ATOMIC); |
88 | if (entry->domain == NULL) | 88 | if (entry->domain == NULL) |
@@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure: | |||
104 | } | 104 | } |
105 | 105 | ||
106 | /** | 106 | /** |
107 | * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition | ||
108 | * @doi_def: the DOI definition | ||
109 | * @audit_info: NetLabel audit information | ||
110 | * | ||
111 | * Description: | ||
112 | * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on | ||
113 | * success, negative values on failure. | ||
114 | * | ||
115 | */ | ||
116 | int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, | ||
117 | struct netlbl_audit *audit_info) | ||
118 | { | ||
119 | int ret_val; | ||
120 | const char *type_str; | ||
121 | struct audit_buffer *audit_buf; | ||
122 | |||
123 | ret_val = cipso_v4_doi_add(doi_def); | ||
124 | |||
125 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, | ||
126 | audit_info); | ||
127 | if (audit_buf != NULL) { | ||
128 | switch (doi_def->type) { | ||
129 | case CIPSO_V4_MAP_STD: | ||
130 | type_str = "std"; | ||
131 | break; | ||
132 | case CIPSO_V4_MAP_PASS: | ||
133 | type_str = "pass"; | ||
134 | break; | ||
135 | default: | ||
136 | type_str = "(unknown)"; | ||
137 | } | ||
138 | audit_log_format(audit_buf, | ||
139 | " cipso_doi=%u cipso_type=%s res=%u", | ||
140 | doi_def->doi, | ||
141 | type_str, | ||
142 | ret_val == 0 ? 1 : 0); | ||
143 | audit_log_end(audit_buf); | ||
144 | } | ||
145 | |||
146 | return ret_val; | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping | 107 | * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping |
151 | * @doi_def: the DOI definition | 108 | * @doi_def: the DOI definition |
152 | * @domain: the domain mapping to add | 109 | * @domain: the domain mapping to add |
@@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, | |||
164 | struct netlbl_audit *audit_info) | 121 | struct netlbl_audit *audit_info) |
165 | { | 122 | { |
166 | int ret_val = -ENOMEM; | 123 | int ret_val = -ENOMEM; |
124 | u32 doi; | ||
125 | u32 doi_type; | ||
167 | struct netlbl_dom_map *entry; | 126 | struct netlbl_dom_map *entry; |
127 | const char *type_str; | ||
128 | struct audit_buffer *audit_buf; | ||
129 | |||
130 | doi = doi_def->doi; | ||
131 | doi_type = doi_def->type; | ||
168 | 132 | ||
169 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | 133 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); |
170 | if (entry == NULL) | 134 | if (entry == NULL) |
171 | goto cfg_cipsov4_add_map_failure; | 135 | return -ENOMEM; |
172 | if (domain != NULL) { | 136 | if (domain != NULL) { |
173 | entry->domain = kstrdup(domain, GFP_ATOMIC); | 137 | entry->domain = kstrdup(domain, GFP_ATOMIC); |
174 | if (entry->domain == NULL) | 138 | if (entry->domain == NULL) |
175 | goto cfg_cipsov4_add_map_failure; | 139 | goto cfg_cipsov4_add_map_failure; |
176 | } | 140 | } |
177 | entry->type = NETLBL_NLTYPE_CIPSOV4; | ||
178 | entry->type_def.cipsov4 = doi_def; | ||
179 | |||
180 | /* Grab a RCU read lock here so nothing happens to the doi_def variable | ||
181 | * between adding it to the CIPSOv4 protocol engine and adding a | ||
182 | * domain mapping for it. */ | ||
183 | 141 | ||
184 | rcu_read_lock(); | 142 | ret_val = cipso_v4_doi_add(doi_def); |
185 | ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); | ||
186 | if (ret_val != 0) | 143 | if (ret_val != 0) |
187 | goto cfg_cipsov4_add_map_failure_unlock; | 144 | goto cfg_cipsov4_add_map_failure_remove_doi; |
145 | entry->type = NETLBL_NLTYPE_CIPSOV4; | ||
146 | entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi); | ||
147 | if (entry->type_def.cipsov4 == NULL) { | ||
148 | ret_val = -ENOENT; | ||
149 | goto cfg_cipsov4_add_map_failure_remove_doi; | ||
150 | } | ||
188 | ret_val = netlbl_domhsh_add(entry, audit_info); | 151 | ret_val = netlbl_domhsh_add(entry, audit_info); |
189 | if (ret_val != 0) | 152 | if (ret_val != 0) |
190 | goto cfg_cipsov4_add_map_failure_remove_doi; | 153 | goto cfg_cipsov4_add_map_failure_release_doi; |
191 | rcu_read_unlock(); | ||
192 | 154 | ||
193 | return 0; | 155 | cfg_cipsov4_add_map_return: |
156 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, | ||
157 | audit_info); | ||
158 | if (audit_buf != NULL) { | ||
159 | switch (doi_type) { | ||
160 | case CIPSO_V4_MAP_TRANS: | ||
161 | type_str = "trans"; | ||
162 | break; | ||
163 | case CIPSO_V4_MAP_PASS: | ||
164 | type_str = "pass"; | ||
165 | break; | ||
166 | case CIPSO_V4_MAP_LOCAL: | ||
167 | type_str = "local"; | ||
168 | break; | ||
169 | default: | ||
170 | type_str = "(unknown)"; | ||
171 | } | ||
172 | audit_log_format(audit_buf, | ||
173 | " cipso_doi=%u cipso_type=%s res=%u", | ||
174 | doi, type_str, ret_val == 0 ? 1 : 0); | ||
175 | audit_log_end(audit_buf); | ||
176 | } | ||
194 | 177 | ||
178 | return ret_val; | ||
179 | |||
180 | cfg_cipsov4_add_map_failure_release_doi: | ||
181 | cipso_v4_doi_putdef(doi_def); | ||
195 | cfg_cipsov4_add_map_failure_remove_doi: | 182 | cfg_cipsov4_add_map_failure_remove_doi: |
196 | cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); | 183 | cipso_v4_doi_remove(doi, audit_info); |
197 | cfg_cipsov4_add_map_failure_unlock: | ||
198 | rcu_read_unlock(); | ||
199 | cfg_cipsov4_add_map_failure: | 184 | cfg_cipsov4_add_map_failure: |
200 | if (entry != NULL) | 185 | if (entry != NULL) |
201 | kfree(entry->domain); | 186 | kfree(entry->domain); |
202 | kfree(entry); | 187 | kfree(entry); |
203 | return ret_val; | 188 | goto cfg_cipsov4_add_map_return; |
204 | } | ||
205 | |||
206 | /** | ||
207 | * netlbl_cfg_cipsov4_del - Remove an existing CIPSOv4 DOI definition | ||
208 | * @doi: the CIPSO DOI value | ||
209 | * @audit_info: NetLabel audit information | ||
210 | * | ||
211 | * Description: | ||
212 | * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. | ||
213 | * Returns zero on success, negative values on failure. | ||
214 | * | ||
215 | */ | ||
216 | int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) | ||
217 | { | ||
218 | return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); | ||
219 | } | 189 | } |
220 | 190 | ||
221 | /* | 191 | /* |
@@ -452,7 +422,9 @@ int netlbl_enabled(void) | |||
452 | * Attach the correct label to the given socket using the security attributes | 422 | * Attach the correct label to the given socket using the security attributes |
453 | * specified in @secattr. This function requires exclusive access to @sk, | 423 | * specified in @secattr. This function requires exclusive access to @sk, |
454 | * which means it either needs to be in the process of being created or locked. | 424 | * which means it either needs to be in the process of being created or locked. |
455 | * Returns zero on success, negative values on failure. | 425 | * Returns zero on success, -EDESTADDRREQ if the domain is configured to use |
426 | * network address selectors (can't blindly label the socket), and negative | ||
427 | * values on all other failures. | ||
456 | * | 428 | * |
457 | */ | 429 | */ |
458 | int netlbl_sock_setattr(struct sock *sk, | 430 | int netlbl_sock_setattr(struct sock *sk, |
@@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk, | |||
466 | if (dom_entry == NULL) | 438 | if (dom_entry == NULL) |
467 | goto socket_setattr_return; | 439 | goto socket_setattr_return; |
468 | switch (dom_entry->type) { | 440 | switch (dom_entry->type) { |
441 | case NETLBL_NLTYPE_ADDRSELECT: | ||
442 | ret_val = -EDESTADDRREQ; | ||
443 | break; | ||
469 | case NETLBL_NLTYPE_CIPSOV4: | 444 | case NETLBL_NLTYPE_CIPSOV4: |
470 | ret_val = cipso_v4_sock_setattr(sk, | 445 | ret_val = cipso_v4_sock_setattr(sk, |
471 | dom_entry->type_def.cipsov4, | 446 | dom_entry->type_def.cipsov4, |
@@ -484,6 +459,20 @@ socket_setattr_return: | |||
484 | } | 459 | } |
485 | 460 | ||
486 | /** | 461 | /** |
462 | * netlbl_sock_delattr - Delete all the NetLabel labels on a socket | ||
463 | * @sk: the socket | ||
464 | * | ||
465 | * Description: | ||
466 | * Remove all the NetLabel labeling from @sk. The caller is responsible for | ||
467 | * ensuring that @sk is locked. | ||
468 | * | ||
469 | */ | ||
470 | void netlbl_sock_delattr(struct sock *sk) | ||
471 | { | ||
472 | cipso_v4_sock_delattr(sk); | ||
473 | } | ||
474 | |||
475 | /** | ||
487 | * netlbl_sock_getattr - Determine the security attributes of a sock | 476 | * netlbl_sock_getattr - Determine the security attributes of a sock |
488 | * @sk: the sock | 477 | * @sk: the sock |
489 | * @secattr: the security attributes | 478 | * @secattr: the security attributes |
@@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) | |||
501 | } | 490 | } |
502 | 491 | ||
503 | /** | 492 | /** |
493 | * netlbl_conn_setattr - Label a connected socket using the correct protocol | ||
494 | * @sk: the socket to label | ||
495 | * @addr: the destination address | ||
496 | * @secattr: the security attributes | ||
497 | * | ||
498 | * Description: | ||
499 | * Attach the correct label to the given connected socket using the security | ||
500 | * attributes specified in @secattr. The caller is responsible for ensuring | ||
501 | * that @sk is locked. Returns zero on success, negative values on failure. | ||
502 | * | ||
503 | */ | ||
504 | int netlbl_conn_setattr(struct sock *sk, | ||
505 | struct sockaddr *addr, | ||
506 | const struct netlbl_lsm_secattr *secattr) | ||
507 | { | ||
508 | int ret_val; | ||
509 | struct sockaddr_in *addr4; | ||
510 | struct netlbl_domaddr4_map *af4_entry; | ||
511 | |||
512 | rcu_read_lock(); | ||
513 | switch (addr->sa_family) { | ||
514 | case AF_INET: | ||
515 | addr4 = (struct sockaddr_in *)addr; | ||
516 | af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, | ||
517 | addr4->sin_addr.s_addr); | ||
518 | if (af4_entry == NULL) { | ||
519 | ret_val = -ENOENT; | ||
520 | goto conn_setattr_return; | ||
521 | } | ||
522 | switch (af4_entry->type) { | ||
523 | case NETLBL_NLTYPE_CIPSOV4: | ||
524 | ret_val = cipso_v4_sock_setattr(sk, | ||
525 | af4_entry->type_def.cipsov4, | ||
526 | secattr); | ||
527 | break; | ||
528 | case NETLBL_NLTYPE_UNLABELED: | ||
529 | /* just delete the protocols we support for right now | ||
530 | * but we could remove other protocols if needed */ | ||
531 | cipso_v4_sock_delattr(sk); | ||
532 | ret_val = 0; | ||
533 | break; | ||
534 | default: | ||
535 | ret_val = -ENOENT; | ||
536 | } | ||
537 | break; | ||
538 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
539 | case AF_INET6: | ||
540 | /* since we don't support any IPv6 labeling protocols right | ||
541 | * now we can optimize everything away until we do */ | ||
542 | ret_val = 0; | ||
543 | break; | ||
544 | #endif /* IPv6 */ | ||
545 | default: | ||
546 | ret_val = 0; | ||
547 | } | ||
548 | |||
549 | conn_setattr_return: | ||
550 | rcu_read_unlock(); | ||
551 | return ret_val; | ||
552 | } | ||
553 | |||
554 | /** | ||
555 | * netlbl_skbuff_setattr - Label a packet using the correct protocol | ||
556 | * @skb: the packet | ||
557 | * @family: protocol family | ||
558 | * @secattr: the security attributes | ||
559 | * | ||
560 | * Description: | ||
561 | * Attach the correct label to the given packet using the security attributes | ||
562 | * specified in @secattr. Returns zero on success, negative values on failure. | ||
563 | * | ||
564 | */ | ||
565 | int netlbl_skbuff_setattr(struct sk_buff *skb, | ||
566 | u16 family, | ||
567 | const struct netlbl_lsm_secattr *secattr) | ||
568 | { | ||
569 | int ret_val; | ||
570 | struct iphdr *hdr4; | ||
571 | struct netlbl_domaddr4_map *af4_entry; | ||
572 | |||
573 | rcu_read_lock(); | ||
574 | switch (family) { | ||
575 | case AF_INET: | ||
576 | hdr4 = ip_hdr(skb); | ||
577 | af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, | ||
578 | hdr4->daddr); | ||
579 | if (af4_entry == NULL) { | ||
580 | ret_val = -ENOENT; | ||
581 | goto skbuff_setattr_return; | ||
582 | } | ||
583 | switch (af4_entry->type) { | ||
584 | case NETLBL_NLTYPE_CIPSOV4: | ||
585 | ret_val = cipso_v4_skbuff_setattr(skb, | ||
586 | af4_entry->type_def.cipsov4, | ||
587 | secattr); | ||
588 | break; | ||
589 | case NETLBL_NLTYPE_UNLABELED: | ||
590 | /* just delete the protocols we support for right now | ||
591 | * but we could remove other protocols if needed */ | ||
592 | ret_val = cipso_v4_skbuff_delattr(skb); | ||
593 | break; | ||
594 | default: | ||
595 | ret_val = -ENOENT; | ||
596 | } | ||
597 | break; | ||
598 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
599 | case AF_INET6: | ||
600 | /* since we don't support any IPv6 labeling protocols right | ||
601 | * now we can optimize everything away until we do */ | ||
602 | ret_val = 0; | ||
603 | break; | ||
604 | #endif /* IPv6 */ | ||
605 | default: | ||
606 | ret_val = 0; | ||
607 | } | ||
608 | |||
609 | skbuff_setattr_return: | ||
610 | rcu_read_unlock(); | ||
611 | return ret_val; | ||
612 | } | ||
613 | |||
614 | /** | ||
504 | * netlbl_skbuff_getattr - Determine the security attributes of a packet | 615 | * netlbl_skbuff_getattr - Determine the security attributes of a packet |
505 | * @skb: the packet | 616 | * @skb: the packet |
506 | * @family: protocol family | 617 | * @family: protocol family |
@@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, | |||
528 | * netlbl_skbuff_err - Handle a LSM error on a sk_buff | 639 | * netlbl_skbuff_err - Handle a LSM error on a sk_buff |
529 | * @skb: the packet | 640 | * @skb: the packet |
530 | * @error: the error code | 641 | * @error: the error code |
642 | * @gateway: true if host is acting as a gateway, false otherwise | ||
531 | * | 643 | * |
532 | * Description: | 644 | * Description: |
533 | * Deal with a LSM problem when handling the packet in @skb, typically this is | 645 | * Deal with a LSM problem when handling the packet in @skb, typically this is |
@@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, | |||
535 | * according to the packet's labeling protocol. | 647 | * according to the packet's labeling protocol. |
536 | * | 648 | * |
537 | */ | 649 | */ |
538 | void netlbl_skbuff_err(struct sk_buff *skb, int error) | 650 | void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) |
539 | { | 651 | { |
540 | if (CIPSO_V4_OPTEXIST(skb)) | 652 | if (CIPSO_V4_OPTEXIST(skb)) |
541 | cipso_v4_error(skb, error, 0); | 653 | cipso_v4_error(skb, error, gateway); |
542 | } | 654 | } |
543 | 655 | ||
544 | /** | 656 | /** |
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 44be5d5261f4..ee769ecaa13c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c | |||
@@ -10,7 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | 13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 |
14 | * | 14 | * |
15 | * This program is free software; you can redistribute it and/or modify | 15 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 16 | * it under the terms of the GNU General Public License as published by |
@@ -32,9 +32,13 @@ | |||
32 | #include <linux/socket.h> | 32 | #include <linux/socket.h> |
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/skbuff.h> | 34 | #include <linux/skbuff.h> |
35 | #include <linux/in.h> | ||
36 | #include <linux/in6.h> | ||
35 | #include <net/sock.h> | 37 | #include <net/sock.h> |
36 | #include <net/netlink.h> | 38 | #include <net/netlink.h> |
37 | #include <net/genetlink.h> | 39 | #include <net/genetlink.h> |
40 | #include <net/ip.h> | ||
41 | #include <net/ipv6.h> | ||
38 | #include <net/netlabel.h> | 42 | #include <net/netlabel.h> |
39 | #include <net/cipso_ipv4.h> | 43 | #include <net/cipso_ipv4.h> |
40 | #include <asm/atomic.h> | 44 | #include <asm/atomic.h> |
@@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { | |||
71 | }; | 75 | }; |
72 | 76 | ||
73 | /* | 77 | /* |
74 | * NetLabel Command Handlers | 78 | * Helper Functions |
75 | */ | 79 | */ |
76 | 80 | ||
77 | /** | 81 | /** |
78 | * netlbl_mgmt_add - Handle an ADD message | 82 | * netlbl_mgmt_add_common - Handle an ADD message |
79 | * @skb: the NETLINK buffer | ||
80 | * @info: the Generic NETLINK info block | 83 | * @info: the Generic NETLINK info block |
84 | * @audit_info: NetLabel audit information | ||
81 | * | 85 | * |
82 | * Description: | 86 | * Description: |
83 | * Process a user generated ADD message and add the domains from the message | 87 | * Helper function for the ADD and ADDDEF messages to add the domain mappings |
84 | * to the hash table. See netlabel.h for a description of the message format. | 88 | * from the message to the hash table. See netlabel.h for a description of the |
85 | * Returns zero on success, negative values on failure. | 89 | * message format. Returns zero on success, negative values on failure. |
86 | * | 90 | * |
87 | */ | 91 | */ |
88 | static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) | 92 | static int netlbl_mgmt_add_common(struct genl_info *info, |
93 | struct netlbl_audit *audit_info) | ||
89 | { | 94 | { |
90 | int ret_val = -EINVAL; | 95 | int ret_val = -EINVAL; |
91 | struct netlbl_dom_map *entry = NULL; | 96 | struct netlbl_dom_map *entry = NULL; |
92 | size_t tmp_size; | 97 | struct netlbl_domaddr_map *addrmap = NULL; |
98 | struct cipso_v4_doi *cipsov4 = NULL; | ||
93 | u32 tmp_val; | 99 | u32 tmp_val; |
94 | struct netlbl_audit audit_info; | ||
95 | |||
96 | if (!info->attrs[NLBL_MGMT_A_DOMAIN] || | ||
97 | !info->attrs[NLBL_MGMT_A_PROTOCOL]) | ||
98 | goto add_failure; | ||
99 | |||
100 | netlbl_netlink_auditinfo(skb, &audit_info); | ||
101 | 100 | ||
102 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | 101 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
103 | if (entry == NULL) { | 102 | if (entry == NULL) { |
104 | ret_val = -ENOMEM; | 103 | ret_val = -ENOMEM; |
105 | goto add_failure; | 104 | goto add_failure; |
106 | } | 105 | } |
107 | tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); | ||
108 | entry->domain = kmalloc(tmp_size, GFP_KERNEL); | ||
109 | if (entry->domain == NULL) { | ||
110 | ret_val = -ENOMEM; | ||
111 | goto add_failure; | ||
112 | } | ||
113 | entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); | 106 | entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); |
114 | nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); | 107 | if (info->attrs[NLBL_MGMT_A_DOMAIN]) { |
108 | size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); | ||
109 | entry->domain = kmalloc(tmp_size, GFP_KERNEL); | ||
110 | if (entry->domain == NULL) { | ||
111 | ret_val = -ENOMEM; | ||
112 | goto add_failure; | ||
113 | } | ||
114 | nla_strlcpy(entry->domain, | ||
115 | info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); | ||
116 | } | ||
117 | |||
118 | /* NOTE: internally we allow/use an entry->type value of | ||
119 | * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users | ||
120 | * to pass that as a protocol value because we need to know the | ||
121 | * "real" protocol */ | ||
115 | 122 | ||
116 | switch (entry->type) { | 123 | switch (entry->type) { |
117 | case NETLBL_NLTYPE_UNLABELED: | 124 | case NETLBL_NLTYPE_UNLABELED: |
118 | ret_val = netlbl_domhsh_add(entry, &audit_info); | ||
119 | break; | 125 | break; |
120 | case NETLBL_NLTYPE_CIPSOV4: | 126 | case NETLBL_NLTYPE_CIPSOV4: |
121 | if (!info->attrs[NLBL_MGMT_A_CV4DOI]) | 127 | if (!info->attrs[NLBL_MGMT_A_CV4DOI]) |
122 | goto add_failure; | 128 | goto add_failure; |
123 | 129 | ||
124 | tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); | 130 | tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); |
125 | /* We should be holding a rcu_read_lock() here while we hold | 131 | cipsov4 = cipso_v4_doi_getdef(tmp_val); |
126 | * the result but since the entry will always be deleted when | 132 | if (cipsov4 == NULL) |
127 | * the CIPSO DOI is deleted we aren't going to keep the | ||
128 | * lock. */ | ||
129 | rcu_read_lock(); | ||
130 | entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); | ||
131 | if (entry->type_def.cipsov4 == NULL) { | ||
132 | rcu_read_unlock(); | ||
133 | goto add_failure; | 133 | goto add_failure; |
134 | } | 134 | entry->type_def.cipsov4 = cipsov4; |
135 | ret_val = netlbl_domhsh_add(entry, &audit_info); | ||
136 | rcu_read_unlock(); | ||
137 | break; | 135 | break; |
138 | default: | 136 | default: |
139 | goto add_failure; | 137 | goto add_failure; |
140 | } | 138 | } |
139 | |||
140 | if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { | ||
141 | struct in_addr *addr; | ||
142 | struct in_addr *mask; | ||
143 | struct netlbl_domaddr4_map *map; | ||
144 | |||
145 | addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); | ||
146 | if (addrmap == NULL) { | ||
147 | ret_val = -ENOMEM; | ||
148 | goto add_failure; | ||
149 | } | ||
150 | INIT_LIST_HEAD(&addrmap->list4); | ||
151 | INIT_LIST_HEAD(&addrmap->list6); | ||
152 | |||
153 | if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != | ||
154 | sizeof(struct in_addr)) { | ||
155 | ret_val = -EINVAL; | ||
156 | goto add_failure; | ||
157 | } | ||
158 | if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != | ||
159 | sizeof(struct in_addr)) { | ||
160 | ret_val = -EINVAL; | ||
161 | goto add_failure; | ||
162 | } | ||
163 | addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); | ||
164 | mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); | ||
165 | |||
166 | map = kzalloc(sizeof(*map), GFP_KERNEL); | ||
167 | if (map == NULL) { | ||
168 | ret_val = -ENOMEM; | ||
169 | goto add_failure; | ||
170 | } | ||
171 | map->list.addr = addr->s_addr & mask->s_addr; | ||
172 | map->list.mask = mask->s_addr; | ||
173 | map->list.valid = 1; | ||
174 | map->type = entry->type; | ||
175 | if (cipsov4) | ||
176 | map->type_def.cipsov4 = cipsov4; | ||
177 | |||
178 | ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); | ||
179 | if (ret_val != 0) { | ||
180 | kfree(map); | ||
181 | goto add_failure; | ||
182 | } | ||
183 | |||
184 | entry->type = NETLBL_NLTYPE_ADDRSELECT; | ||
185 | entry->type_def.addrsel = addrmap; | ||
186 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
187 | } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { | ||
188 | struct in6_addr *addr; | ||
189 | struct in6_addr *mask; | ||
190 | struct netlbl_domaddr6_map *map; | ||
191 | |||
192 | addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); | ||
193 | if (addrmap == NULL) { | ||
194 | ret_val = -ENOMEM; | ||
195 | goto add_failure; | ||
196 | } | ||
197 | INIT_LIST_HEAD(&addrmap->list4); | ||
198 | INIT_LIST_HEAD(&addrmap->list6); | ||
199 | |||
200 | if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != | ||
201 | sizeof(struct in6_addr)) { | ||
202 | ret_val = -EINVAL; | ||
203 | goto add_failure; | ||
204 | } | ||
205 | if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != | ||
206 | sizeof(struct in6_addr)) { | ||
207 | ret_val = -EINVAL; | ||
208 | goto add_failure; | ||
209 | } | ||
210 | addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); | ||
211 | mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); | ||
212 | |||
213 | map = kzalloc(sizeof(*map), GFP_KERNEL); | ||
214 | if (map == NULL) { | ||
215 | ret_val = -ENOMEM; | ||
216 | goto add_failure; | ||
217 | } | ||
218 | ipv6_addr_copy(&map->list.addr, addr); | ||
219 | map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; | ||
220 | map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; | ||
221 | map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; | ||
222 | map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; | ||
223 | ipv6_addr_copy(&map->list.mask, mask); | ||
224 | map->list.valid = 1; | ||
225 | map->type = entry->type; | ||
226 | |||
227 | ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); | ||
228 | if (ret_val != 0) { | ||
229 | kfree(map); | ||
230 | goto add_failure; | ||
231 | } | ||
232 | |||
233 | entry->type = NETLBL_NLTYPE_ADDRSELECT; | ||
234 | entry->type_def.addrsel = addrmap; | ||
235 | #endif /* IPv6 */ | ||
236 | } | ||
237 | |||
238 | ret_val = netlbl_domhsh_add(entry, audit_info); | ||
141 | if (ret_val != 0) | 239 | if (ret_val != 0) |
142 | goto add_failure; | 240 | goto add_failure; |
143 | 241 | ||
144 | return 0; | 242 | return 0; |
145 | 243 | ||
146 | add_failure: | 244 | add_failure: |
245 | if (cipsov4) | ||
246 | cipso_v4_doi_putdef(cipsov4); | ||
147 | if (entry) | 247 | if (entry) |
148 | kfree(entry->domain); | 248 | kfree(entry->domain); |
249 | kfree(addrmap); | ||
149 | kfree(entry); | 250 | kfree(entry); |
150 | return ret_val; | 251 | return ret_val; |
151 | } | 252 | } |
152 | 253 | ||
153 | /** | 254 | /** |
255 | * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry | ||
256 | * @skb: the NETLINK buffer | ||
257 | * @entry: the map entry | ||
258 | * | ||
259 | * Description: | ||
260 | * This function is a helper function used by the LISTALL and LISTDEF command | ||
261 | * handlers. The caller is responsible for ensuring that the RCU read lock | ||
262 | * is held. Returns zero on success, negative values on failure. | ||
263 | * | ||
264 | */ | ||
265 | static int netlbl_mgmt_listentry(struct sk_buff *skb, | ||
266 | struct netlbl_dom_map *entry) | ||
267 | { | ||
268 | int ret_val; | ||
269 | struct nlattr *nla_a; | ||
270 | struct nlattr *nla_b; | ||
271 | struct netlbl_af4list *iter4; | ||
272 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
273 | struct netlbl_af6list *iter6; | ||
274 | #endif | ||
275 | |||
276 | if (entry->domain != NULL) { | ||
277 | ret_val = nla_put_string(skb, | ||
278 | NLBL_MGMT_A_DOMAIN, entry->domain); | ||
279 | if (ret_val != 0) | ||
280 | return ret_val; | ||
281 | } | ||
282 | |||
283 | switch (entry->type) { | ||
284 | case NETLBL_NLTYPE_ADDRSELECT: | ||
285 | nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); | ||
286 | if (nla_a == NULL) | ||
287 | return -ENOMEM; | ||
288 | |||
289 | netlbl_af4list_foreach_rcu(iter4, | ||
290 | &entry->type_def.addrsel->list4) { | ||
291 | struct netlbl_domaddr4_map *map4; | ||
292 | struct in_addr addr_struct; | ||
293 | |||
294 | nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); | ||
295 | if (nla_b == NULL) | ||
296 | return -ENOMEM; | ||
297 | |||
298 | addr_struct.s_addr = iter4->addr; | ||
299 | ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, | ||
300 | sizeof(struct in_addr), | ||
301 | &addr_struct); | ||
302 | if (ret_val != 0) | ||
303 | return ret_val; | ||
304 | addr_struct.s_addr = iter4->mask; | ||
305 | ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, | ||
306 | sizeof(struct in_addr), | ||
307 | &addr_struct); | ||
308 | if (ret_val != 0) | ||
309 | return ret_val; | ||
310 | map4 = netlbl_domhsh_addr4_entry(iter4); | ||
311 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, | ||
312 | map4->type); | ||
313 | if (ret_val != 0) | ||
314 | return ret_val; | ||
315 | switch (map4->type) { | ||
316 | case NETLBL_NLTYPE_CIPSOV4: | ||
317 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, | ||
318 | map4->type_def.cipsov4->doi); | ||
319 | if (ret_val != 0) | ||
320 | return ret_val; | ||
321 | break; | ||
322 | } | ||
323 | |||
324 | nla_nest_end(skb, nla_b); | ||
325 | } | ||
326 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
327 | netlbl_af6list_foreach_rcu(iter6, | ||
328 | &entry->type_def.addrsel->list6) { | ||
329 | struct netlbl_domaddr6_map *map6; | ||
330 | |||
331 | nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); | ||
332 | if (nla_b == NULL) | ||
333 | return -ENOMEM; | ||
334 | |||
335 | ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, | ||
336 | sizeof(struct in6_addr), | ||
337 | &iter6->addr); | ||
338 | if (ret_val != 0) | ||
339 | return ret_val; | ||
340 | ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, | ||
341 | sizeof(struct in6_addr), | ||
342 | &iter6->mask); | ||
343 | if (ret_val != 0) | ||
344 | return ret_val; | ||
345 | map6 = netlbl_domhsh_addr6_entry(iter6); | ||
346 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, | ||
347 | map6->type); | ||
348 | if (ret_val != 0) | ||
349 | return ret_val; | ||
350 | |||
351 | nla_nest_end(skb, nla_b); | ||
352 | } | ||
353 | #endif /* IPv6 */ | ||
354 | |||
355 | nla_nest_end(skb, nla_a); | ||
356 | break; | ||
357 | case NETLBL_NLTYPE_UNLABELED: | ||
358 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); | ||
359 | break; | ||
360 | case NETLBL_NLTYPE_CIPSOV4: | ||
361 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); | ||
362 | if (ret_val != 0) | ||
363 | return ret_val; | ||
364 | ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, | ||
365 | entry->type_def.cipsov4->doi); | ||
366 | break; | ||
367 | } | ||
368 | |||
369 | return ret_val; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * NetLabel Command Handlers | ||
374 | */ | ||
375 | |||
376 | /** | ||
377 | * netlbl_mgmt_add - Handle an ADD message | ||
378 | * @skb: the NETLINK buffer | ||
379 | * @info: the Generic NETLINK info block | ||
380 | * | ||
381 | * Description: | ||
382 | * Process a user generated ADD message and add the domains from the message | ||
383 | * to the hash table. See netlabel.h for a description of the message format. | ||
384 | * Returns zero on success, negative values on failure. | ||
385 | * | ||
386 | */ | ||
387 | static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) | ||
388 | { | ||
389 | struct netlbl_audit audit_info; | ||
390 | |||
391 | if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) || | ||
392 | (!info->attrs[NLBL_MGMT_A_PROTOCOL]) || | ||
393 | (info->attrs[NLBL_MGMT_A_IPV4ADDR] && | ||
394 | info->attrs[NLBL_MGMT_A_IPV6ADDR]) || | ||
395 | (info->attrs[NLBL_MGMT_A_IPV4MASK] && | ||
396 | info->attrs[NLBL_MGMT_A_IPV6MASK]) || | ||
397 | ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ | ||
398 | (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || | ||
399 | ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ | ||
400 | (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) | ||
401 | return -EINVAL; /* domain or protocol missing, both address families given, or an address/mask supplied without its partner */ | ||
402 | |||
403 | netlbl_netlink_auditinfo(skb, &audit_info); | ||
404 | |||
405 | return netlbl_mgmt_add_common(info, &audit_info); | ||
406 | } | ||
407 | |||
408 | /** | ||
154 | * netlbl_mgmt_remove - Handle a REMOVE message | 409 | * netlbl_mgmt_remove - Handle a REMOVE message |
155 | * @skb: the NETLINK buffer | 410 | * @skb: the NETLINK buffer |
156 | * @info: the Generic NETLINK info block | 411 | * @info: the Generic NETLINK info block |
@@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) | |||
198 | if (data == NULL) | 453 | if (data == NULL) |
199 | goto listall_cb_failure; | 454 | goto listall_cb_failure; |
200 | 455 | ||
201 | ret_val = nla_put_string(cb_arg->skb, | 456 | ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry); |
202 | NLBL_MGMT_A_DOMAIN, | ||
203 | entry->domain); | ||
204 | if (ret_val != 0) | 457 | if (ret_val != 0) |
205 | goto listall_cb_failure; | 458 | goto listall_cb_failure; |
206 | ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); | ||
207 | if (ret_val != 0) | ||
208 | goto listall_cb_failure; | ||
209 | switch (entry->type) { | ||
210 | case NETLBL_NLTYPE_CIPSOV4: | ||
211 | ret_val = nla_put_u32(cb_arg->skb, | ||
212 | NLBL_MGMT_A_CV4DOI, | ||
213 | entry->type_def.cipsov4->doi); | ||
214 | if (ret_val != 0) | ||
215 | goto listall_cb_failure; | ||
216 | break; | ||
217 | } | ||
218 | 459 | ||
219 | cb_arg->seq++; | 460 | cb_arg->seq++; |
220 | return genlmsg_end(cb_arg->skb, data); | 461 | return genlmsg_end(cb_arg->skb, data); |
@@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb, | |||
268 | */ | 509 | */ |
269 | static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) | 510 | static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) |
270 | { | 511 | { |
271 | int ret_val = -EINVAL; | ||
272 | struct netlbl_dom_map *entry = NULL; | ||
273 | u32 tmp_val; | ||
274 | struct netlbl_audit audit_info; | 512 | struct netlbl_audit audit_info; |
275 | 513 | ||
276 | if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) | 514 | if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) || |
277 | goto adddef_failure; | 515 | (info->attrs[NLBL_MGMT_A_IPV4ADDR] && |
516 | info->attrs[NLBL_MGMT_A_IPV6ADDR]) || | ||
517 | (info->attrs[NLBL_MGMT_A_IPV4MASK] && | ||
518 | info->attrs[NLBL_MGMT_A_IPV6MASK]) || | ||
519 | ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ | ||
520 | (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || | ||
521 | ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ | ||
522 | (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) | ||
523 | return -EINVAL; | ||
278 | 524 | ||
279 | netlbl_netlink_auditinfo(skb, &audit_info); | 525 | netlbl_netlink_auditinfo(skb, &audit_info); |
280 | 526 | ||
281 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | 527 | return netlbl_mgmt_add_common(info, &audit_info); |
282 | if (entry == NULL) { | ||
283 | ret_val = -ENOMEM; | ||
284 | goto adddef_failure; | ||
285 | } | ||
286 | entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); | ||
287 | |||
288 | switch (entry->type) { | ||
289 | case NETLBL_NLTYPE_UNLABELED: | ||
290 | ret_val = netlbl_domhsh_add_default(entry, &audit_info); | ||
291 | break; | ||
292 | case NETLBL_NLTYPE_CIPSOV4: | ||
293 | if (!info->attrs[NLBL_MGMT_A_CV4DOI]) | ||
294 | goto adddef_failure; | ||
295 | |||
296 | tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); | ||
297 | /* We should be holding a rcu_read_lock() here while we hold | ||
298 | * the result but since the entry will always be deleted when | ||
299 | * the CIPSO DOI is deleted we aren't going to keep the | ||
300 | * lock. */ | ||
301 | rcu_read_lock(); | ||
302 | entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); | ||
303 | if (entry->type_def.cipsov4 == NULL) { | ||
304 | rcu_read_unlock(); | ||
305 | goto adddef_failure; | ||
306 | } | ||
307 | ret_val = netlbl_domhsh_add_default(entry, &audit_info); | ||
308 | rcu_read_unlock(); | ||
309 | break; | ||
310 | default: | ||
311 | goto adddef_failure; | ||
312 | } | ||
313 | if (ret_val != 0) | ||
314 | goto adddef_failure; | ||
315 | |||
316 | return 0; | ||
317 | |||
318 | adddef_failure: | ||
319 | kfree(entry); | ||
320 | return ret_val; | ||
321 | } | 528 | } |
322 | 529 | ||
323 | /** | 530 | /** |
@@ -371,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) | |||
371 | ret_val = -ENOENT; | 578 | ret_val = -ENOENT; |
372 | goto listdef_failure_lock; | 579 | goto listdef_failure_lock; |
373 | } | 580 | } |
374 | ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); | 581 | ret_val = netlbl_mgmt_listentry(ans_skb, entry); |
375 | if (ret_val != 0) | ||
376 | goto listdef_failure_lock; | ||
377 | switch (entry->type) { | ||
378 | case NETLBL_NLTYPE_CIPSOV4: | ||
379 | ret_val = nla_put_u32(ans_skb, | ||
380 | NLBL_MGMT_A_CV4DOI, | ||
381 | entry->type_def.cipsov4->doi); | ||
382 | if (ret_val != 0) | ||
383 | goto listdef_failure_lock; | ||
384 | break; | ||
385 | } | ||
386 | rcu_read_unlock(); | 582 | rcu_read_unlock(); |
583 | if (ret_val != 0) | ||
584 | goto listdef_failure; | ||
387 | 585 | ||
388 | genlmsg_end(ans_skb, data); | 586 | genlmsg_end(ans_skb, data); |
389 | return genlmsg_reply(ans_skb, info); | 587 | return genlmsg_reply(ans_skb, info); |
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index a43bff169d6b..05d96431f819 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h | |||
@@ -45,6 +45,16 @@ | |||
45 | * NLBL_MGMT_A_DOMAIN | 45 | * NLBL_MGMT_A_DOMAIN |
46 | * NLBL_MGMT_A_PROTOCOL | 46 | * NLBL_MGMT_A_PROTOCOL |
47 | * | 47 | * |
48 | * If IPv4 is specified the following attributes are required: | ||
49 | * | ||
50 | * NLBL_MGMT_A_IPV4ADDR | ||
51 | * NLBL_MGMT_A_IPV4MASK | ||
52 | * | ||
53 | * If IPv6 is specified the following attributes are required: | ||
54 | * | ||
55 | * NLBL_MGMT_A_IPV6ADDR | ||
56 | * NLBL_MGMT_A_IPV6MASK | ||
57 | * | ||
48 | * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: | 58 | * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: |
49 | * | 59 | * |
50 | * NLBL_MGMT_A_CV4DOI | 60 | * NLBL_MGMT_A_CV4DOI |
@@ -68,13 +78,24 @@ | |||
68 | * Required attributes: | 78 | * Required attributes: |
69 | * | 79 | * |
70 | * NLBL_MGMT_A_DOMAIN | 80 | * NLBL_MGMT_A_DOMAIN |
81 | * | ||
82 | * If the IP address selectors are not used the following attribute is | ||
83 | * required: | ||
84 | * | ||
71 | * NLBL_MGMT_A_PROTOCOL | 85 | * NLBL_MGMT_A_PROTOCOL |
72 | * | 86 | * |
73 | * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: | 87 | * If the IP address selectors are used then the following attribute is |
88 | * required: | ||
89 | * | ||
90 | * NLBL_MGMT_A_SELECTORLIST | ||
91 | * | ||
92 | * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following | ||
93 | * attributes are required: | ||
74 | * | 94 | * |
75 | * NLBL_MGMT_A_CV4DOI | 95 | * NLBL_MGMT_A_CV4DOI |
76 | * | 96 | * |
77 | * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. | 97 | * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other |
98 | * attributes are required. | ||
78 | * | 99 | * |
79 | * o ADDDEF: | 100 | * o ADDDEF: |
80 | * Sent by an application to set the default domain mapping for the NetLabel | 101 | * Sent by an application to set the default domain mapping for the NetLabel |
@@ -100,15 +121,23 @@ | |||
100 | * application there is no payload. On success the kernel should send a | 121 | * application there is no payload. On success the kernel should send a |
101 | * response using the following format. | 122 | * response using the following format. |
102 | * | 123 | * |
103 | * Required attributes: | 124 | * If the IP address selectors are not used the following attribute is |
125 | * required: | ||
104 | * | 126 | * |
105 | * NLBL_MGMT_A_PROTOCOL | 127 | * NLBL_MGMT_A_PROTOCOL |
106 | * | 128 | * |
107 | * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: | 129 | * If the IP address selectors are used then the following attribute is |
130 | * required: | ||
131 | * | ||
132 | * NLBL_MGMT_A_SELECTORLIST | ||
133 | * | ||
134 | * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following | ||
135 | * attributes are required: | ||
108 | * | 136 | * |
109 | * NLBL_MGMT_A_CV4DOI | 137 | * NLBL_MGMT_A_CV4DOI |
110 | * | 138 | * |
111 | * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. | 139 | * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other |
140 | * attributes are required. | ||
112 | * | 141 | * |
113 | * o PROTOCOLS: | 142 | * o PROTOCOLS: |
114 | * Sent by an application to request a list of configured NetLabel protocols | 143 | * Sent by an application to request a list of configured NetLabel protocols |
@@ -162,6 +191,26 @@ enum { | |||
162 | NLBL_MGMT_A_CV4DOI, | 191 | NLBL_MGMT_A_CV4DOI, |
163 | /* (NLA_U32) | 192 | /* (NLA_U32) |
164 | * the CIPSOv4 DOI value */ | 193 | * the CIPSOv4 DOI value */ |
194 | NLBL_MGMT_A_IPV6ADDR, | ||
195 | /* (NLA_BINARY, struct in6_addr) | ||
196 | * an IPv6 address */ | ||
197 | NLBL_MGMT_A_IPV6MASK, | ||
198 | /* (NLA_BINARY, struct in6_addr) | ||
199 | * an IPv6 address mask */ | ||
200 | NLBL_MGMT_A_IPV4ADDR, | ||
201 | /* (NLA_BINARY, struct in_addr) | ||
202 | * an IPv4 address */ | ||
203 | NLBL_MGMT_A_IPV4MASK, | ||
204 | /* (NLA_BINARY, struct in_addr) | ||
205 | * an IPv4 address mask */ | ||
206 | NLBL_MGMT_A_ADDRSELECTOR, | ||
207 | /* (NLA_NESTED) | ||
208 | * an IP address selector, must contain an address, mask, and protocol | ||
209 | * attribute plus any protocol specific attributes */ | ||
210 | NLBL_MGMT_A_SELECTORLIST, | ||
211 | /* (NLA_NESTED) | ||
212 | * the selector list, there must be at least one | ||
213 | * NLBL_MGMT_A_ADDRSELECTOR attribute */ | ||
165 | __NLBL_MGMT_A_MAX, | 214 | __NLBL_MGMT_A_MAX, |
166 | }; | 215 | }; |
167 | #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) | 216 | #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) |
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 921c118ead89..e8a5c32b0f10 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c | |||
@@ -10,7 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007 | 13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 |
14 | * | 14 | * |
15 | * This program is free software; you can redistribute it and/or modify | 15 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 16 | * it under the terms of the GNU General Public License as published by |
@@ -54,6 +54,7 @@ | |||
54 | #include <asm/atomic.h> | 54 | #include <asm/atomic.h> |
55 | 55 | ||
56 | #include "netlabel_user.h" | 56 | #include "netlabel_user.h" |
57 | #include "netlabel_addrlist.h" | ||
57 | #include "netlabel_domainhash.h" | 58 | #include "netlabel_domainhash.h" |
58 | #include "netlabel_unlabeled.h" | 59 | #include "netlabel_unlabeled.h" |
59 | #include "netlabel_mgmt.h" | 60 | #include "netlabel_mgmt.h" |
@@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl { | |||
76 | struct list_head *tbl; | 77 | struct list_head *tbl; |
77 | u32 size; | 78 | u32 size; |
78 | }; | 79 | }; |
80 | #define netlbl_unlhsh_addr4_entry(iter) \ | ||
81 | container_of(iter, struct netlbl_unlhsh_addr4, list) | ||
79 | struct netlbl_unlhsh_addr4 { | 82 | struct netlbl_unlhsh_addr4 { |
80 | __be32 addr; | ||
81 | __be32 mask; | ||
82 | u32 secid; | 83 | u32 secid; |
83 | 84 | ||
84 | u32 valid; | 85 | struct netlbl_af4list list; |
85 | struct list_head list; | ||
86 | struct rcu_head rcu; | 86 | struct rcu_head rcu; |
87 | }; | 87 | }; |
88 | #define netlbl_unlhsh_addr6_entry(iter) \ | ||
89 | container_of(iter, struct netlbl_unlhsh_addr6, list) | ||
88 | struct netlbl_unlhsh_addr6 { | 90 | struct netlbl_unlhsh_addr6 { |
89 | struct in6_addr addr; | ||
90 | struct in6_addr mask; | ||
91 | u32 secid; | 91 | u32 secid; |
92 | 92 | ||
93 | u32 valid; | 93 | struct netlbl_af6list list; |
94 | struct list_head list; | ||
95 | struct rcu_head rcu; | 94 | struct rcu_head rcu; |
96 | }; | 95 | }; |
97 | struct netlbl_unlhsh_iface { | 96 | struct netlbl_unlhsh_iface { |
@@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1 | |||
147 | }; | 146 | }; |
148 | 147 | ||
149 | /* | 148 | /* |
150 | * Audit Helper Functions | ||
151 | */ | ||
152 | |||
153 | /** | ||
154 | * netlbl_unlabel_audit_addr4 - Audit an IPv4 address | ||
155 | * @audit_buf: audit buffer | ||
156 | * @dev: network interface | ||
157 | * @addr: IP address | ||
158 | * @mask: IP address mask | ||
159 | * | ||
160 | * Description: | ||
161 | * Write the IPv4 address and address mask, if necessary, to @audit_buf. | ||
162 | * | ||
163 | */ | ||
164 | static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, | ||
165 | const char *dev, | ||
166 | __be32 addr, __be32 mask) | ||
167 | { | ||
168 | u32 mask_val = ntohl(mask); | ||
169 | |||
170 | if (dev != NULL) | ||
171 | audit_log_format(audit_buf, " netif=%s", dev); | ||
172 | audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr)); | ||
173 | if (mask_val != 0xffffffff) { | ||
174 | u32 mask_len = 0; | ||
175 | while (mask_val > 0) { | ||
176 | mask_val <<= 1; | ||
177 | mask_len++; | ||
178 | } | ||
179 | audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
184 | /** | ||
185 | * netlbl_unlabel_audit_addr6 - Audit an IPv6 address | ||
186 | * @audit_buf: audit buffer | ||
187 | * @dev: network interface | ||
188 | * @addr: IP address | ||
189 | * @mask: IP address mask | ||
190 | * | ||
191 | * Description: | ||
192 | * Write the IPv6 address and address mask, if necessary, to @audit_buf. | ||
193 | * | ||
194 | */ | ||
195 | static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, | ||
196 | const char *dev, | ||
197 | const struct in6_addr *addr, | ||
198 | const struct in6_addr *mask) | ||
199 | { | ||
200 | if (dev != NULL) | ||
201 | audit_log_format(audit_buf, " netif=%s", dev); | ||
202 | audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr)); | ||
203 | if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { | ||
204 | u32 mask_len = 0; | ||
205 | u32 mask_val; | ||
206 | int iter = -1; | ||
207 | while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) | ||
208 | mask_len += 32; | ||
209 | mask_val = ntohl(mask->s6_addr32[iter]); | ||
210 | while (mask_val > 0) { | ||
211 | mask_val <<= 1; | ||
212 | mask_len++; | ||
213 | } | ||
214 | audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); | ||
215 | } | ||
216 | } | ||
217 | #endif /* IPv6 */ | ||
218 | |||
219 | /* | ||
220 | * Unlabeled Connection Hash Table Functions | 149 | * Unlabeled Connection Hash Table Functions |
221 | */ | 150 | */ |
222 | 151 | ||
@@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry) | |||
274 | static void netlbl_unlhsh_free_iface(struct rcu_head *entry) | 203 | static void netlbl_unlhsh_free_iface(struct rcu_head *entry) |
275 | { | 204 | { |
276 | struct netlbl_unlhsh_iface *iface; | 205 | struct netlbl_unlhsh_iface *iface; |
277 | struct netlbl_unlhsh_addr4 *iter4; | 206 | struct netlbl_af4list *iter4; |
278 | struct netlbl_unlhsh_addr4 *tmp4; | 207 | struct netlbl_af4list *tmp4; |
279 | struct netlbl_unlhsh_addr6 *iter6; | 208 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
280 | struct netlbl_unlhsh_addr6 *tmp6; | 209 | struct netlbl_af6list *iter6; |
210 | struct netlbl_af6list *tmp6; | ||
211 | #endif /* IPv6 */ | ||
281 | 212 | ||
282 | iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); | 213 | iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); |
283 | 214 | ||
284 | /* no need for locks here since we are the only one with access to this | 215 | /* no need for locks here since we are the only one with access to this |
285 | * structure */ | 216 | * structure */ |
286 | 217 | ||
287 | list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list) | 218 | netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { |
288 | if (iter4->valid) { | 219 | netlbl_af4list_remove_entry(iter4); |
289 | list_del_rcu(&iter4->list); | 220 | kfree(netlbl_unlhsh_addr4_entry(iter4)); |
290 | kfree(iter4); | 221 | } |
291 | } | 222 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
292 | list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list) | 223 | netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { |
293 | if (iter6->valid) { | 224 | netlbl_af6list_remove_entry(iter6); |
294 | list_del_rcu(&iter6->list); | 225 | kfree(netlbl_unlhsh_addr6_entry(iter6)); |
295 | kfree(iter6); | 226 | } |
296 | } | 227 | #endif /* IPv6 */ |
297 | kfree(iface); | 228 | kfree(iface); |
298 | } | 229 | } |
299 | 230 | ||
@@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex) | |||
316 | } | 247 | } |
317 | 248 | ||
318 | /** | 249 | /** |
319 | * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry | ||
320 | * @addr: IPv4 address | ||
321 | * @iface: the network interface entry | ||
322 | * | ||
323 | * Description: | ||
324 | * Searches the IPv4 address list of the network interface specified by @iface. | ||
325 | * If a matching address entry is found it is returned, otherwise NULL is | ||
326 | * returned. The caller is responsible for calling the rcu_read_[un]lock() | ||
327 | * functions. | ||
328 | * | ||
329 | */ | ||
330 | static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4( | ||
331 | __be32 addr, | ||
332 | const struct netlbl_unlhsh_iface *iface) | ||
333 | { | ||
334 | struct netlbl_unlhsh_addr4 *iter; | ||
335 | |||
336 | list_for_each_entry_rcu(iter, &iface->addr4_list, list) | ||
337 | if (iter->valid && (addr & iter->mask) == iter->addr) | ||
338 | return iter; | ||
339 | |||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
344 | /** | ||
345 | * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry | ||
346 | * @addr: IPv6 address | ||
347 | * @iface: the network interface entry | ||
348 | * | ||
349 | * Description: | ||
350 | * Searches the IPv6 address list of the network interface specified by @iface. | ||
351 | * If a matching address entry is found it is returned, otherwise NULL is | ||
352 | * returned. The caller is responsible for calling the rcu_read_[un]lock() | ||
353 | * functions. | ||
354 | * | ||
355 | */ | ||
356 | static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( | ||
357 | const struct in6_addr *addr, | ||
358 | const struct netlbl_unlhsh_iface *iface) | ||
359 | { | ||
360 | struct netlbl_unlhsh_addr6 *iter; | ||
361 | |||
362 | list_for_each_entry_rcu(iter, &iface->addr6_list, list) | ||
363 | if (iter->valid && | ||
364 | ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) | ||
365 | return iter; | ||
366 | |||
367 | return NULL; | ||
368 | } | ||
369 | #endif /* IPv6 */ | ||
370 | |||
371 | /** | ||
372 | * netlbl_unlhsh_search_iface - Search for a matching interface entry | 250 | * netlbl_unlhsh_search_iface - Search for a matching interface entry |
373 | * @ifindex: the network interface | 251 | * @ifindex: the network interface |
374 | * | 252 | * |
@@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( | |||
381 | static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) | 259 | static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) |
382 | { | 260 | { |
383 | u32 bkt; | 261 | u32 bkt; |
262 | struct list_head *bkt_list; | ||
384 | struct netlbl_unlhsh_iface *iter; | 263 | struct netlbl_unlhsh_iface *iter; |
385 | 264 | ||
386 | bkt = netlbl_unlhsh_hash(ifindex); | 265 | bkt = netlbl_unlhsh_hash(ifindex); |
387 | list_for_each_entry_rcu(iter, | 266 | bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt]; |
388 | &rcu_dereference(netlbl_unlhsh)->tbl[bkt], | 267 | list_for_each_entry_rcu(iter, bkt_list, list) |
389 | list) | ||
390 | if (iter->valid && iter->ifindex == ifindex) | 268 | if (iter->valid && iter->ifindex == ifindex) |
391 | return iter; | 269 | return iter; |
392 | 270 | ||
@@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, | |||
439 | const struct in_addr *mask, | 317 | const struct in_addr *mask, |
440 | u32 secid) | 318 | u32 secid) |
441 | { | 319 | { |
320 | int ret_val; | ||
442 | struct netlbl_unlhsh_addr4 *entry; | 321 | struct netlbl_unlhsh_addr4 *entry; |
443 | struct netlbl_unlhsh_addr4 *iter; | ||
444 | 322 | ||
445 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | 323 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); |
446 | if (entry == NULL) | 324 | if (entry == NULL) |
447 | return -ENOMEM; | 325 | return -ENOMEM; |
448 | 326 | ||
449 | entry->addr = addr->s_addr & mask->s_addr; | 327 | entry->list.addr = addr->s_addr & mask->s_addr; |
450 | entry->mask = mask->s_addr; | 328 | entry->list.mask = mask->s_addr; |
451 | entry->secid = secid; | 329 | entry->list.valid = 1; |
452 | entry->valid = 1; | ||
453 | INIT_RCU_HEAD(&entry->rcu); | 330 | INIT_RCU_HEAD(&entry->rcu); |
331 | entry->secid = secid; | ||
454 | 332 | ||
455 | spin_lock(&netlbl_unlhsh_lock); | 333 | spin_lock(&netlbl_unlhsh_lock); |
456 | iter = netlbl_unlhsh_search_addr4(entry->addr, iface); | 334 | ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); |
457 | if (iter != NULL && | ||
458 | iter->addr == addr->s_addr && iter->mask == mask->s_addr) { | ||
459 | spin_unlock(&netlbl_unlhsh_lock); | ||
460 | kfree(entry); | ||
461 | return -EEXIST; | ||
462 | } | ||
463 | /* in order to speed up address searches through the list (the common | ||
464 | * case) we need to keep the list in order based on the size of the | ||
465 | * address mask such that the entry with the widest mask (smallest | ||
466 | * numerical value) appears first in the list */ | ||
467 | list_for_each_entry_rcu(iter, &iface->addr4_list, list) | ||
468 | if (iter->valid && | ||
469 | ntohl(entry->mask) > ntohl(iter->mask)) { | ||
470 | __list_add_rcu(&entry->list, | ||
471 | iter->list.prev, | ||
472 | &iter->list); | ||
473 | spin_unlock(&netlbl_unlhsh_lock); | ||
474 | return 0; | ||
475 | } | ||
476 | list_add_tail_rcu(&entry->list, &iface->addr4_list); | ||
477 | spin_unlock(&netlbl_unlhsh_lock); | 335 | spin_unlock(&netlbl_unlhsh_lock); |
478 | return 0; | 336 | |
337 | if (ret_val != 0) | ||
338 | kfree(entry); | ||
339 | return ret_val; | ||
479 | } | 340 | } |
480 | 341 | ||
481 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 342 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
@@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, | |||
498 | const struct in6_addr *mask, | 359 | const struct in6_addr *mask, |
499 | u32 secid) | 360 | u32 secid) |
500 | { | 361 | { |
362 | int ret_val; | ||
501 | struct netlbl_unlhsh_addr6 *entry; | 363 | struct netlbl_unlhsh_addr6 *entry; |
502 | struct netlbl_unlhsh_addr6 *iter; | ||
503 | 364 | ||
504 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | 365 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); |
505 | if (entry == NULL) | 366 | if (entry == NULL) |
506 | return -ENOMEM; | 367 | return -ENOMEM; |
507 | 368 | ||
508 | ipv6_addr_copy(&entry->addr, addr); | 369 | ipv6_addr_copy(&entry->list.addr, addr); |
509 | entry->addr.s6_addr32[0] &= mask->s6_addr32[0]; | 370 | entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; |
510 | entry->addr.s6_addr32[1] &= mask->s6_addr32[1]; | 371 | entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; |
511 | entry->addr.s6_addr32[2] &= mask->s6_addr32[2]; | 372 | entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; |
512 | entry->addr.s6_addr32[3] &= mask->s6_addr32[3]; | 373 | entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; |
513 | ipv6_addr_copy(&entry->mask, mask); | 374 | ipv6_addr_copy(&entry->list.mask, mask); |
514 | entry->secid = secid; | 375 | entry->list.valid = 1; |
515 | entry->valid = 1; | ||
516 | INIT_RCU_HEAD(&entry->rcu); | 376 | INIT_RCU_HEAD(&entry->rcu); |
377 | entry->secid = secid; | ||
517 | 378 | ||
518 | spin_lock(&netlbl_unlhsh_lock); | 379 | spin_lock(&netlbl_unlhsh_lock); |
519 | iter = netlbl_unlhsh_search_addr6(&entry->addr, iface); | 380 | ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); |
520 | if (iter != NULL && | ||
521 | (ipv6_addr_equal(&iter->addr, addr) && | ||
522 | ipv6_addr_equal(&iter->mask, mask))) { | ||
523 | spin_unlock(&netlbl_unlhsh_lock); | ||
524 | kfree(entry); | ||
525 | return -EEXIST; | ||
526 | } | ||
527 | /* in order to speed up address searches through the list (the common | ||
528 | * case) we need to keep the list in order based on the size of the | ||
529 | * address mask such that the entry with the widest mask (smallest | ||
530 | * numerical value) appears first in the list */ | ||
531 | list_for_each_entry_rcu(iter, &iface->addr6_list, list) | ||
532 | if (iter->valid && | ||
533 | ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { | ||
534 | __list_add_rcu(&entry->list, | ||
535 | iter->list.prev, | ||
536 | &iter->list); | ||
537 | spin_unlock(&netlbl_unlhsh_lock); | ||
538 | return 0; | ||
539 | } | ||
540 | list_add_tail_rcu(&entry->list, &iface->addr6_list); | ||
541 | spin_unlock(&netlbl_unlhsh_lock); | 381 | spin_unlock(&netlbl_unlhsh_lock); |
382 | |||
383 | if (ret_val != 0) | ||
384 | kfree(entry); | ||
542 | return 0; | 385 | return ret_val; /* report a failed list add instead of masking it as success, as the IPv4 variant does */ |
543 | } | 386 | } |
544 | #endif /* IPv6 */ | 387 | #endif /* IPv6 */ |
@@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net, | |||
658 | mask4 = (struct in_addr *)mask; | 501 | mask4 = (struct in_addr *)mask; |
659 | ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); | 502 | ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); |
660 | if (audit_buf != NULL) | 503 | if (audit_buf != NULL) |
661 | netlbl_unlabel_audit_addr4(audit_buf, | 504 | netlbl_af4list_audit_addr(audit_buf, 1, |
662 | dev_name, | 505 | dev_name, |
663 | addr4->s_addr, | 506 | addr4->s_addr, |
664 | mask4->s_addr); | 507 | mask4->s_addr); |
665 | break; | 508 | break; |
666 | } | 509 | } |
667 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 510 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
@@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net, | |||
672 | mask6 = (struct in6_addr *)mask; | 515 | mask6 = (struct in6_addr *)mask; |
673 | ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); | 516 | ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); |
674 | if (audit_buf != NULL) | 517 | if (audit_buf != NULL) |
675 | netlbl_unlabel_audit_addr6(audit_buf, | 518 | netlbl_af6list_audit_addr(audit_buf, 1, |
676 | dev_name, | 519 | dev_name, |
677 | addr6, mask6); | 520 | addr6, mask6); |
678 | break; | 521 | break; |
679 | } | 522 | } |
680 | #endif /* IPv6 */ | 523 | #endif /* IPv6 */ |
@@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, | |||
719 | const struct in_addr *mask, | 562 | const struct in_addr *mask, |
720 | struct netlbl_audit *audit_info) | 563 | struct netlbl_audit *audit_info) |
721 | { | 564 | { |
722 | int ret_val = -ENOENT; | 565 | int ret_val = 0; |
566 | struct netlbl_af4list *list_entry; | ||
723 | struct netlbl_unlhsh_addr4 *entry; | 567 | struct netlbl_unlhsh_addr4 *entry; |
724 | struct audit_buffer *audit_buf = NULL; | 568 | struct audit_buffer *audit_buf; |
725 | struct net_device *dev; | 569 | struct net_device *dev; |
726 | char *secctx = NULL; | 570 | char *secctx; |
727 | u32 secctx_len; | 571 | u32 secctx_len; |
728 | 572 | ||
729 | spin_lock(&netlbl_unlhsh_lock); | 573 | spin_lock(&netlbl_unlhsh_lock); |
730 | entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface); | 574 | list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, |
731 | if (entry != NULL && | 575 | &iface->addr4_list); |
732 | entry->addr == addr->s_addr && entry->mask == mask->s_addr) { | ||
733 | entry->valid = 0; | ||
734 | list_del_rcu(&entry->list); | ||
735 | ret_val = 0; | ||
736 | } | ||
737 | spin_unlock(&netlbl_unlhsh_lock); | 576 | spin_unlock(&netlbl_unlhsh_lock); |
577 | if (list_entry == NULL) | ||
578 | ret_val = -ENOENT; | ||
579 | entry = netlbl_unlhsh_addr4_entry(list_entry); | ||
738 | 580 | ||
739 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, | 581 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, |
740 | audit_info); | 582 | audit_info); |
741 | if (audit_buf != NULL) { | 583 | if (audit_buf != NULL) { |
742 | dev = dev_get_by_index(net, iface->ifindex); | 584 | dev = dev_get_by_index(net, iface->ifindex); |
743 | netlbl_unlabel_audit_addr4(audit_buf, | 585 | netlbl_af4list_audit_addr(audit_buf, 1, |
744 | (dev != NULL ? dev->name : NULL), | 586 | (dev != NULL ? dev->name : NULL), |
745 | entry->addr, entry->mask); | 587 | addr->s_addr, mask->s_addr); |
746 | if (dev != NULL) | 588 | if (dev != NULL) |
747 | dev_put(dev); | 589 | dev_put(dev); |
748 | if (security_secid_to_secctx(entry->secid, | 590 | if (entry && security_secid_to_secctx(entry->secid, |
749 | &secctx, | 591 | &secctx, |
750 | &secctx_len) == 0) { | 592 | &secctx_len) == 0) { |
751 | audit_log_format(audit_buf, " sec_obj=%s", secctx); | 593 | audit_log_format(audit_buf, " sec_obj=%s", secctx); |
752 | security_release_secctx(secctx, secctx_len); | 594 | security_release_secctx(secctx, secctx_len); |
753 | } | 595 | } |
@@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, | |||
781 | const struct in6_addr *mask, | 623 | const struct in6_addr *mask, |
782 | struct netlbl_audit *audit_info) | 624 | struct netlbl_audit *audit_info) |
783 | { | 625 | { |
784 | int ret_val = -ENOENT; | 626 | int ret_val = 0; |
627 | struct netlbl_af6list *list_entry; | ||
785 | struct netlbl_unlhsh_addr6 *entry; | 628 | struct netlbl_unlhsh_addr6 *entry; |
786 | struct audit_buffer *audit_buf = NULL; | 629 | struct audit_buffer *audit_buf; |
787 | struct net_device *dev; | 630 | struct net_device *dev; |
788 | char *secctx = NULL; | 631 | char *secctx; |
789 | u32 secctx_len; | 632 | u32 secctx_len; |
790 | 633 | ||
791 | spin_lock(&netlbl_unlhsh_lock); | 634 | spin_lock(&netlbl_unlhsh_lock); |
792 | entry = netlbl_unlhsh_search_addr6(addr, iface); | 635 | list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); |
793 | if (entry != NULL && | ||
794 | (ipv6_addr_equal(&entry->addr, addr) && | ||
795 | ipv6_addr_equal(&entry->mask, mask))) { | ||
796 | entry->valid = 0; | ||
797 | list_del_rcu(&entry->list); | ||
798 | ret_val = 0; | ||
799 | } | ||
800 | spin_unlock(&netlbl_unlhsh_lock); | 636 | spin_unlock(&netlbl_unlhsh_lock); |
637 | if (list_entry == NULL) | ||
638 | ret_val = -ENOENT; | ||
639 | entry = netlbl_unlhsh_addr6_entry(list_entry); | ||
801 | 640 | ||
802 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, | 641 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, |
803 | audit_info); | 642 | audit_info); |
804 | if (audit_buf != NULL) { | 643 | if (audit_buf != NULL) { |
805 | dev = dev_get_by_index(net, iface->ifindex); | 644 | dev = dev_get_by_index(net, iface->ifindex); |
806 | netlbl_unlabel_audit_addr6(audit_buf, | 645 | netlbl_af6list_audit_addr(audit_buf, 1, |
807 | (dev != NULL ? dev->name : NULL), | 646 | (dev != NULL ? dev->name : NULL), |
808 | addr, mask); | 647 | addr, mask); |
809 | if (dev != NULL) | 648 | if (dev != NULL) |
810 | dev_put(dev); | 649 | dev_put(dev); |
811 | if (security_secid_to_secctx(entry->secid, | 650 | if (entry && security_secid_to_secctx(entry->secid, |
812 | &secctx, | 651 | &secctx, |
813 | &secctx_len) == 0) { | 652 | &secctx_len) == 0) { |
814 | audit_log_format(audit_buf, " sec_obj=%s", secctx); | 653 | audit_log_format(audit_buf, " sec_obj=%s", secctx); |
815 | security_release_secctx(secctx, secctx_len); | 654 | security_release_secctx(secctx, secctx_len); |
816 | } | 655 | } |
@@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, | |||
836 | */ | 675 | */ |
837 | static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) | 676 | static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) |
838 | { | 677 | { |
839 | struct netlbl_unlhsh_addr4 *iter4; | 678 | struct netlbl_af4list *iter4; |
840 | struct netlbl_unlhsh_addr6 *iter6; | 679 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
680 | struct netlbl_af6list *iter6; | ||
681 | #endif /* IPv6 */ | ||
841 | 682 | ||
842 | spin_lock(&netlbl_unlhsh_lock); | 683 | spin_lock(&netlbl_unlhsh_lock); |
843 | list_for_each_entry_rcu(iter4, &iface->addr4_list, list) | 684 | netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) |
844 | if (iter4->valid) | 685 | goto unlhsh_condremove_failure; |
845 | goto unlhsh_condremove_failure; | 686 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
846 | list_for_each_entry_rcu(iter6, &iface->addr6_list, list) | 687 | netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) |
847 | if (iter6->valid) | 688 | goto unlhsh_condremove_failure; |
848 | goto unlhsh_condremove_failure; | 689 | #endif /* IPv6 */ |
849 | iface->valid = 0; | 690 | iface->valid = 0; |
850 | if (iface->ifindex > 0) | 691 | if (iface->ifindex > 0) |
851 | list_del_rcu(&iface->list); | 692 | list_del_rcu(&iface->list); |
@@ -1349,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, | |||
1349 | if (addr4) { | 1190 | if (addr4) { |
1350 | struct in_addr addr_struct; | 1191 | struct in_addr addr_struct; |
1351 | 1192 | ||
1352 | addr_struct.s_addr = addr4->addr; | 1193 | addr_struct.s_addr = addr4->list.addr; |
1353 | ret_val = nla_put(cb_arg->skb, | 1194 | ret_val = nla_put(cb_arg->skb, |
1354 | NLBL_UNLABEL_A_IPV4ADDR, | 1195 | NLBL_UNLABEL_A_IPV4ADDR, |
1355 | sizeof(struct in_addr), | 1196 | sizeof(struct in_addr), |
@@ -1357,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, | |||
1357 | if (ret_val != 0) | 1198 | if (ret_val != 0) |
1358 | goto list_cb_failure; | 1199 | goto list_cb_failure; |
1359 | 1200 | ||
1360 | addr_struct.s_addr = addr4->mask; | 1201 | addr_struct.s_addr = addr4->list.mask; |
1361 | ret_val = nla_put(cb_arg->skb, | 1202 | ret_val = nla_put(cb_arg->skb, |
1362 | NLBL_UNLABEL_A_IPV4MASK, | 1203 | NLBL_UNLABEL_A_IPV4MASK, |
1363 | sizeof(struct in_addr), | 1204 | sizeof(struct in_addr), |
@@ -1370,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, | |||
1370 | ret_val = nla_put(cb_arg->skb, | 1211 | ret_val = nla_put(cb_arg->skb, |
1371 | NLBL_UNLABEL_A_IPV6ADDR, | 1212 | NLBL_UNLABEL_A_IPV6ADDR, |
1372 | sizeof(struct in6_addr), | 1213 | sizeof(struct in6_addr), |
1373 | &addr6->addr); | 1214 | &addr6->list.addr); |
1374 | if (ret_val != 0) | 1215 | if (ret_val != 0) |
1375 | goto list_cb_failure; | 1216 | goto list_cb_failure; |
1376 | 1217 | ||
1377 | ret_val = nla_put(cb_arg->skb, | 1218 | ret_val = nla_put(cb_arg->skb, |
1378 | NLBL_UNLABEL_A_IPV6MASK, | 1219 | NLBL_UNLABEL_A_IPV6MASK, |
1379 | sizeof(struct in6_addr), | 1220 | sizeof(struct in6_addr), |
1380 | &addr6->mask); | 1221 | &addr6->list.mask); |
1381 | if (ret_val != 0) | 1222 | if (ret_val != 0) |
1382 | goto list_cb_failure; | 1223 | goto list_cb_failure; |
1383 | 1224 | ||
@@ -1425,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, | |||
1425 | u32 iter_bkt; | 1266 | u32 iter_bkt; |
1426 | u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; | 1267 | u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; |
1427 | struct netlbl_unlhsh_iface *iface; | 1268 | struct netlbl_unlhsh_iface *iface; |
1428 | struct netlbl_unlhsh_addr4 *addr4; | 1269 | struct list_head *iter_list; |
1429 | struct netlbl_unlhsh_addr6 *addr6; | 1270 | struct netlbl_af4list *addr4; |
1271 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
1272 | struct netlbl_af6list *addr6; | ||
1273 | #endif | ||
1430 | 1274 | ||
1431 | cb_arg.nl_cb = cb; | 1275 | cb_arg.nl_cb = cb; |
1432 | cb_arg.skb = skb; | 1276 | cb_arg.skb = skb; |
@@ -1436,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, | |||
1436 | for (iter_bkt = skip_bkt; | 1280 | for (iter_bkt = skip_bkt; |
1437 | iter_bkt < rcu_dereference(netlbl_unlhsh)->size; | 1281 | iter_bkt < rcu_dereference(netlbl_unlhsh)->size; |
1438 | iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { | 1282 | iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { |
1439 | list_for_each_entry_rcu(iface, | 1283 | iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; |
1440 | &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt], | 1284 | list_for_each_entry_rcu(iface, iter_list, list) { |
1441 | list) { | ||
1442 | if (!iface->valid || | 1285 | if (!iface->valid || |
1443 | iter_chain++ < skip_chain) | 1286 | iter_chain++ < skip_chain) |
1444 | continue; | 1287 | continue; |
1445 | list_for_each_entry_rcu(addr4, | 1288 | netlbl_af4list_foreach_rcu(addr4, |
1446 | &iface->addr4_list, | 1289 | &iface->addr4_list) { |
1447 | list) { | 1290 | if (iter_addr4++ < skip_addr4) |
1448 | if (!addr4->valid || iter_addr4++ < skip_addr4) | ||
1449 | continue; | 1291 | continue; |
1450 | if (netlbl_unlabel_staticlist_gen( | 1292 | if (netlbl_unlabel_staticlist_gen( |
1451 | NLBL_UNLABEL_C_STATICLIST, | 1293 | NLBL_UNLABEL_C_STATICLIST, |
1452 | iface, | 1294 | iface, |
1453 | addr4, | 1295 | netlbl_unlhsh_addr4_entry(addr4), |
1454 | NULL, | 1296 | NULL, |
1455 | &cb_arg) < 0) { | 1297 | &cb_arg) < 0) { |
1456 | iter_addr4--; | 1298 | iter_addr4--; |
1457 | iter_chain--; | 1299 | iter_chain--; |
1458 | goto unlabel_staticlist_return; | 1300 | goto unlabel_staticlist_return; |
1459 | } | 1301 | } |
1460 | } | 1302 | } |
1461 | list_for_each_entry_rcu(addr6, | 1303 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
1462 | &iface->addr6_list, | 1304 | netlbl_af6list_foreach_rcu(addr6, |
1463 | list) { | 1305 | &iface->addr6_list) { |
1464 | if (!addr6->valid || iter_addr6++ < skip_addr6) | 1306 | if (iter_addr6++ < skip_addr6) |
1465 | continue; | 1307 | continue; |
1466 | if (netlbl_unlabel_staticlist_gen( | 1308 | if (netlbl_unlabel_staticlist_gen( |
1467 | NLBL_UNLABEL_C_STATICLIST, | 1309 | NLBL_UNLABEL_C_STATICLIST, |
1468 | iface, | 1310 | iface, |
1469 | NULL, | 1311 | NULL, |
1470 | addr6, | 1312 | netlbl_unlhsh_addr6_entry(addr6), |
1471 | &cb_arg) < 0) { | 1313 | &cb_arg) < 0) { |
1472 | iter_addr6--; | 1314 | iter_addr6--; |
1473 | iter_chain--; | 1315 | iter_chain--; |
1474 | goto unlabel_staticlist_return; | 1316 | goto unlabel_staticlist_return; |
1475 | } | 1317 | } |
1476 | } | 1318 | } |
1319 | #endif /* IPv6 */ | ||
1477 | } | 1320 | } |
1478 | } | 1321 | } |
1479 | 1322 | ||
@@ -1504,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, | |||
1504 | struct netlbl_unlhsh_iface *iface; | 1347 | struct netlbl_unlhsh_iface *iface; |
1505 | u32 skip_addr4 = cb->args[0]; | 1348 | u32 skip_addr4 = cb->args[0]; |
1506 | u32 skip_addr6 = cb->args[1]; | 1349 | u32 skip_addr6 = cb->args[1]; |
1507 | u32 iter_addr4 = 0, iter_addr6 = 0; | 1350 | u32 iter_addr4 = 0; |
1508 | struct netlbl_unlhsh_addr4 *addr4; | 1351 | struct netlbl_af4list *addr4; |
1509 | struct netlbl_unlhsh_addr6 *addr6; | 1352 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
1353 | u32 iter_addr6 = 0; | ||
1354 | struct netlbl_af6list *addr6; | ||
1355 | #endif | ||
1510 | 1356 | ||
1511 | cb_arg.nl_cb = cb; | 1357 | cb_arg.nl_cb = cb; |
1512 | cb_arg.skb = skb; | 1358 | cb_arg.skb = skb; |
@@ -1517,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, | |||
1517 | if (iface == NULL || !iface->valid) | 1363 | if (iface == NULL || !iface->valid) |
1518 | goto unlabel_staticlistdef_return; | 1364 | goto unlabel_staticlistdef_return; |
1519 | 1365 | ||
1520 | list_for_each_entry_rcu(addr4, &iface->addr4_list, list) { | 1366 | netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { |
1521 | if (!addr4->valid || iter_addr4++ < skip_addr4) | 1367 | if (iter_addr4++ < skip_addr4) |
1522 | continue; | 1368 | continue; |
1523 | if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, | 1369 | if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, |
1524 | iface, | 1370 | iface, |
1525 | addr4, | 1371 | netlbl_unlhsh_addr4_entry(addr4), |
1526 | NULL, | 1372 | NULL, |
1527 | &cb_arg) < 0) { | 1373 | &cb_arg) < 0) { |
1528 | iter_addr4--; | 1374 | iter_addr4--; |
1529 | goto unlabel_staticlistdef_return; | 1375 | goto unlabel_staticlistdef_return; |
1530 | } | 1376 | } |
1531 | } | 1377 | } |
1532 | list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { | 1378 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
1533 | if (!addr6->valid || iter_addr6++ < skip_addr6) | 1379 | netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { |
1380 | if (iter_addr6++ < skip_addr6) | ||
1534 | continue; | 1381 | continue; |
1535 | if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, | 1382 | if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, |
1536 | iface, | 1383 | iface, |
1537 | NULL, | 1384 | NULL, |
1538 | addr6, | 1385 | netlbl_unlhsh_addr6_entry(addr6), |
1539 | &cb_arg) < 0) { | 1386 | &cb_arg) < 0) { |
1540 | iter_addr6--; | 1387 | iter_addr6--; |
1541 | goto unlabel_staticlistdef_return; | 1388 | goto unlabel_staticlistdef_return; |
1542 | } | 1389 | } |
1543 | } | 1390 | } |
1391 | #endif /* IPv6 */ | ||
1544 | 1392 | ||
1545 | unlabel_staticlistdef_return: | 1393 | unlabel_staticlistdef_return: |
1546 | rcu_read_unlock(); | 1394 | rcu_read_unlock(); |
@@ -1718,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, | |||
1718 | switch (family) { | 1566 | switch (family) { |
1719 | case PF_INET: { | 1567 | case PF_INET: { |
1720 | struct iphdr *hdr4; | 1568 | struct iphdr *hdr4; |
1721 | struct netlbl_unlhsh_addr4 *addr4; | 1569 | struct netlbl_af4list *addr4; |
1722 | 1570 | ||
1723 | hdr4 = ip_hdr(skb); | 1571 | hdr4 = ip_hdr(skb); |
1724 | addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface); | 1572 | addr4 = netlbl_af4list_search(hdr4->saddr, |
1573 | &iface->addr4_list); | ||
1725 | if (addr4 == NULL) | 1574 | if (addr4 == NULL) |
1726 | goto unlabel_getattr_nolabel; | 1575 | goto unlabel_getattr_nolabel; |
1727 | secattr->attr.secid = addr4->secid; | 1576 | secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; |
1728 | break; | 1577 | break; |
1729 | } | 1578 | } |
1730 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 1579 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
1731 | case PF_INET6: { | 1580 | case PF_INET6: { |
1732 | struct ipv6hdr *hdr6; | 1581 | struct ipv6hdr *hdr6; |
1733 | struct netlbl_unlhsh_addr6 *addr6; | 1582 | struct netlbl_af6list *addr6; |
1734 | 1583 | ||
1735 | hdr6 = ipv6_hdr(skb); | 1584 | hdr6 = ipv6_hdr(skb); |
1736 | addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface); | 1585 | addr6 = netlbl_af6list_search(&hdr6->saddr, |
1586 | &iface->addr6_list); | ||
1737 | if (addr6 == NULL) | 1587 | if (addr6 == NULL) |
1738 | goto unlabel_getattr_nolabel; | 1588 | goto unlabel_getattr_nolabel; |
1739 | secattr->attr.secid = addr6->secid; | 1589 | secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid; |
1740 | break; | 1590 | break; |
1741 | } | 1591 | } |
1742 | #endif /* IPv6 */ | 1592 | #endif /* IPv6 */ |
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e5b69556bb5b..21124ec0a73d 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/workqueue.h> | 16 | #include <linux/workqueue.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/rfkill.h> | 18 | #include <linux/rfkill.h> |
19 | #include <linux/sched.h> | ||
19 | 20 | ||
20 | #include "rfkill-input.h" | 21 | #include "rfkill-input.h" |
21 | 22 | ||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76739e928d0d..4895c341e46d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru | |||
174 | clnt->cl_procinfo = version->procs; | 174 | clnt->cl_procinfo = version->procs; |
175 | clnt->cl_maxproc = version->nrprocs; | 175 | clnt->cl_maxproc = version->nrprocs; |
176 | clnt->cl_protname = program->name; | 176 | clnt->cl_protname = program->name; |
177 | clnt->cl_prog = program->number; | 177 | clnt->cl_prog = args->prognumber ? : program->number; |
178 | clnt->cl_vers = version->number; | 178 | clnt->cl_vers = version->number; |
179 | clnt->cl_stats = program->stats; | 179 | clnt->cl_stats = program->stats; |
180 | clnt->cl_metrics = rpc_alloc_iostats(clnt); | 180 | clnt->cl_metrics = rpc_alloc_iostats(clnt); |
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru | |||
213 | } | 213 | } |
214 | 214 | ||
215 | /* save the nodename */ | 215 | /* save the nodename */ |
216 | clnt->cl_nodelen = strlen(utsname()->nodename); | 216 | clnt->cl_nodelen = strlen(init_utsname()->nodename); |
217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) | 217 | if (clnt->cl_nodelen > UNX_MAXNODENAME) |
218 | clnt->cl_nodelen = UNX_MAXNODENAME; | 218 | clnt->cl_nodelen = UNX_MAXNODENAME; |
219 | memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); | 219 | memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); |
220 | rpc_register_client(clnt); | 220 | rpc_register_client(clnt); |
221 | return clnt; | 221 | return clnt; |
222 | 222 | ||
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24db2b4d12d3..41013dd66ac3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/in6.h> | 20 | #include <linux/in6.h> |
21 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <net/ipv6.h> | ||
23 | 24 | ||
24 | #include <linux/sunrpc/clnt.h> | 25 | #include <linux/sunrpc/clnt.h> |
25 | #include <linux/sunrpc/sched.h> | 26 | #include <linux/sunrpc/sched.h> |
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, | |||
176 | } | 177 | } |
177 | 178 | ||
178 | static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | 179 | static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, |
179 | u32 version, struct rpc_message *msg, | 180 | u32 version, struct rpc_message *msg) |
180 | int *result) | ||
181 | { | 181 | { |
182 | struct rpc_clnt *rpcb_clnt; | 182 | struct rpc_clnt *rpcb_clnt; |
183 | int error = 0; | 183 | int result, error = 0; |
184 | 184 | ||
185 | *result = 0; | 185 | msg->rpc_resp = &result; |
186 | 186 | ||
187 | rpcb_clnt = rpcb_create_local(addr, addrlen, version); | 187 | rpcb_clnt = rpcb_create_local(addr, addrlen, version); |
188 | if (!IS_ERR(rpcb_clnt)) { | 188 | if (!IS_ERR(rpcb_clnt)) { |
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
191 | } else | 191 | } else |
192 | error = PTR_ERR(rpcb_clnt); | 192 | error = PTR_ERR(rpcb_clnt); |
193 | 193 | ||
194 | if (error < 0) | 194 | if (error < 0) { |
195 | printk(KERN_WARNING "RPC: failed to contact local rpcbind " | 195 | printk(KERN_WARNING "RPC: failed to contact local rpcbind " |
196 | "server (errno %d).\n", -error); | 196 | "server (errno %d).\n", -error); |
197 | dprintk("RPC: registration status %d/%d\n", error, *result); | 197 | return error; |
198 | } | ||
198 | 199 | ||
199 | return error; | 200 | if (!result) |
201 | return -EACCES; | ||
202 | return 0; | ||
200 | } | 203 | } |
201 | 204 | ||
202 | /** | 205 | /** |
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
205 | * @vers: RPC version number to bind | 208 | * @vers: RPC version number to bind |
206 | * @prot: transport protocol to register | 209 | * @prot: transport protocol to register |
207 | * @port: port value to register | 210 | * @port: port value to register |
208 | * @okay: OUT: result code | 211 | * |
212 | * Returns zero if the registration request was dispatched successfully | ||
213 | * and the rpcbind daemon returned success. Otherwise, returns an errno | ||
214 | * value that reflects the nature of the error (request could not be | ||
215 | * dispatched, timed out, or rpcbind returned an error). | ||
209 | * | 216 | * |
210 | * RPC services invoke this function to advertise their contact | 217 | * RPC services invoke this function to advertise their contact |
211 | * information via the system's rpcbind daemon. RPC services | 218 | * information via the system's rpcbind daemon. RPC services |
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
217 | * all registered transports for [program, version] from the local | 224 | * all registered transports for [program, version] from the local |
218 | * rpcbind database. | 225 | * rpcbind database. |
219 | * | 226 | * |
220 | * Returns zero if the registration request was dispatched | ||
221 | * successfully and a reply was received. The rpcbind daemon's | ||
222 | * boolean result code is stored in *okay. | ||
223 | * | ||
224 | * Returns an errno value and sets *result to zero if there was | ||
225 | * some problem that prevented the rpcbind request from being | ||
226 | * dispatched, or if the rpcbind daemon did not respond within | ||
227 | * the timeout. | ||
228 | * | ||
229 | * This function uses rpcbind protocol version 2 to contact the | 227 | * This function uses rpcbind protocol version 2 to contact the |
230 | * local rpcbind daemon. | 228 | * local rpcbind daemon. |
231 | * | 229 | * |
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
236 | * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 | 234 | * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 |
237 | * addresses). | 235 | * addresses). |
238 | */ | 236 | */ |
239 | int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | 237 | int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) |
240 | { | 238 | { |
241 | struct rpcbind_args map = { | 239 | struct rpcbind_args map = { |
242 | .r_prog = prog, | 240 | .r_prog = prog, |
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
246 | }; | 244 | }; |
247 | struct rpc_message msg = { | 245 | struct rpc_message msg = { |
248 | .rpc_argp = &map, | 246 | .rpc_argp = &map, |
249 | .rpc_resp = okay, | ||
250 | }; | 247 | }; |
251 | 248 | ||
252 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " | 249 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " |
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
259 | 256 | ||
260 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, | 257 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, |
261 | sizeof(rpcb_inaddr_loopback), | 258 | sizeof(rpcb_inaddr_loopback), |
262 | RPCBVERS_2, &msg, okay); | 259 | RPCBVERS_2, &msg); |
263 | } | 260 | } |
264 | 261 | ||
265 | /* | 262 | /* |
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, | |||
290 | 287 | ||
291 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, | 288 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, |
292 | sizeof(rpcb_inaddr_loopback), | 289 | sizeof(rpcb_inaddr_loopback), |
293 | RPCBVERS_4, msg, msg->rpc_resp); | 290 | RPCBVERS_4, msg); |
294 | } | 291 | } |
295 | 292 | ||
296 | /* | 293 | /* |
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
304 | char buf[64]; | 301 | char buf[64]; |
305 | 302 | ||
306 | /* Construct AF_INET6 universal address */ | 303 | /* Construct AF_INET6 universal address */ |
307 | snprintf(buf, sizeof(buf), | 304 | if (ipv6_addr_any(&address_to_register->sin6_addr)) |
308 | NIP6_FMT".%u.%u", | 305 | snprintf(buf, sizeof(buf), "::.%u.%u", |
309 | NIP6(address_to_register->sin6_addr), | 306 | port >> 8, port & 0xff); |
310 | port >> 8, port & 0xff); | 307 | else |
308 | snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u", | ||
309 | NIP6(address_to_register->sin6_addr), | ||
310 | port >> 8, port & 0xff); | ||
311 | map->r_addr = buf; | 311 | map->r_addr = buf; |
312 | 312 | ||
313 | dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " | 313 | dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " |
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
321 | 321 | ||
322 | return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, | 322 | return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, |
323 | sizeof(rpcb_in6addr_loopback), | 323 | sizeof(rpcb_in6addr_loopback), |
324 | RPCBVERS_4, msg, msg->rpc_resp); | 324 | RPCBVERS_4, msg); |
325 | } | 325 | } |
326 | 326 | ||
327 | /** | 327 | /** |
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
330 | * @version: RPC version number of service to (un)register | 330 | * @version: RPC version number of service to (un)register |
331 | * @address: address family, IP address, and port to (un)register | 331 | * @address: address family, IP address, and port to (un)register |
332 | * @netid: netid of transport protocol to (un)register | 332 | * @netid: netid of transport protocol to (un)register |
333 | * @result: result code from rpcbind RPC call | 333 | * |
334 | * Returns zero if the registration request was dispatched successfully | ||
335 | * and the rpcbind daemon returned success. Otherwise, returns an errno | ||
336 | * value that reflects the nature of the error (request could not be | ||
337 | * dispatched, timed out, or rpcbind returned an error). | ||
334 | * | 338 | * |
335 | * RPC services invoke this function to advertise their contact | 339 | * RPC services invoke this function to advertise their contact |
336 | * information via the system's rpcbind daemon. RPC services | 340 | * information via the system's rpcbind daemon. RPC services |
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
342 | * to zero. Callers pass a netid of "" to unregister all | 346 | * to zero. Callers pass a netid of "" to unregister all |
343 | * transport netids associated with [program, version, address]. | 347 | * transport netids associated with [program, version, address]. |
344 | * | 348 | * |
345 | * Returns zero if the registration request was dispatched | ||
346 | * successfully and a reply was received. The rpcbind daemon's | ||
347 | * result code is stored in *result. | ||
348 | * | ||
349 | * Returns an errno value and sets *result to zero if there was | ||
350 | * some problem that prevented the rpcbind request from being | ||
351 | * dispatched, or if the rpcbind daemon did not respond within | ||
352 | * the timeout. | ||
353 | * | ||
354 | * This function uses rpcbind protocol version 4 to contact the | 349 | * This function uses rpcbind protocol version 4 to contact the |
355 | * local rpcbind daemon. The local rpcbind daemon must support | 350 | * local rpcbind daemon. The local rpcbind daemon must support |
356 | * version 4 of the rpcbind protocol in order for these functions | 351 | * version 4 of the rpcbind protocol in order for these functions |
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
372 | * advertises the service on all IPv4 and IPv6 addresses. | 367 | * advertises the service on all IPv4 and IPv6 addresses. |
373 | */ | 368 | */ |
374 | int rpcb_v4_register(const u32 program, const u32 version, | 369 | int rpcb_v4_register(const u32 program, const u32 version, |
375 | const struct sockaddr *address, const char *netid, | 370 | const struct sockaddr *address, const char *netid) |
376 | int *result) | ||
377 | { | 371 | { |
378 | struct rpcbind_args map = { | 372 | struct rpcbind_args map = { |
379 | .r_prog = program, | 373 | .r_prog = program, |
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version, | |||
383 | }; | 377 | }; |
384 | struct rpc_message msg = { | 378 | struct rpc_message msg = { |
385 | .rpc_argp = &map, | 379 | .rpc_argp = &map, |
386 | .rpc_resp = result, | ||
387 | }; | 380 | }; |
388 | 381 | ||
389 | *result = 0; | ||
390 | |||
391 | switch (address->sa_family) { | 382 | switch (address->sa_family) { |
392 | case AF_INET: | 383 | case AF_INET: |
393 | return rpcb_register_netid4((struct sockaddr_in *)address, | 384 | return rpcb_register_netid4((struct sockaddr_in *)address, |
@@ -469,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
469 | return rpc_run_task(&task_setup_data); | 460 | return rpc_run_task(&task_setup_data); |
470 | } | 461 | } |
471 | 462 | ||
463 | /* | ||
464 | * In the case where rpc clients have been cloned, we want to make | ||
465 | * sure that we use the program number/version etc of the actual | ||
466 | * owner of the xprt. To do so, we walk back up the tree of parents | ||
467 | * to find whoever created the transport and/or whoever has the | ||
468 | * autobind flag set. | ||
469 | */ | ||
470 | static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) | ||
471 | { | ||
472 | struct rpc_clnt *parent = clnt->cl_parent; | ||
473 | |||
474 | while (parent != clnt) { | ||
475 | if (parent->cl_xprt != clnt->cl_xprt) | ||
476 | break; | ||
477 | if (clnt->cl_autobind) | ||
478 | break; | ||
479 | clnt = parent; | ||
480 | parent = parent->cl_parent; | ||
481 | } | ||
482 | return clnt; | ||
483 | } | ||
484 | |||
472 | /** | 485 | /** |
473 | * rpcb_getport_async - obtain the port for a given RPC service on a given host | 486 | * rpcb_getport_async - obtain the port for a given RPC service on a given host |
474 | * @task: task that is waiting for portmapper request | 487 | * @task: task that is waiting for portmapper request |
@@ -478,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi | |||
478 | */ | 491 | */ |
479 | void rpcb_getport_async(struct rpc_task *task) | 492 | void rpcb_getport_async(struct rpc_task *task) |
480 | { | 493 | { |
481 | struct rpc_clnt *clnt = task->tk_client; | 494 | struct rpc_clnt *clnt; |
482 | struct rpc_procinfo *proc; | 495 | struct rpc_procinfo *proc; |
483 | u32 bind_version; | 496 | u32 bind_version; |
484 | struct rpc_xprt *xprt = task->tk_xprt; | 497 | struct rpc_xprt *xprt; |
485 | struct rpc_clnt *rpcb_clnt; | 498 | struct rpc_clnt *rpcb_clnt; |
486 | static struct rpcbind_args *map; | 499 | static struct rpcbind_args *map; |
487 | struct rpc_task *child; | 500 | struct rpc_task *child; |
@@ -490,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
490 | size_t salen; | 503 | size_t salen; |
491 | int status; | 504 | int status; |
492 | 505 | ||
506 | clnt = rpcb_find_transport_owner(task->tk_client); | ||
507 | xprt = clnt->cl_xprt; | ||
508 | |||
493 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", | 509 | dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", |
494 | task->tk_pid, __func__, | 510 | task->tk_pid, __func__, |
495 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); | 511 | clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); |
496 | 512 | ||
497 | /* Autobind on cloned rpc clients is discouraged */ | ||
498 | BUG_ON(clnt->cl_parent != clnt); | ||
499 | |||
500 | /* Put self on the wait queue to ensure we get notified if | 513 | /* Put self on the wait queue to ensure we get notified if |
501 | * some other task is already attempting to bind the port */ | 514 | * some other task is already attempting to bind the port */ |
502 | rpc_sleep_on(&xprt->binding, task, NULL); | 515 | rpc_sleep_on(&xprt->binding, task, NULL); |
@@ -558,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
558 | status = -ENOMEM; | 571 | status = -ENOMEM; |
559 | dprintk("RPC: %5u %s: no memory available\n", | 572 | dprintk("RPC: %5u %s: no memory available\n", |
560 | task->tk_pid, __func__); | 573 | task->tk_pid, __func__); |
561 | goto bailout_nofree; | 574 | goto bailout_release_client; |
562 | } | 575 | } |
563 | map->r_prog = clnt->cl_prog; | 576 | map->r_prog = clnt->cl_prog; |
564 | map->r_vers = clnt->cl_vers; | 577 | map->r_vers = clnt->cl_vers; |
@@ -578,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
578 | task->tk_pid, __func__); | 591 | task->tk_pid, __func__); |
579 | return; | 592 | return; |
580 | } | 593 | } |
581 | rpc_put_task(child); | ||
582 | 594 | ||
583 | task->tk_xprt->stat.bind_count++; | 595 | xprt->stat.bind_count++; |
596 | rpc_put_task(child); | ||
584 | return; | 597 | return; |
585 | 598 | ||
599 | bailout_release_client: | ||
600 | rpc_release_client(rpcb_clnt); | ||
586 | bailout_nofree: | 601 | bailout_nofree: |
587 | rpcb_wake_rpcbind_waiters(xprt, status); | 602 | rpcb_wake_rpcbind_waiters(xprt, status); |
588 | task->tk_status = status; | 603 | task->tk_status = status; |
@@ -633,7 +648,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) | |||
633 | static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, | 648 | static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, |
634 | struct rpcbind_args *rpcb) | 649 | struct rpcbind_args *rpcb) |
635 | { | 650 | { |
636 | dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", | 651 | dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", |
637 | rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); | 652 | rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); |
638 | *p++ = htonl(rpcb->r_prog); | 653 | *p++ = htonl(rpcb->r_prog); |
639 | *p++ = htonl(rpcb->r_vers); | 654 | *p++ = htonl(rpcb->r_vers); |
@@ -648,7 +663,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, | |||
648 | unsigned short *portp) | 663 | unsigned short *portp) |
649 | { | 664 | { |
650 | *portp = (unsigned short) ntohl(*p++); | 665 | *portp = (unsigned short) ntohl(*p++); |
651 | dprintk("RPC: rpcb_decode_getport result %u\n", | 666 | dprintk("RPC: rpcb getport result: %u\n", |
652 | *portp); | 667 | *portp); |
653 | return 0; | 668 | return 0; |
654 | } | 669 | } |
@@ -657,7 +672,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, | |||
657 | unsigned int *boolp) | 672 | unsigned int *boolp) |
658 | { | 673 | { |
659 | *boolp = (unsigned int) ntohl(*p++); | 674 | *boolp = (unsigned int) ntohl(*p++); |
660 | dprintk("RPC: rpcb_decode_set: call %s\n", | 675 | dprintk("RPC: rpcb set/unset call %s\n", |
661 | (*boolp ? "succeeded" : "failed")); | 676 | (*boolp ? "succeeded" : "failed")); |
662 | return 0; | 677 | return 0; |
663 | } | 678 | } |
@@ -665,7 +680,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, | |||
665 | static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, | 680 | static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, |
666 | struct rpcbind_args *rpcb) | 681 | struct rpcbind_args *rpcb) |
667 | { | 682 | { |
668 | dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", | 683 | dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", |
669 | rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); | 684 | rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); |
670 | *p++ = htonl(rpcb->r_prog); | 685 | *p++ = htonl(rpcb->r_prog); |
671 | *p++ = htonl(rpcb->r_vers); | 686 | *p++ = htonl(rpcb->r_vers); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a32cb7c4bb4..54c98d876847 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -28,6 +28,8 @@ | |||
28 | 28 | ||
29 | #define RPCDBG_FACILITY RPCDBG_SVCDSP | 29 | #define RPCDBG_FACILITY RPCDBG_SVCDSP |
30 | 30 | ||
31 | static void svc_unregister(const struct svc_serv *serv); | ||
32 | |||
31 | #define svc_serv_is_pooled(serv) ((serv)->sv_function) | 33 | #define svc_serv_is_pooled(serv) ((serv)->sv_function) |
32 | 34 | ||
33 | /* | 35 | /* |
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) | |||
357 | */ | 359 | */ |
358 | static struct svc_serv * | 360 | static struct svc_serv * |
359 | __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | 361 | __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, |
360 | void (*shutdown)(struct svc_serv *serv)) | 362 | sa_family_t family, void (*shutdown)(struct svc_serv *serv)) |
361 | { | 363 | { |
362 | struct svc_serv *serv; | 364 | struct svc_serv *serv; |
363 | unsigned int vers; | 365 | unsigned int vers; |
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
366 | 368 | ||
367 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) | 369 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) |
368 | return NULL; | 370 | return NULL; |
371 | serv->sv_family = family; | ||
369 | serv->sv_name = prog->pg_name; | 372 | serv->sv_name = prog->pg_name; |
370 | serv->sv_program = prog; | 373 | serv->sv_program = prog; |
371 | serv->sv_nrthreads = 1; | 374 | serv->sv_nrthreads = 1; |
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
416 | spin_lock_init(&pool->sp_lock); | 419 | spin_lock_init(&pool->sp_lock); |
417 | } | 420 | } |
418 | 421 | ||
419 | |||
420 | /* Remove any stale portmap registrations */ | 422 | /* Remove any stale portmap registrations */ |
421 | svc_register(serv, 0, 0); | 423 | svc_unregister(serv); |
422 | 424 | ||
423 | return serv; | 425 | return serv; |
424 | } | 426 | } |
425 | 427 | ||
426 | struct svc_serv * | 428 | struct svc_serv * |
427 | svc_create(struct svc_program *prog, unsigned int bufsize, | 429 | svc_create(struct svc_program *prog, unsigned int bufsize, |
428 | void (*shutdown)(struct svc_serv *serv)) | 430 | sa_family_t family, void (*shutdown)(struct svc_serv *serv)) |
429 | { | 431 | { |
430 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); | 432 | return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); |
431 | } | 433 | } |
432 | EXPORT_SYMBOL(svc_create); | 434 | EXPORT_SYMBOL(svc_create); |
433 | 435 | ||
434 | struct svc_serv * | 436 | struct svc_serv * |
435 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | 437 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, |
436 | void (*shutdown)(struct svc_serv *serv), | 438 | sa_family_t family, void (*shutdown)(struct svc_serv *serv), |
437 | svc_thread_fn func, struct module *mod) | 439 | svc_thread_fn func, struct module *mod) |
438 | { | 440 | { |
439 | struct svc_serv *serv; | 441 | struct svc_serv *serv; |
440 | unsigned int npools = svc_pool_map_get(); | 442 | unsigned int npools = svc_pool_map_get(); |
441 | 443 | ||
442 | serv = __svc_create(prog, bufsize, npools, shutdown); | 444 | serv = __svc_create(prog, bufsize, npools, family, shutdown); |
443 | 445 | ||
444 | if (serv != NULL) { | 446 | if (serv != NULL) { |
445 | serv->sv_function = func; | 447 | serv->sv_function = func; |
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv) | |||
486 | if (svc_serv_is_pooled(serv)) | 488 | if (svc_serv_is_pooled(serv)) |
487 | svc_pool_map_put(); | 489 | svc_pool_map_put(); |
488 | 490 | ||
489 | /* Unregister service with the portmapper */ | 491 | svc_unregister(serv); |
490 | svc_register(serv, 0, 0); | ||
491 | kfree(serv->sv_pools); | 492 | kfree(serv->sv_pools); |
492 | kfree(serv); | 493 | kfree(serv); |
493 | } | 494 | } |
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
718 | } | 719 | } |
719 | EXPORT_SYMBOL(svc_exit_thread); | 720 | EXPORT_SYMBOL(svc_exit_thread); |
720 | 721 | ||
722 | #ifdef CONFIG_SUNRPC_REGISTER_V4 | ||
723 | |||
721 | /* | 724 | /* |
722 | * Register an RPC service with the local portmapper. | 725 | * Register an "inet" protocol family netid with the local |
723 | * To unregister a service, call this routine with | 726 | * rpcbind daemon via an rpcbind v4 SET request. |
724 | * proto and port == 0. | 727 | * |
728 | * No netconfig infrastructure is available in the kernel, so | ||
729 | * we map IP_ protocol numbers to netids by hand. | ||
730 | * | ||
731 | * Returns zero on success; a negative errno value is returned | ||
732 | * if any error occurs. | ||
725 | */ | 733 | */ |
726 | int | 734 | static int __svc_rpcb_register4(const u32 program, const u32 version, |
727 | svc_register(struct svc_serv *serv, int proto, unsigned short port) | 735 | const unsigned short protocol, |
736 | const unsigned short port) | ||
737 | { | ||
738 | struct sockaddr_in sin = { | ||
739 | .sin_family = AF_INET, | ||
740 | .sin_addr.s_addr = htonl(INADDR_ANY), | ||
741 | .sin_port = htons(port), | ||
742 | }; | ||
743 | char *netid; | ||
744 | |||
745 | switch (protocol) { | ||
746 | case IPPROTO_UDP: | ||
747 | netid = RPCBIND_NETID_UDP; | ||
748 | break; | ||
749 | case IPPROTO_TCP: | ||
750 | netid = RPCBIND_NETID_TCP; | ||
751 | break; | ||
752 | default: | ||
753 | return -EPROTONOSUPPORT; | ||
754 | } | ||
755 | |||
756 | return rpcb_v4_register(program, version, | ||
757 | (struct sockaddr *)&sin, netid); | ||
758 | } | ||
759 | |||
760 | /* | ||
761 | * Register an "inet6" protocol family netid with the local | ||
762 | * rpcbind daemon via an rpcbind v4 SET request. | ||
763 | * | ||
764 | * No netconfig infrastructure is available in the kernel, so | ||
765 | * we map IP_ protocol numbers to netids by hand. | ||
766 | * | ||
767 | * Returns zero on success; a negative errno value is returned | ||
768 | * if any error occurs. | ||
769 | */ | ||
770 | static int __svc_rpcb_register6(const u32 program, const u32 version, | ||
771 | const unsigned short protocol, | ||
772 | const unsigned short port) | ||
773 | { | ||
774 | struct sockaddr_in6 sin6 = { | ||
775 | .sin6_family = AF_INET6, | ||
776 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
777 | .sin6_port = htons(port), | ||
778 | }; | ||
779 | char *netid; | ||
780 | |||
781 | switch (protocol) { | ||
782 | case IPPROTO_UDP: | ||
783 | netid = RPCBIND_NETID_UDP6; | ||
784 | break; | ||
785 | case IPPROTO_TCP: | ||
786 | netid = RPCBIND_NETID_TCP6; | ||
787 | break; | ||
788 | default: | ||
789 | return -EPROTONOSUPPORT; | ||
790 | } | ||
791 | |||
792 | return rpcb_v4_register(program, version, | ||
793 | (struct sockaddr *)&sin6, netid); | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * Register a kernel RPC service via rpcbind version 4. | ||
798 | * | ||
799 | * Returns zero on success; a negative errno value is returned | ||
800 | * if any error occurs. | ||
801 | */ | ||
802 | static int __svc_register(const u32 program, const u32 version, | ||
803 | const sa_family_t family, | ||
804 | const unsigned short protocol, | ||
805 | const unsigned short port) | ||
806 | { | ||
807 | int error; | ||
808 | |||
809 | switch (family) { | ||
810 | case AF_INET: | ||
811 | return __svc_rpcb_register4(program, version, | ||
812 | protocol, port); | ||
813 | case AF_INET6: | ||
814 | error = __svc_rpcb_register6(program, version, | ||
815 | protocol, port); | ||
816 | if (error < 0) | ||
817 | return error; | ||
818 | |||
819 | /* | ||
820 | * Work around bug in some versions of Linux rpcbind | ||
821 | * which don't allow registration of both inet and | ||
822 | * inet6 netids. | ||
823 | * | ||
824 | * Error return ignored for now. | ||
825 | */ | ||
826 | __svc_rpcb_register4(program, version, | ||
827 | protocol, port); | ||
828 | return 0; | ||
829 | } | ||
830 | |||
831 | return -EAFNOSUPPORT; | ||
832 | } | ||
833 | |||
834 | #else /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
835 | |||
836 | /* | ||
837 | * Register a kernel RPC service via rpcbind version 2. | ||
838 | * | ||
839 | * Returns zero on success; a negative errno value is returned | ||
840 | * if any error occurs. | ||
841 | */ | ||
842 | static int __svc_register(const u32 program, const u32 version, | ||
843 | sa_family_t family, | ||
844 | const unsigned short protocol, | ||
845 | const unsigned short port) | ||
846 | { | ||
847 | if (family != AF_INET) | ||
848 | return -EAFNOSUPPORT; | ||
849 | |||
850 | return rpcb_register(program, version, protocol, port); | ||
851 | } | ||
852 | |||
853 | #endif /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
854 | |||
855 | /** | ||
856 | * svc_register - register an RPC service with the local portmapper | ||
857 | * @serv: svc_serv struct for the service to register | ||
858 | * @proto: transport protocol number to advertise | ||
859 | * @port: port to advertise | ||
860 | * | ||
861 | * Service is registered for any address in serv's address family | ||
862 | */ | ||
863 | int svc_register(const struct svc_serv *serv, const unsigned short proto, | ||
864 | const unsigned short port) | ||
728 | { | 865 | { |
729 | struct svc_program *progp; | 866 | struct svc_program *progp; |
730 | unsigned long flags; | ||
731 | unsigned int i; | 867 | unsigned int i; |
732 | int error = 0, dummy; | 868 | int error = 0; |
733 | 869 | ||
734 | if (!port) | 870 | BUG_ON(proto == 0 && port == 0); |
735 | clear_thread_flag(TIF_SIGPENDING); | ||
736 | 871 | ||
737 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | 872 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { |
738 | for (i = 0; i < progp->pg_nvers; i++) { | 873 | for (i = 0; i < progp->pg_nvers; i++) { |
739 | if (progp->pg_vers[i] == NULL) | 874 | if (progp->pg_vers[i] == NULL) |
740 | continue; | 875 | continue; |
741 | 876 | ||
742 | dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", | 877 | dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", |
743 | progp->pg_name, | 878 | progp->pg_name, |
879 | i, | ||
744 | proto == IPPROTO_UDP? "udp" : "tcp", | 880 | proto == IPPROTO_UDP? "udp" : "tcp", |
745 | port, | 881 | port, |
746 | i, | 882 | serv->sv_family, |
747 | progp->pg_vers[i]->vs_hidden? | 883 | progp->pg_vers[i]->vs_hidden? |
748 | " (but not telling portmap)" : ""); | 884 | " (but not telling portmap)" : ""); |
749 | 885 | ||
750 | if (progp->pg_vers[i]->vs_hidden) | 886 | if (progp->pg_vers[i]->vs_hidden) |
751 | continue; | 887 | continue; |
752 | 888 | ||
753 | error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); | 889 | error = __svc_register(progp->pg_prog, i, |
890 | serv->sv_family, proto, port); | ||
754 | if (error < 0) | 891 | if (error < 0) |
755 | break; | 892 | break; |
756 | if (port && !dummy) { | ||
757 | error = -EACCES; | ||
758 | break; | ||
759 | } | ||
760 | } | 893 | } |
761 | } | 894 | } |
762 | 895 | ||
763 | if (!port) { | 896 | return error; |
764 | spin_lock_irqsave(&current->sighand->siglock, flags); | 897 | } |
765 | recalc_sigpending(); | 898 | |
766 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | 899 | #ifdef CONFIG_SUNRPC_REGISTER_V4 |
900 | |||
901 | static void __svc_unregister(const u32 program, const u32 version, | ||
902 | const char *progname) | ||
903 | { | ||
904 | struct sockaddr_in6 sin6 = { | ||
905 | .sin6_family = AF_INET6, | ||
906 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
907 | .sin6_port = 0, | ||
908 | }; | ||
909 | int error; | ||
910 | |||
911 | error = rpcb_v4_register(program, version, | ||
912 | (struct sockaddr *)&sin6, ""); | ||
913 | dprintk("svc: %s(%sv%u), error %d\n", | ||
914 | __func__, progname, version, error); | ||
915 | } | ||
916 | |||
917 | #else /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
918 | |||
919 | static void __svc_unregister(const u32 program, const u32 version, | ||
920 | const char *progname) | ||
921 | { | ||
922 | int error; | ||
923 | |||
924 | error = rpcb_register(program, version, 0, 0); | ||
925 | dprintk("svc: %s(%sv%u), error %d\n", | ||
926 | __func__, progname, version, error); | ||
927 | } | ||
928 | |||
929 | #endif /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
930 | |||
931 | /* | ||
932 | * All netids, bind addresses and ports registered for [program, version] | ||
933 | * are removed from the local rpcbind database (if the service is not | ||
934 | * hidden) to make way for a new instance of the service. | ||
935 | * | ||
936 | * The result of unregistration is reported via dprintk for those who want | ||
937 | * verification of the result, but is otherwise not important. | ||
938 | */ | ||
939 | static void svc_unregister(const struct svc_serv *serv) | ||
940 | { | ||
941 | struct svc_program *progp; | ||
942 | unsigned long flags; | ||
943 | unsigned int i; | ||
944 | |||
945 | clear_thread_flag(TIF_SIGPENDING); | ||
946 | |||
947 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | ||
948 | for (i = 0; i < progp->pg_nvers; i++) { | ||
949 | if (progp->pg_vers[i] == NULL) | ||
950 | continue; | ||
951 | if (progp->pg_vers[i]->vs_hidden) | ||
952 | continue; | ||
953 | |||
954 | __svc_unregister(progp->pg_prog, i, progp->pg_name); | ||
955 | } | ||
767 | } | 956 | } |
768 | 957 | ||
769 | return error; | 958 | spin_lock_irqsave(&current->sighand->siglock, flags); |
959 | recalc_sigpending(); | ||
960 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | ||
770 | } | 961 | } |
771 | 962 | ||
772 | /* | 963 | /* |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e46c825f4954..bf5b5cdafebf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c | |||
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, | |||
159 | } | 159 | } |
160 | EXPORT_SYMBOL_GPL(svc_xprt_init); | 160 | EXPORT_SYMBOL_GPL(svc_xprt_init); |
161 | 161 | ||
162 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | 162 | static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, |
163 | int flags) | 163 | struct svc_serv *serv, |
164 | unsigned short port, int flags) | ||
164 | { | 165 | { |
165 | struct svc_xprt_class *xcl; | ||
166 | struct sockaddr_in sin = { | 166 | struct sockaddr_in sin = { |
167 | .sin_family = AF_INET, | 167 | .sin_family = AF_INET, |
168 | .sin_addr.s_addr = htonl(INADDR_ANY), | 168 | .sin_addr.s_addr = htonl(INADDR_ANY), |
169 | .sin_port = htons(port), | 169 | .sin_port = htons(port), |
170 | }; | 170 | }; |
171 | struct sockaddr_in6 sin6 = { | ||
172 | .sin6_family = AF_INET6, | ||
173 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
174 | .sin6_port = htons(port), | ||
175 | }; | ||
176 | struct sockaddr *sap; | ||
177 | size_t len; | ||
178 | |||
179 | switch (serv->sv_family) { | ||
180 | case AF_INET: | ||
181 | sap = (struct sockaddr *)&sin; | ||
182 | len = sizeof(sin); | ||
183 | break; | ||
184 | case AF_INET6: | ||
185 | sap = (struct sockaddr *)&sin6; | ||
186 | len = sizeof(sin6); | ||
187 | break; | ||
188 | default: | ||
189 | return ERR_PTR(-EAFNOSUPPORT); | ||
190 | } | ||
191 | |||
192 | return xcl->xcl_ops->xpo_create(serv, sap, len, flags); | ||
193 | } | ||
194 | |||
195 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | ||
196 | int flags) | ||
197 | { | ||
198 | struct svc_xprt_class *xcl; | ||
199 | |||
171 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | 200 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); |
172 | spin_lock(&svc_xprt_class_lock); | 201 | spin_lock(&svc_xprt_class_lock); |
173 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | 202 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { |
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | |||
180 | goto err; | 209 | goto err; |
181 | 210 | ||
182 | spin_unlock(&svc_xprt_class_lock); | 211 | spin_unlock(&svc_xprt_class_lock); |
183 | newxprt = xcl->xcl_ops-> | 212 | newxprt = __svc_xpo_create(xcl, serv, port, flags); |
184 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | ||
185 | flags); | ||
186 | if (IS_ERR(newxprt)) { | 213 | if (IS_ERR(newxprt)) { |
187 | module_put(xcl->xcl_owner); | 214 | module_put(xcl->xcl_owner); |
188 | return PTR_ERR(newxprt); | 215 | return PTR_ERR(newxprt); |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e65719f1ef6..95293f549e9c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1114 | struct svc_sock *svsk; | 1114 | struct svc_sock *svsk; |
1115 | struct sock *inet; | 1115 | struct sock *inet; |
1116 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1116 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
1117 | int val; | ||
1117 | 1118 | ||
1118 | dprintk("svc: svc_setup_socket %p\n", sock); | 1119 | dprintk("svc: svc_setup_socket %p\n", sock); |
1119 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1120 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1146 | else | 1147 | else |
1147 | svc_tcp_init(svsk, serv); | 1148 | svc_tcp_init(svsk, serv); |
1148 | 1149 | ||
1150 | /* | ||
1151 | * We start one listener per sv_serv. We want AF_INET | ||
1152 | * requests to be automatically shunted to our AF_INET6 | ||
1153 | * listener using a mapped IPv4 address. Make sure | ||
1154 | * no-one starts an equivalent IPv4 listener, which | ||
1155 | * would steal our incoming connections. | ||
1156 | */ | ||
1157 | val = 0; | ||
1158 | if (serv->sv_family == AF_INET6) | ||
1159 | kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, | ||
1160 | (char *)&val, sizeof(val)); | ||
1161 | |||
1149 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1162 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1150 | svsk, svsk->sk_sk); | 1163 | svsk, svsk->sk_sk); |
1151 | 1164 | ||
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1154 | 1167 | ||
1155 | int svc_addsock(struct svc_serv *serv, | 1168 | int svc_addsock(struct svc_serv *serv, |
1156 | int fd, | 1169 | int fd, |
1157 | char *name_return, | 1170 | char *name_return) |
1158 | int *proto) | ||
1159 | { | 1171 | { |
1160 | int err = 0; | 1172 | int err = 0; |
1161 | struct socket *so = sockfd_lookup(fd, &err); | 1173 | struct socket *so = sockfd_lookup(fd, &err); |
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv, | |||
1190 | sockfd_put(so); | 1202 | sockfd_put(so); |
1191 | return err; | 1203 | return err; |
1192 | } | 1204 | } |
1193 | if (proto) *proto = so->sk->sk_protocol; | ||
1194 | return one_sock_name(name_return, svsk); | 1205 | return one_sock_name(name_return, svsk); |
1195 | } | 1206 | } |
1196 | EXPORT_SYMBOL_GPL(svc_addsock); | 1207 | EXPORT_SYMBOL_GPL(svc_addsock); |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe332..29e401bb612e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) | |||
108 | goto out; | 108 | goto out; |
109 | } | 109 | } |
110 | 110 | ||
111 | result = -EINVAL; | 111 | list_add_tail(&transport->list, &xprt_list); |
112 | if (try_module_get(THIS_MODULE)) { | 112 | printk(KERN_INFO "RPC: Registered %s transport module.\n", |
113 | list_add_tail(&transport->list, &xprt_list); | 113 | transport->name); |
114 | printk(KERN_INFO "RPC: Registered %s transport module.\n", | 114 | result = 0; |
115 | transport->name); | ||
116 | result = 0; | ||
117 | } | ||
118 | 115 | ||
119 | out: | 116 | out: |
120 | spin_unlock(&xprt_list_lock); | 117 | spin_unlock(&xprt_list_lock); |
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) | |||
143 | "RPC: Unregistered %s transport module.\n", | 140 | "RPC: Unregistered %s transport module.\n", |
144 | transport->name); | 141 | transport->name); |
145 | list_del_init(&transport->list); | 142 | list_del_init(&transport->list); |
146 | module_put(THIS_MODULE); | ||
147 | goto out; | 143 | goto out; |
148 | } | 144 | } |
149 | } | 145 | } |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5c1954d28d09..14106d26bb95 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
118 | } | 118 | } |
119 | 119 | ||
120 | if (xdrbuf->tail[0].iov_len) { | 120 | if (xdrbuf->tail[0].iov_len) { |
121 | /* the rpcrdma protocol allows us to omit any trailing | ||
122 | * xdr pad bytes, saving the server an RDMA operation. */ | ||
123 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | ||
124 | return n; | ||
121 | if (n == nsegs) | 125 | if (n == nsegs) |
122 | return 0; | 126 | return 0; |
123 | seg[n].mr_page = NULL; | 127 | seg[n].mr_page = NULL; |
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
508 | if (hdrlen == 0) | 512 | if (hdrlen == 0) |
509 | return -1; | 513 | return -1; |
510 | 514 | ||
511 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" | 515 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
512 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 516 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
513 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, | 517 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
514 | headerp, base, req->rl_iov.lkey); | 518 | headerp, base, req->rl_iov.lkey); |
515 | 519 | ||
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b | |||
594 | * Scatter inline received data back into provided iov's. | 598 | * Scatter inline received data back into provided iov's. |
595 | */ | 599 | */ |
596 | static void | 600 | static void |
597 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | 601 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) |
598 | { | 602 | { |
599 | int i, npages, curlen, olen; | 603 | int i, npages, curlen, olen; |
600 | char *destp; | 604 | char *destp; |
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | |||
660 | } else | 664 | } else |
661 | rqst->rq_rcv_buf.tail[0].iov_len = 0; | 665 | rqst->rq_rcv_buf.tail[0].iov_len = 0; |
662 | 666 | ||
667 | if (pad) { | ||
668 | /* implicit padding on terminal chunk */ | ||
669 | unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; | ||
670 | while (pad--) | ||
671 | p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; | ||
672 | } | ||
673 | |||
663 | if (copy_len) | 674 | if (copy_len) |
664 | dprintk("RPC: %s: %d bytes in" | 675 | dprintk("RPC: %s: %d bytes in" |
665 | " %d extra segments (%d lost)\n", | 676 | " %d extra segments (%d lost)\n", |
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
681 | struct rpc_xprt *xprt = ep->rep_xprt; | 692 | struct rpc_xprt *xprt = ep->rep_xprt; |
682 | 693 | ||
683 | spin_lock_bh(&xprt->transport_lock); | 694 | spin_lock_bh(&xprt->transport_lock); |
695 | if (++xprt->connect_cookie == 0) /* maintain a reserved value */ | ||
696 | ++xprt->connect_cookie; | ||
684 | if (ep->rep_connected > 0) { | 697 | if (ep->rep_connected > 0) { |
685 | if (!xprt_test_and_set_connected(xprt)) | 698 | if (!xprt_test_and_set_connected(xprt)) |
686 | xprt_wake_pending_tasks(xprt, 0); | 699 | xprt_wake_pending_tasks(xprt, 0); |
687 | } else { | 700 | } else { |
688 | if (xprt_test_and_clear_connected(xprt)) | 701 | if (xprt_test_and_clear_connected(xprt)) |
689 | xprt_wake_pending_tasks(xprt, ep->rep_connected); | 702 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
690 | } | 703 | } |
691 | spin_unlock_bh(&xprt->transport_lock); | 704 | spin_unlock_bh(&xprt->transport_lock); |
692 | } | 705 | } |
@@ -792,14 +805,20 @@ repost: | |||
792 | ((unsigned char *)iptr - (unsigned char *)headerp); | 805 | ((unsigned char *)iptr - (unsigned char *)headerp); |
793 | status = rep->rr_len + rdmalen; | 806 | status = rep->rr_len + rdmalen; |
794 | r_xprt->rx_stats.total_rdma_reply += rdmalen; | 807 | r_xprt->rx_stats.total_rdma_reply += rdmalen; |
808 | /* special case - last chunk may omit padding */ | ||
809 | if (rdmalen &= 3) { | ||
810 | rdmalen = 4 - rdmalen; | ||
811 | status += rdmalen; | ||
812 | } | ||
795 | } else { | 813 | } else { |
796 | /* else ordinary inline */ | 814 | /* else ordinary inline */ |
815 | rdmalen = 0; | ||
797 | iptr = (__be32 *)((unsigned char *)headerp + 28); | 816 | iptr = (__be32 *)((unsigned char *)headerp + 28); |
798 | rep->rr_len -= 28; /*sizeof *headerp;*/ | 817 | rep->rr_len -= 28; /*sizeof *headerp;*/ |
799 | status = rep->rr_len; | 818 | status = rep->rr_len; |
800 | } | 819 | } |
801 | /* Fix up the rpc results for upper layer */ | 820 | /* Fix up the rpc results for upper layer */ |
802 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); | 821 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); |
803 | break; | 822 | break; |
804 | 823 | ||
805 | case htonl(RDMA_NOMSG): | 824 | case htonl(RDMA_NOMSG): |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 74de31a06616..a4756576d687 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
116 | * | 116 | * |
117 | * Assumptions: | 117 | * Assumptions: |
118 | * - chunk[0]->position points to pages[0] at an offset of 0 | 118 | * - chunk[0]->position points to pages[0] at an offset of 0 |
119 | * - pages[] is not physically or virtually contigous and consists of | 119 | * - pages[] is not physically or virtually contiguous and consists of |
120 | * PAGE_SIZE elements. | 120 | * PAGE_SIZE elements. |
121 | * | 121 | * |
122 | * Output: | 122 | * Output: |
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
125 | * chunk in the read list | 125 | * chunk in the read list |
126 | * | 126 | * |
127 | */ | 127 | */ |
128 | static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | 128 | static int map_read_chunks(struct svcxprt_rdma *xprt, |
129 | struct svc_rqst *rqstp, | 129 | struct svc_rqst *rqstp, |
130 | struct svc_rdma_op_ctxt *head, | 130 | struct svc_rdma_op_ctxt *head, |
131 | struct rpcrdma_msg *rmsgp, | 131 | struct rpcrdma_msg *rmsgp, |
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | |||
211 | return sge_no; | 211 | return sge_no; |
212 | } | 212 | } |
213 | 213 | ||
214 | static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | 214 | /* Map a read-chunk-list to an XDR and fast register the page-list. |
215 | struct svc_rdma_op_ctxt *ctxt, | 215 | * |
216 | struct kvec *vec, | 216 | * Assumptions: |
217 | u64 *sgl_offset, | 217 | * - chunk[0] position points to pages[0] at an offset of 0 |
218 | int count) | 218 | * - pages[] will be made physically contiguous by creating a one-off memory |
219 | * region using the fastreg verb. | ||
220 | * - byte_count is # of bytes in read-chunk-list | ||
221 | * - ch_count is # of chunks in read-chunk-list | ||
222 | * | ||
223 | * Output: | ||
224 | * - sge array pointing into pages[] array. | ||
225 | * - chunk_sge array specifying sge index and count for each | ||
226 | * chunk in the read list | ||
227 | */ | ||
228 | static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, | ||
229 | struct svc_rqst *rqstp, | ||
230 | struct svc_rdma_op_ctxt *head, | ||
231 | struct rpcrdma_msg *rmsgp, | ||
232 | struct svc_rdma_req_map *rpl_map, | ||
233 | struct svc_rdma_req_map *chl_map, | ||
234 | int ch_count, | ||
235 | int byte_count) | ||
236 | { | ||
237 | int page_no; | ||
238 | int ch_no; | ||
239 | u32 offset; | ||
240 | struct rpcrdma_read_chunk *ch; | ||
241 | struct svc_rdma_fastreg_mr *frmr; | ||
242 | int ret = 0; | ||
243 | |||
244 | frmr = svc_rdma_get_frmr(xprt); | ||
245 | if (IS_ERR(frmr)) | ||
246 | return -ENOMEM; | ||
247 | |||
248 | head->frmr = frmr; | ||
249 | head->arg.head[0] = rqstp->rq_arg.head[0]; | ||
250 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | ||
251 | head->arg.pages = &head->pages[head->count]; | ||
252 | head->hdr_count = head->count; /* save count of hdr pages */ | ||
253 | head->arg.page_base = 0; | ||
254 | head->arg.page_len = byte_count; | ||
255 | head->arg.len = rqstp->rq_arg.len + byte_count; | ||
256 | head->arg.buflen = rqstp->rq_arg.buflen + byte_count; | ||
257 | |||
258 | /* Fast register the page list */ | ||
259 | frmr->kva = page_address(rqstp->rq_arg.pages[0]); | ||
260 | frmr->direction = DMA_FROM_DEVICE; | ||
261 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | ||
262 | frmr->map_len = byte_count; | ||
263 | frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; | ||
264 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
265 | frmr->page_list->page_list[page_no] = | ||
266 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
267 | page_address(rqstp->rq_arg.pages[page_no]), | ||
268 | PAGE_SIZE, DMA_TO_DEVICE); | ||
269 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
270 | frmr->page_list->page_list[page_no])) | ||
271 | goto fatal_err; | ||
272 | atomic_inc(&xprt->sc_dma_used); | ||
273 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | ||
274 | } | ||
275 | head->count += page_no; | ||
276 | |||
277 | /* rq_respages points one past arg pages */ | ||
278 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | ||
279 | |||
280 | /* Create the reply and chunk maps */ | ||
281 | offset = 0; | ||
282 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
283 | for (ch_no = 0; ch_no < ch_count; ch_no++) { | ||
284 | rpl_map->sge[ch_no].iov_base = frmr->kva + offset; | ||
285 | rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; | ||
286 | chl_map->ch[ch_no].count = 1; | ||
287 | chl_map->ch[ch_no].start = ch_no; | ||
288 | offset += ch->rc_target.rs_length; | ||
289 | ch++; | ||
290 | } | ||
291 | |||
292 | ret = svc_rdma_fastreg(xprt, frmr); | ||
293 | if (ret) | ||
294 | goto fatal_err; | ||
295 | |||
296 | return ch_no; | ||
297 | |||
298 | fatal_err: | ||
299 | printk("svcrdma: error fast registering xdr for xprt %p", xprt); | ||
300 | svc_rdma_put_frmr(xprt, frmr); | ||
301 | return -EIO; | ||
302 | } | ||
303 | |||
304 | static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | ||
305 | struct svc_rdma_op_ctxt *ctxt, | ||
306 | struct svc_rdma_fastreg_mr *frmr, | ||
307 | struct kvec *vec, | ||
308 | u64 *sgl_offset, | ||
309 | int count) | ||
219 | { | 310 | { |
220 | int i; | 311 | int i; |
221 | 312 | ||
222 | ctxt->count = count; | 313 | ctxt->count = count; |
223 | ctxt->direction = DMA_FROM_DEVICE; | 314 | ctxt->direction = DMA_FROM_DEVICE; |
224 | for (i = 0; i < count; i++) { | 315 | for (i = 0; i < count; i++) { |
225 | atomic_inc(&xprt->sc_dma_used); | 316 | ctxt->sge[i].length = 0; /* in case map fails */ |
226 | ctxt->sge[i].addr = | 317 | if (!frmr) { |
227 | ib_dma_map_single(xprt->sc_cm_id->device, | 318 | ctxt->sge[i].addr = |
228 | vec[i].iov_base, vec[i].iov_len, | 319 | ib_dma_map_single(xprt->sc_cm_id->device, |
229 | DMA_FROM_DEVICE); | 320 | vec[i].iov_base, |
321 | vec[i].iov_len, | ||
322 | DMA_FROM_DEVICE); | ||
323 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
324 | ctxt->sge[i].addr)) | ||
325 | return -EINVAL; | ||
326 | ctxt->sge[i].lkey = xprt->sc_dma_lkey; | ||
327 | atomic_inc(&xprt->sc_dma_used); | ||
328 | } else { | ||
329 | ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; | ||
330 | ctxt->sge[i].lkey = frmr->mr->lkey; | ||
331 | } | ||
230 | ctxt->sge[i].length = vec[i].iov_len; | 332 | ctxt->sge[i].length = vec[i].iov_len; |
231 | ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; | ||
232 | *sgl_offset = *sgl_offset + vec[i].iov_len; | 333 | *sgl_offset = *sgl_offset + vec[i].iov_len; |
233 | } | 334 | } |
335 | return 0; | ||
234 | } | 336 | } |
235 | 337 | ||
236 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | 338 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) |
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
278 | struct svc_rdma_op_ctxt *hdr_ctxt) | 380 | struct svc_rdma_op_ctxt *hdr_ctxt) |
279 | { | 381 | { |
280 | struct ib_send_wr read_wr; | 382 | struct ib_send_wr read_wr; |
383 | struct ib_send_wr inv_wr; | ||
281 | int err = 0; | 384 | int err = 0; |
282 | int ch_no; | 385 | int ch_no; |
283 | int ch_count; | 386 | int ch_count; |
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
301 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | 404 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); |
302 | if (ch_count > RPCSVC_MAXPAGES) | 405 | if (ch_count > RPCSVC_MAXPAGES) |
303 | return -EINVAL; | 406 | return -EINVAL; |
304 | sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, | 407 | |
305 | rpl_map, chl_map, | 408 | if (!xprt->sc_frmr_pg_list_len) |
306 | ch_count, byte_count); | 409 | sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, |
410 | rpl_map, chl_map, ch_count, | ||
411 | byte_count); | ||
412 | else | ||
413 | sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, | ||
414 | rpl_map, chl_map, ch_count, | ||
415 | byte_count); | ||
416 | if (sge_count < 0) { | ||
417 | err = -EIO; | ||
418 | goto out; | ||
419 | } | ||
420 | |||
307 | sgl_offset = 0; | 421 | sgl_offset = 0; |
308 | ch_no = 0; | 422 | ch_no = 0; |
309 | 423 | ||
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
312 | next_sge: | 426 | next_sge: |
313 | ctxt = svc_rdma_get_context(xprt); | 427 | ctxt = svc_rdma_get_context(xprt); |
314 | ctxt->direction = DMA_FROM_DEVICE; | 428 | ctxt->direction = DMA_FROM_DEVICE; |
429 | ctxt->frmr = hdr_ctxt->frmr; | ||
430 | ctxt->read_hdr = NULL; | ||
315 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 431 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
432 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
316 | 433 | ||
317 | /* Prepare READ WR */ | 434 | /* Prepare READ WR */ |
318 | memset(&read_wr, 0, sizeof read_wr); | 435 | memset(&read_wr, 0, sizeof read_wr); |
319 | ctxt->wr_op = IB_WR_RDMA_READ; | ||
320 | read_wr.wr_id = (unsigned long)ctxt; | 436 | read_wr.wr_id = (unsigned long)ctxt; |
321 | read_wr.opcode = IB_WR_RDMA_READ; | 437 | read_wr.opcode = IB_WR_RDMA_READ; |
438 | ctxt->wr_op = read_wr.opcode; | ||
322 | read_wr.send_flags = IB_SEND_SIGNALED; | 439 | read_wr.send_flags = IB_SEND_SIGNALED; |
323 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; | 440 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; |
324 | read_wr.wr.rdma.remote_addr = | 441 | read_wr.wr.rdma.remote_addr = |
@@ -327,10 +444,15 @@ next_sge: | |||
327 | read_wr.sg_list = ctxt->sge; | 444 | read_wr.sg_list = ctxt->sge; |
328 | read_wr.num_sge = | 445 | read_wr.num_sge = |
329 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); | 446 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); |
330 | rdma_set_ctxt_sge(xprt, ctxt, | 447 | err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, |
331 | &rpl_map->sge[chl_map->ch[ch_no].start], | 448 | &rpl_map->sge[chl_map->ch[ch_no].start], |
332 | &sgl_offset, | 449 | &sgl_offset, |
333 | read_wr.num_sge); | 450 | read_wr.num_sge); |
451 | if (err) { | ||
452 | svc_rdma_unmap_dma(ctxt); | ||
453 | svc_rdma_put_context(ctxt, 0); | ||
454 | goto out; | ||
455 | } | ||
334 | if (((ch+1)->rc_discrim == 0) && | 456 | if (((ch+1)->rc_discrim == 0) && |
335 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { | 457 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { |
336 | /* | 458 | /* |
@@ -339,6 +461,29 @@ next_sge: | |||
339 | * the client and the RPC needs to be enqueued. | 461 | * the client and the RPC needs to be enqueued. |
340 | */ | 462 | */ |
341 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 463 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
464 | if (hdr_ctxt->frmr) { | ||
465 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
466 | /* | ||
467 | * Invalidate the local MR used to map the data | ||
468 | * sink. | ||
469 | */ | ||
470 | if (xprt->sc_dev_caps & | ||
471 | SVCRDMA_DEVCAP_READ_W_INV) { | ||
472 | read_wr.opcode = | ||
473 | IB_WR_RDMA_READ_WITH_INV; | ||
474 | ctxt->wr_op = read_wr.opcode; | ||
475 | read_wr.ex.invalidate_rkey = | ||
476 | ctxt->frmr->mr->lkey; | ||
477 | } else { | ||
478 | /* Prepare INVALIDATE WR */ | ||
479 | memset(&inv_wr, 0, sizeof inv_wr); | ||
480 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
481 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
482 | inv_wr.ex.invalidate_rkey = | ||
483 | hdr_ctxt->frmr->mr->lkey; | ||
484 | read_wr.next = &inv_wr; | ||
485 | } | ||
486 | } | ||
342 | ctxt->read_hdr = hdr_ctxt; | 487 | ctxt->read_hdr = hdr_ctxt; |
343 | } | 488 | } |
344 | /* Post the read */ | 489 | /* Post the read */ |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 84d328329d98..9a7a8e7ae038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -69,9 +69,127 @@ | |||
69 | * array is only concerned with the reply we are assured that we have | 69 | * array is only concerned with the reply we are assured that we have |
70 | * on extra page for the RPCRMDA header. | 70 | * on extra page for the RPCRMDA header. |
71 | */ | 71 | */ |
72 | static void xdr_to_sge(struct svcxprt_rdma *xprt, | 72 | int fast_reg_xdr(struct svcxprt_rdma *xprt, |
73 | struct xdr_buf *xdr, | 73 | struct xdr_buf *xdr, |
74 | struct svc_rdma_req_map *vec) | 74 | struct svc_rdma_req_map *vec) |
75 | { | ||
76 | int sge_no; | ||
77 | u32 sge_bytes; | ||
78 | u32 page_bytes; | ||
79 | u32 page_off; | ||
80 | int page_no = 0; | ||
81 | u8 *frva; | ||
82 | struct svc_rdma_fastreg_mr *frmr; | ||
83 | |||
84 | frmr = svc_rdma_get_frmr(xprt); | ||
85 | if (IS_ERR(frmr)) | ||
86 | return -ENOMEM; | ||
87 | vec->frmr = frmr; | ||
88 | |||
89 | /* Skip the RPCRDMA header */ | ||
90 | sge_no = 1; | ||
91 | |||
92 | /* Map the head. */ | ||
93 | frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); | ||
94 | vec->sge[sge_no].iov_base = xdr->head[0].iov_base; | ||
95 | vec->sge[sge_no].iov_len = xdr->head[0].iov_len; | ||
96 | vec->count = 2; | ||
97 | sge_no++; | ||
98 | |||
99 | /* Build the FRMR */ | ||
100 | frmr->kva = frva; | ||
101 | frmr->direction = DMA_TO_DEVICE; | ||
102 | frmr->access_flags = 0; | ||
103 | frmr->map_len = PAGE_SIZE; | ||
104 | frmr->page_list_len = 1; | ||
105 | frmr->page_list->page_list[page_no] = | ||
106 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
107 | (void *)xdr->head[0].iov_base, | ||
108 | PAGE_SIZE, DMA_TO_DEVICE); | ||
109 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
110 | frmr->page_list->page_list[page_no])) | ||
111 | goto fatal_err; | ||
112 | atomic_inc(&xprt->sc_dma_used); | ||
113 | |||
114 | page_off = xdr->page_base; | ||
115 | page_bytes = xdr->page_len + page_off; | ||
116 | if (!page_bytes) | ||
117 | goto encode_tail; | ||
118 | |||
119 | /* Map the pages */ | ||
120 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
121 | vec->sge[sge_no].iov_len = page_bytes; | ||
122 | sge_no++; | ||
123 | while (page_bytes) { | ||
124 | struct page *page; | ||
125 | |||
126 | page = xdr->pages[page_no++]; | ||
127 | sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); | ||
128 | page_bytes -= sge_bytes; | ||
129 | |||
130 | frmr->page_list->page_list[page_no] = | ||
131 | ib_dma_map_page(xprt->sc_cm_id->device, page, 0, | ||
132 | PAGE_SIZE, DMA_TO_DEVICE); | ||
133 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
134 | frmr->page_list->page_list[page_no])) | ||
135 | goto fatal_err; | ||
136 | |||
137 | atomic_inc(&xprt->sc_dma_used); | ||
138 | page_off = 0; /* reset for next time through loop */ | ||
139 | frmr->map_len += PAGE_SIZE; | ||
140 | frmr->page_list_len++; | ||
141 | } | ||
142 | vec->count++; | ||
143 | |||
144 | encode_tail: | ||
145 | /* Map tail */ | ||
146 | if (0 == xdr->tail[0].iov_len) | ||
147 | goto done; | ||
148 | |||
149 | vec->count++; | ||
150 | vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; | ||
151 | |||
152 | if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == | ||
153 | ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { | ||
154 | /* | ||
155 | * If head and tail use the same page, we don't need | ||
156 | * to map it again. | ||
157 | */ | ||
158 | vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; | ||
159 | } else { | ||
160 | void *va; | ||
161 | |||
162 | /* Map another page for the tail */ | ||
163 | page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; | ||
164 | va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); | ||
165 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
166 | |||
167 | frmr->page_list->page_list[page_no] = | ||
168 | ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, | ||
169 | DMA_TO_DEVICE); | ||
170 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
171 | frmr->page_list->page_list[page_no])) | ||
172 | goto fatal_err; | ||
173 | atomic_inc(&xprt->sc_dma_used); | ||
174 | frmr->map_len += PAGE_SIZE; | ||
175 | frmr->page_list_len++; | ||
176 | } | ||
177 | |||
178 | done: | ||
179 | if (svc_rdma_fastreg(xprt, frmr)) | ||
180 | goto fatal_err; | ||
181 | |||
182 | return 0; | ||
183 | |||
184 | fatal_err: | ||
185 | printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); | ||
186 | svc_rdma_put_frmr(xprt, frmr); | ||
187 | return -EIO; | ||
188 | } | ||
189 | |||
190 | static int map_xdr(struct svcxprt_rdma *xprt, | ||
191 | struct xdr_buf *xdr, | ||
192 | struct svc_rdma_req_map *vec) | ||
75 | { | 193 | { |
76 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | 194 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; |
77 | int sge_no; | 195 | int sge_no; |
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
83 | BUG_ON(xdr->len != | 201 | BUG_ON(xdr->len != |
84 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); | 202 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); |
85 | 203 | ||
204 | if (xprt->sc_frmr_pg_list_len) | ||
205 | return fast_reg_xdr(xprt, xdr, vec); | ||
206 | |||
86 | /* Skip the first sge, this is for the RPCRDMA header */ | 207 | /* Skip the first sge, this is for the RPCRDMA header */ |
87 | sge_no = 1; | 208 | sge_no = 1; |
88 | 209 | ||
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
116 | 237 | ||
117 | BUG_ON(sge_no > sge_max); | 238 | BUG_ON(sge_no > sge_max); |
118 | vec->count = sge_no; | 239 | vec->count = sge_no; |
240 | return 0; | ||
119 | } | 241 | } |
120 | 242 | ||
121 | /* Assumptions: | 243 | /* Assumptions: |
244 | * - We are using FRMR | ||
245 | * - or - | ||
122 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | 246 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE |
123 | */ | 247 | */ |
124 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | 248 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, |
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
158 | sge_no = 0; | 282 | sge_no = 0; |
159 | 283 | ||
160 | /* Copy the remaining SGE */ | 284 | /* Copy the remaining SGE */ |
161 | while (bc != 0 && xdr_sge_no < vec->count) { | 285 | while (bc != 0) { |
162 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 286 | sge_bytes = min_t(size_t, |
163 | sge_bytes = min((size_t)bc, | 287 | bc, vec->sge[xdr_sge_no].iov_len-sge_off); |
164 | (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off)); | ||
165 | sge[sge_no].length = sge_bytes; | 288 | sge[sge_no].length = sge_bytes; |
166 | atomic_inc(&xprt->sc_dma_used); | 289 | if (!vec->frmr) { |
167 | sge[sge_no].addr = | 290 | sge[sge_no].addr = |
168 | ib_dma_map_single(xprt->sc_cm_id->device, | 291 | ib_dma_map_single(xprt->sc_cm_id->device, |
169 | (void *) | 292 | (void *) |
170 | vec->sge[xdr_sge_no].iov_base + sge_off, | 293 | vec->sge[xdr_sge_no].iov_base + sge_off, |
171 | sge_bytes, DMA_TO_DEVICE); | 294 | sge_bytes, DMA_TO_DEVICE); |
172 | if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, | 295 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, |
173 | sge[sge_no].addr)) | 296 | sge[sge_no].addr)) |
174 | goto err; | 297 | goto err; |
298 | atomic_inc(&xprt->sc_dma_used); | ||
299 | sge[sge_no].lkey = xprt->sc_dma_lkey; | ||
300 | } else { | ||
301 | sge[sge_no].addr = (unsigned long) | ||
302 | vec->sge[xdr_sge_no].iov_base + sge_off; | ||
303 | sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
304 | } | ||
305 | ctxt->count++; | ||
306 | ctxt->frmr = vec->frmr; | ||
175 | sge_off = 0; | 307 | sge_off = 0; |
176 | sge_no++; | 308 | sge_no++; |
177 | ctxt->count++; | ||
178 | xdr_sge_no++; | 309 | xdr_sge_no++; |
310 | BUG_ON(xdr_sge_no > vec->count); | ||
179 | bc -= sge_bytes; | 311 | bc -= sge_bytes; |
180 | } | 312 | } |
181 | 313 | ||
182 | BUG_ON(bc != 0); | ||
183 | BUG_ON(xdr_sge_no > vec->count); | ||
184 | |||
185 | /* Prepare WRITE WR */ | 314 | /* Prepare WRITE WR */ |
186 | memset(&write_wr, 0, sizeof write_wr); | 315 | memset(&write_wr, 0, sizeof write_wr); |
187 | ctxt->wr_op = IB_WR_RDMA_WRITE; | 316 | ctxt->wr_op = IB_WR_RDMA_WRITE; |
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, | |||
226 | res_ary = (struct rpcrdma_write_array *) | 355 | res_ary = (struct rpcrdma_write_array *) |
227 | &rdma_resp->rm_body.rm_chunks[1]; | 356 | &rdma_resp->rm_body.rm_chunks[1]; |
228 | 357 | ||
229 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 358 | if (vec->frmr) |
359 | max_write = vec->frmr->map_len; | ||
360 | else | ||
361 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
230 | 362 | ||
231 | /* Write chunks start at the pagelist */ | 363 | /* Write chunks start at the pagelist */ |
232 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | 364 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; |
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
297 | res_ary = (struct rpcrdma_write_array *) | 429 | res_ary = (struct rpcrdma_write_array *) |
298 | &rdma_resp->rm_body.rm_chunks[2]; | 430 | &rdma_resp->rm_body.rm_chunks[2]; |
299 | 431 | ||
300 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 432 | if (vec->frmr) |
433 | max_write = vec->frmr->map_len; | ||
434 | else | ||
435 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
301 | 436 | ||
302 | /* xdr offset starts at RPC message */ | 437 | /* xdr offset starts at RPC message */ |
303 | for (xdr_off = 0, chunk_no = 0; | 438 | for (xdr_off = 0, chunk_no = 0; |
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
307 | ch = &arg_ary->wc_array[chunk_no].wc_target; | 442 | ch = &arg_ary->wc_array[chunk_no].wc_target; |
308 | write_len = min(xfer_len, ch->rs_length); | 443 | write_len = min(xfer_len, ch->rs_length); |
309 | 444 | ||
310 | |||
311 | /* Prepare the reply chunk given the length actually | 445 | /* Prepare the reply chunk given the length actually |
312 | * written */ | 446 | * written */ |
313 | rs_offset = get_unaligned(&(ch->rs_offset)); | 447 | rs_offset = get_unaligned(&(ch->rs_offset)); |
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
366 | int byte_count) | 500 | int byte_count) |
367 | { | 501 | { |
368 | struct ib_send_wr send_wr; | 502 | struct ib_send_wr send_wr; |
503 | struct ib_send_wr inv_wr; | ||
369 | int sge_no; | 504 | int sge_no; |
370 | int sge_bytes; | 505 | int sge_bytes; |
371 | int page_no; | 506 | int page_no; |
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
385 | /* Prepare the context */ | 520 | /* Prepare the context */ |
386 | ctxt->pages[0] = page; | 521 | ctxt->pages[0] = page; |
387 | ctxt->count = 1; | 522 | ctxt->count = 1; |
523 | ctxt->frmr = vec->frmr; | ||
524 | if (vec->frmr) | ||
525 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
526 | else | ||
527 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
388 | 528 | ||
389 | /* Prepare the SGE for the RPCRDMA Header */ | 529 | /* Prepare the SGE for the RPCRDMA Header */ |
390 | atomic_inc(&rdma->sc_dma_used); | ||
391 | ctxt->sge[0].addr = | 530 | ctxt->sge[0].addr = |
392 | ib_dma_map_page(rdma->sc_cm_id->device, | 531 | ib_dma_map_page(rdma->sc_cm_id->device, |
393 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | 532 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); |
533 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) | ||
534 | goto err; | ||
535 | atomic_inc(&rdma->sc_dma_used); | ||
536 | |||
394 | ctxt->direction = DMA_TO_DEVICE; | 537 | ctxt->direction = DMA_TO_DEVICE; |
538 | |||
395 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | 539 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); |
396 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | 540 | ctxt->sge[0].lkey = rdma->sc_dma_lkey; |
397 | 541 | ||
398 | /* Determine how many of our SGE are to be transmitted */ | 542 | /* Determine how many of our SGE are to be transmitted */ |
399 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { | 543 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { |
400 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); | 544 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); |
401 | byte_count -= sge_bytes; | 545 | byte_count -= sge_bytes; |
402 | atomic_inc(&rdma->sc_dma_used); | 546 | if (!vec->frmr) { |
403 | ctxt->sge[sge_no].addr = | 547 | ctxt->sge[sge_no].addr = |
404 | ib_dma_map_single(rdma->sc_cm_id->device, | 548 | ib_dma_map_single(rdma->sc_cm_id->device, |
405 | vec->sge[sge_no].iov_base, | 549 | vec->sge[sge_no].iov_base, |
406 | sge_bytes, DMA_TO_DEVICE); | 550 | sge_bytes, DMA_TO_DEVICE); |
551 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, | ||
552 | ctxt->sge[sge_no].addr)) | ||
553 | goto err; | ||
554 | atomic_inc(&rdma->sc_dma_used); | ||
555 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; | ||
556 | } else { | ||
557 | ctxt->sge[sge_no].addr = (unsigned long) | ||
558 | vec->sge[sge_no].iov_base; | ||
559 | ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
560 | } | ||
407 | ctxt->sge[sge_no].length = sge_bytes; | 561 | ctxt->sge[sge_no].length = sge_bytes; |
408 | ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey; | ||
409 | } | 562 | } |
410 | BUG_ON(byte_count != 0); | 563 | BUG_ON(byte_count != 0); |
411 | 564 | ||
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
417 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | 570 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; |
418 | ctxt->count++; | 571 | ctxt->count++; |
419 | rqstp->rq_respages[page_no] = NULL; | 572 | rqstp->rq_respages[page_no] = NULL; |
420 | /* If there are more pages than SGE, terminate SGE list */ | 573 | /* |
574 | * If there are more pages than SGE, terminate SGE | ||
575 | * list so that svc_rdma_unmap_dma doesn't attempt to | ||
576 | * unmap garbage. | ||
577 | */ | ||
421 | if (page_no+1 >= sge_no) | 578 | if (page_no+1 >= sge_no) |
422 | ctxt->sge[page_no+1].length = 0; | 579 | ctxt->sge[page_no+1].length = 0; |
423 | } | 580 | } |
424 | BUG_ON(sge_no > rdma->sc_max_sge); | 581 | BUG_ON(sge_no > rdma->sc_max_sge); |
582 | BUG_ON(sge_no > ctxt->count); | ||
425 | memset(&send_wr, 0, sizeof send_wr); | 583 | memset(&send_wr, 0, sizeof send_wr); |
426 | ctxt->wr_op = IB_WR_SEND; | 584 | ctxt->wr_op = IB_WR_SEND; |
427 | send_wr.wr_id = (unsigned long)ctxt; | 585 | send_wr.wr_id = (unsigned long)ctxt; |
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
429 | send_wr.num_sge = sge_no; | 587 | send_wr.num_sge = sge_no; |
430 | send_wr.opcode = IB_WR_SEND; | 588 | send_wr.opcode = IB_WR_SEND; |
431 | send_wr.send_flags = IB_SEND_SIGNALED; | 589 | send_wr.send_flags = IB_SEND_SIGNALED; |
590 | if (vec->frmr) { | ||
591 | /* Prepare INVALIDATE WR */ | ||
592 | memset(&inv_wr, 0, sizeof inv_wr); | ||
593 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
594 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
595 | inv_wr.ex.invalidate_rkey = | ||
596 | vec->frmr->mr->lkey; | ||
597 | send_wr.next = &inv_wr; | ||
598 | } | ||
432 | 599 | ||
433 | ret = svc_rdma_send(rdma, &send_wr); | 600 | ret = svc_rdma_send(rdma, &send_wr); |
434 | if (ret) | 601 | if (ret) |
435 | svc_rdma_put_context(ctxt, 1); | 602 | goto err; |
436 | 603 | ||
437 | return ret; | 604 | return 0; |
605 | |||
606 | err: | ||
607 | svc_rdma_put_frmr(rdma, vec->frmr); | ||
608 | svc_rdma_put_context(ctxt, 1); | ||
609 | return -EIO; | ||
438 | } | 610 | } |
439 | 611 | ||
440 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | 612 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) |
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
477 | ctxt = svc_rdma_get_context(rdma); | 649 | ctxt = svc_rdma_get_context(rdma); |
478 | ctxt->direction = DMA_TO_DEVICE; | 650 | ctxt->direction = DMA_TO_DEVICE; |
479 | vec = svc_rdma_get_req_map(); | 651 | vec = svc_rdma_get_req_map(); |
480 | xdr_to_sge(rdma, &rqstp->rq_res, vec); | 652 | ret = map_xdr(rdma, &rqstp->rq_res, vec); |
481 | 653 | if (ret) | |
654 | goto err0; | ||
482 | inline_bytes = rqstp->rq_res.len; | 655 | inline_bytes = rqstp->rq_res.len; |
483 | 656 | ||
484 | /* Create the RDMA response header */ | 657 | /* Create the RDMA response header */ |
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
498 | if (ret < 0) { | 671 | if (ret < 0) { |
499 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | 672 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", |
500 | ret); | 673 | ret); |
501 | goto error; | 674 | goto err1; |
502 | } | 675 | } |
503 | inline_bytes -= ret; | 676 | inline_bytes -= ret; |
504 | 677 | ||
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
508 | if (ret < 0) { | 681 | if (ret < 0) { |
509 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | 682 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", |
510 | ret); | 683 | ret); |
511 | goto error; | 684 | goto err1; |
512 | } | 685 | } |
513 | inline_bytes -= ret; | 686 | inline_bytes -= ret; |
514 | 687 | ||
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
517 | svc_rdma_put_req_map(vec); | 690 | svc_rdma_put_req_map(vec); |
518 | dprintk("svcrdma: send_reply returns %d\n", ret); | 691 | dprintk("svcrdma: send_reply returns %d\n", ret); |
519 | return ret; | 692 | return ret; |
520 | error: | 693 | |
694 | err1: | ||
695 | put_page(res_page); | ||
696 | err0: | ||
521 | svc_rdma_put_req_map(vec); | 697 | svc_rdma_put_req_map(vec); |
522 | svc_rdma_put_context(ctxt, 0); | 698 | svc_rdma_put_context(ctxt, 0); |
523 | put_page(res_page); | ||
524 | return ret; | 699 | return ret; |
525 | } | 700 | } |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 900cb69728c6..6fb493cbd29f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | |||
100 | ctxt->xprt = xprt; | 100 | ctxt->xprt = xprt; |
101 | INIT_LIST_HEAD(&ctxt->dto_q); | 101 | INIT_LIST_HEAD(&ctxt->dto_q); |
102 | ctxt->count = 0; | 102 | ctxt->count = 0; |
103 | ctxt->frmr = NULL; | ||
103 | atomic_inc(&xprt->sc_ctxt_used); | 104 | atomic_inc(&xprt->sc_ctxt_used); |
104 | return ctxt; | 105 | return ctxt; |
105 | } | 106 | } |
106 | 107 | ||
107 | static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | 108 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) |
108 | { | 109 | { |
109 | struct svcxprt_rdma *xprt = ctxt->xprt; | 110 | struct svcxprt_rdma *xprt = ctxt->xprt; |
110 | int i; | 111 | int i; |
111 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { | 112 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { |
112 | atomic_dec(&xprt->sc_dma_used); | 113 | /* |
113 | ib_dma_unmap_single(xprt->sc_cm_id->device, | 114 | * Unmap the DMA addr in the SGE if the lkey matches |
114 | ctxt->sge[i].addr, | 115 | * the sc_dma_lkey, otherwise, ignore it since it is |
115 | ctxt->sge[i].length, | 116 | * an FRMR lkey and will be unmapped later when the |
116 | ctxt->direction); | 117 | * last WR that uses it completes. |
118 | */ | ||
119 | if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { | ||
120 | atomic_dec(&xprt->sc_dma_used); | ||
121 | ib_dma_unmap_single(xprt->sc_cm_id->device, | ||
122 | ctxt->sge[i].addr, | ||
123 | ctxt->sge[i].length, | ||
124 | ctxt->direction); | ||
125 | } | ||
117 | } | 126 | } |
118 | } | 127 | } |
119 | 128 | ||
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) | |||
150 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | 159 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); |
151 | } | 160 | } |
152 | map->count = 0; | 161 | map->count = 0; |
162 | map->frmr = NULL; | ||
153 | return map; | 163 | return map; |
154 | } | 164 | } |
155 | 165 | ||
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) | |||
316 | } | 326 | } |
317 | 327 | ||
318 | /* | 328 | /* |
329 | * Processs a completion context | ||
330 | */ | ||
331 | static void process_context(struct svcxprt_rdma *xprt, | ||
332 | struct svc_rdma_op_ctxt *ctxt) | ||
333 | { | ||
334 | svc_rdma_unmap_dma(ctxt); | ||
335 | |||
336 | switch (ctxt->wr_op) { | ||
337 | case IB_WR_SEND: | ||
338 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
339 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
340 | svc_rdma_put_context(ctxt, 1); | ||
341 | break; | ||
342 | |||
343 | case IB_WR_RDMA_WRITE: | ||
344 | svc_rdma_put_context(ctxt, 0); | ||
345 | break; | ||
346 | |||
347 | case IB_WR_RDMA_READ: | ||
348 | case IB_WR_RDMA_READ_WITH_INV: | ||
349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
350 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
351 | BUG_ON(!read_hdr); | ||
352 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
353 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
354 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
355 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
356 | list_add_tail(&read_hdr->dto_q, | ||
357 | &xprt->sc_read_complete_q); | ||
358 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
359 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
360 | } | ||
361 | svc_rdma_put_context(ctxt, 0); | ||
362 | break; | ||
363 | |||
364 | default: | ||
365 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
366 | "opcode=%d\n", | ||
367 | ctxt->wr_op); | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* | ||
319 | * Send Queue Completion Handler - potentially called on interrupt context. | 373 | * Send Queue Completion Handler - potentially called on interrupt context. |
320 | * | 374 | * |
321 | * Note that caller must hold a transport reference. | 375 | * Note that caller must hold a transport reference. |
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
327 | struct ib_cq *cq = xprt->sc_sq_cq; | 381 | struct ib_cq *cq = xprt->sc_sq_cq; |
328 | int ret; | 382 | int ret; |
329 | 383 | ||
330 | |||
331 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) | 384 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) |
332 | return; | 385 | return; |
333 | 386 | ||
334 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | 387 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); |
335 | atomic_inc(&rdma_stat_sq_poll); | 388 | atomic_inc(&rdma_stat_sq_poll); |
336 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | 389 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { |
337 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
338 | xprt = ctxt->xprt; | ||
339 | |||
340 | svc_rdma_unmap_dma(ctxt); | ||
341 | if (wc.status != IB_WC_SUCCESS) | 390 | if (wc.status != IB_WC_SUCCESS) |
342 | /* Close the transport */ | 391 | /* Close the transport */ |
343 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | 392 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
346 | atomic_dec(&xprt->sc_sq_count); | 395 | atomic_dec(&xprt->sc_sq_count); |
347 | wake_up(&xprt->sc_send_wait); | 396 | wake_up(&xprt->sc_send_wait); |
348 | 397 | ||
349 | switch (ctxt->wr_op) { | 398 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; |
350 | case IB_WR_SEND: | 399 | if (ctxt) |
351 | svc_rdma_put_context(ctxt, 1); | 400 | process_context(xprt, ctxt); |
352 | break; | ||
353 | |||
354 | case IB_WR_RDMA_WRITE: | ||
355 | svc_rdma_put_context(ctxt, 0); | ||
356 | break; | ||
357 | |||
358 | case IB_WR_RDMA_READ: | ||
359 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
360 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
361 | BUG_ON(!read_hdr); | ||
362 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
363 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
364 | list_add_tail(&read_hdr->dto_q, | ||
365 | &xprt->sc_read_complete_q); | ||
366 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
367 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
368 | } | ||
369 | svc_rdma_put_context(ctxt, 0); | ||
370 | break; | ||
371 | 401 | ||
372 | default: | ||
373 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
374 | "opcode=%d, status=%d\n", | ||
375 | wc.opcode, wc.status); | ||
376 | break; | ||
377 | } | ||
378 | svc_xprt_put(&xprt->sc_xprt); | 402 | svc_xprt_put(&xprt->sc_xprt); |
379 | } | 403 | } |
380 | 404 | ||
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | |||
425 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | 449 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); |
426 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | 450 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); |
427 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | 451 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); |
452 | INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); | ||
428 | init_waitqueue_head(&cma_xprt->sc_send_wait); | 453 | init_waitqueue_head(&cma_xprt->sc_send_wait); |
429 | 454 | ||
430 | spin_lock_init(&cma_xprt->sc_lock); | 455 | spin_lock_init(&cma_xprt->sc_lock); |
431 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | 456 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); |
457 | spin_lock_init(&cma_xprt->sc_frmr_q_lock); | ||
432 | 458 | ||
433 | cma_xprt->sc_ord = svcrdma_ord; | 459 | cma_xprt->sc_ord = svcrdma_ord; |
434 | 460 | ||
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
462 | struct ib_recv_wr recv_wr, *bad_recv_wr; | 488 | struct ib_recv_wr recv_wr, *bad_recv_wr; |
463 | struct svc_rdma_op_ctxt *ctxt; | 489 | struct svc_rdma_op_ctxt *ctxt; |
464 | struct page *page; | 490 | struct page *page; |
465 | unsigned long pa; | 491 | dma_addr_t pa; |
466 | int sge_no; | 492 | int sge_no; |
467 | int buflen; | 493 | int buflen; |
468 | int ret; | 494 | int ret; |
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
474 | BUG_ON(sge_no >= xprt->sc_max_sge); | 500 | BUG_ON(sge_no >= xprt->sc_max_sge); |
475 | page = svc_rdma_get_page(); | 501 | page = svc_rdma_get_page(); |
476 | ctxt->pages[sge_no] = page; | 502 | ctxt->pages[sge_no] = page; |
477 | atomic_inc(&xprt->sc_dma_used); | ||
478 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | 503 | pa = ib_dma_map_page(xprt->sc_cm_id->device, |
479 | page, 0, PAGE_SIZE, | 504 | page, 0, PAGE_SIZE, |
480 | DMA_FROM_DEVICE); | 505 | DMA_FROM_DEVICE); |
506 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) | ||
507 | goto err_put_ctxt; | ||
508 | atomic_inc(&xprt->sc_dma_used); | ||
481 | ctxt->sge[sge_no].addr = pa; | 509 | ctxt->sge[sge_no].addr = pa; |
482 | ctxt->sge[sge_no].length = PAGE_SIZE; | 510 | ctxt->sge[sge_no].length = PAGE_SIZE; |
483 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 511 | ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; |
484 | buflen += PAGE_SIZE; | 512 | buflen += PAGE_SIZE; |
485 | } | 513 | } |
486 | ctxt->count = sge_no; | 514 | ctxt->count = sge_no; |
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
496 | svc_rdma_put_context(ctxt, 1); | 524 | svc_rdma_put_context(ctxt, 1); |
497 | } | 525 | } |
498 | return ret; | 526 | return ret; |
527 | |||
528 | err_put_ctxt: | ||
529 | svc_rdma_put_context(ctxt, 1); | ||
530 | return -ENOMEM; | ||
499 | } | 531 | } |
500 | 532 | ||
501 | /* | 533 | /* |
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, | |||
566 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | 598 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " |
567 | "event=%d\n", cma_id, cma_id->context, event->event); | 599 | "event=%d\n", cma_id, cma_id->context, event->event); |
568 | handle_connect_req(cma_id, | 600 | handle_connect_req(cma_id, |
569 | event->param.conn.responder_resources); | 601 | event->param.conn.initiator_depth); |
570 | break; | 602 | break; |
571 | 603 | ||
572 | case RDMA_CM_EVENT_ESTABLISHED: | 604 | case RDMA_CM_EVENT_ESTABLISHED: |
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
686 | return ERR_PTR(ret); | 718 | return ERR_PTR(ret); |
687 | } | 719 | } |
688 | 720 | ||
721 | static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) | ||
722 | { | ||
723 | struct ib_mr *mr; | ||
724 | struct ib_fast_reg_page_list *pl; | ||
725 | struct svc_rdma_fastreg_mr *frmr; | ||
726 | |||
727 | frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); | ||
728 | if (!frmr) | ||
729 | goto err; | ||
730 | |||
731 | mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); | ||
732 | if (!mr) | ||
733 | goto err_free_frmr; | ||
734 | |||
735 | pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, | ||
736 | RPCSVC_MAXPAGES); | ||
737 | if (!pl) | ||
738 | goto err_free_mr; | ||
739 | |||
740 | frmr->mr = mr; | ||
741 | frmr->page_list = pl; | ||
742 | INIT_LIST_HEAD(&frmr->frmr_list); | ||
743 | return frmr; | ||
744 | |||
745 | err_free_mr: | ||
746 | ib_dereg_mr(mr); | ||
747 | err_free_frmr: | ||
748 | kfree(frmr); | ||
749 | err: | ||
750 | return ERR_PTR(-ENOMEM); | ||
751 | } | ||
752 | |||
753 | static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) | ||
754 | { | ||
755 | struct svc_rdma_fastreg_mr *frmr; | ||
756 | |||
757 | while (!list_empty(&xprt->sc_frmr_q)) { | ||
758 | frmr = list_entry(xprt->sc_frmr_q.next, | ||
759 | struct svc_rdma_fastreg_mr, frmr_list); | ||
760 | list_del_init(&frmr->frmr_list); | ||
761 | ib_dereg_mr(frmr->mr); | ||
762 | ib_free_fast_reg_page_list(frmr->page_list); | ||
763 | kfree(frmr); | ||
764 | } | ||
765 | } | ||
766 | |||
767 | struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) | ||
768 | { | ||
769 | struct svc_rdma_fastreg_mr *frmr = NULL; | ||
770 | |||
771 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
772 | if (!list_empty(&rdma->sc_frmr_q)) { | ||
773 | frmr = list_entry(rdma->sc_frmr_q.next, | ||
774 | struct svc_rdma_fastreg_mr, frmr_list); | ||
775 | list_del_init(&frmr->frmr_list); | ||
776 | frmr->map_len = 0; | ||
777 | frmr->page_list_len = 0; | ||
778 | } | ||
779 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
780 | if (frmr) | ||
781 | return frmr; | ||
782 | |||
783 | return rdma_alloc_frmr(rdma); | ||
784 | } | ||
785 | |||
786 | static void frmr_unmap_dma(struct svcxprt_rdma *xprt, | ||
787 | struct svc_rdma_fastreg_mr *frmr) | ||
788 | { | ||
789 | int page_no; | ||
790 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
791 | dma_addr_t addr = frmr->page_list->page_list[page_no]; | ||
792 | if (ib_dma_mapping_error(frmr->mr->device, addr)) | ||
793 | continue; | ||
794 | atomic_dec(&xprt->sc_dma_used); | ||
795 | ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, | ||
796 | frmr->direction); | ||
797 | } | ||
798 | } | ||
799 | |||
800 | void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, | ||
801 | struct svc_rdma_fastreg_mr *frmr) | ||
802 | { | ||
803 | if (frmr) { | ||
804 | frmr_unmap_dma(rdma, frmr); | ||
805 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
806 | BUG_ON(!list_empty(&frmr->frmr_list)); | ||
807 | list_add(&frmr->frmr_list, &rdma->sc_frmr_q); | ||
808 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
809 | } | ||
810 | } | ||
811 | |||
689 | /* | 812 | /* |
690 | * This is the xpo_recvfrom function for listening endpoints. Its | 813 | * This is the xpo_recvfrom function for listening endpoints. Its |
691 | * purpose is to accept incoming connections. The CMA callback handler | 814 | * purpose is to accept incoming connections. The CMA callback handler |
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
704 | struct rdma_conn_param conn_param; | 827 | struct rdma_conn_param conn_param; |
705 | struct ib_qp_init_attr qp_attr; | 828 | struct ib_qp_init_attr qp_attr; |
706 | struct ib_device_attr devattr; | 829 | struct ib_device_attr devattr; |
830 | int dma_mr_acc; | ||
831 | int need_dma_mr; | ||
707 | int ret; | 832 | int ret; |
708 | int i; | 833 | int i; |
709 | 834 | ||
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
819 | } | 944 | } |
820 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | 945 | newxprt->sc_qp = newxprt->sc_cm_id->qp; |
821 | 946 | ||
822 | /* Register all of physical memory */ | 947 | /* |
823 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | 948 | * Use the most secure set of MR resources based on the |
824 | IB_ACCESS_LOCAL_WRITE | | 949 | * transport type and available memory management features in |
825 | IB_ACCESS_REMOTE_WRITE); | 950 | * the device. Here's the table implemented below: |
826 | if (IS_ERR(newxprt->sc_phys_mr)) { | 951 | * |
827 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); | 952 | * Fast Global DMA Remote WR |
953 | * Reg LKEY MR Access | ||
954 | * Sup'd Sup'd Needed Needed | ||
955 | * | ||
956 | * IWARP N N Y Y | ||
957 | * N Y Y Y | ||
958 | * Y N Y N | ||
959 | * Y Y N - | ||
960 | * | ||
961 | * IB N N Y N | ||
962 | * N Y N - | ||
963 | * Y N Y N | ||
964 | * Y Y N - | ||
965 | * | ||
966 | * NB: iWARP requires remote write access for the data sink | ||
967 | * of an RDMA_READ. IB does not. | ||
968 | */ | ||
969 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { | ||
970 | newxprt->sc_frmr_pg_list_len = | ||
971 | devattr.max_fast_reg_page_list_len; | ||
972 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | ||
973 | } | ||
974 | |||
975 | /* | ||
976 | * Determine if a DMA MR is required and if so, what privs are required | ||
977 | */ | ||
978 | switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { | ||
979 | case RDMA_TRANSPORT_IWARP: | ||
980 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; | ||
981 | if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { | ||
982 | need_dma_mr = 1; | ||
983 | dma_mr_acc = | ||
984 | (IB_ACCESS_LOCAL_WRITE | | ||
985 | IB_ACCESS_REMOTE_WRITE); | ||
986 | } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
987 | need_dma_mr = 1; | ||
988 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
989 | } else | ||
990 | need_dma_mr = 0; | ||
991 | break; | ||
992 | case RDMA_TRANSPORT_IB: | ||
993 | if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
994 | need_dma_mr = 1; | ||
995 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
996 | } else | ||
997 | need_dma_mr = 0; | ||
998 | break; | ||
999 | default: | ||
828 | goto errout; | 1000 | goto errout; |
829 | } | 1001 | } |
830 | 1002 | ||
1003 | /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ | ||
1004 | if (need_dma_mr) { | ||
1005 | /* Register all of physical memory */ | ||
1006 | newxprt->sc_phys_mr = | ||
1007 | ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); | ||
1008 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
1009 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", | ||
1010 | ret); | ||
1011 | goto errout; | ||
1012 | } | ||
1013 | newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; | ||
1014 | } else | ||
1015 | newxprt->sc_dma_lkey = | ||
1016 | newxprt->sc_cm_id->device->local_dma_lkey; | ||
1017 | |||
831 | /* Post receive buffers */ | 1018 | /* Post receive buffers */ |
832 | for (i = 0; i < newxprt->sc_max_requests; i++) { | 1019 | for (i = 0; i < newxprt->sc_max_requests; i++) { |
833 | ret = svc_rdma_post_recv(newxprt); | 1020 | ret = svc_rdma_post_recv(newxprt); |
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work) | |||
961 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); | 1148 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); |
962 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); | 1149 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); |
963 | 1150 | ||
1151 | /* De-allocate fastreg mr */ | ||
1152 | rdma_dealloc_frmr_q(rdma); | ||
1153 | |||
964 | /* Destroy the QP if present (not a listener) */ | 1154 | /* Destroy the QP if present (not a listener) */ |
965 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) | 1155 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) |
966 | ib_destroy_qp(rdma->sc_qp); | 1156 | ib_destroy_qp(rdma->sc_qp); |
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) | |||
1014 | return 1; | 1204 | return 1; |
1015 | } | 1205 | } |
1016 | 1206 | ||
1207 | /* | ||
1208 | * Attempt to register the kvec representing the RPC memory with the | ||
1209 | * device. | ||
1210 | * | ||
1211 | * Returns: | ||
1212 | * 0 : The fastreg request was handed to svc_rdma_send() without | ||
1213 | * error. | ||
1214 | * <0 : An error was encountered attempting to register the kvec | ||
1215 | * (e.g. -ENOTCONN when the transport is marked for close). | ||
1216 | */ | ||
1217 | int svc_rdma_fastreg(struct svcxprt_rdma *xprt, | ||
1218 | struct svc_rdma_fastreg_mr *frmr) | ||
1219 | { | ||
1220 | struct ib_send_wr fastreg_wr; | ||
1221 | u8 key; | ||
1222 | |||
1223 | /* Bump the key */ | ||
1224 | key = (u8)(frmr->mr->lkey & 0x000000FF); | ||
1225 | ib_update_fast_reg_key(frmr->mr, ++key); | ||
1226 | |||
1227 | /* Prepare FASTREG WR */ | ||
1228 | memset(&fastreg_wr, 0, sizeof fastreg_wr); | ||
1229 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1230 | fastreg_wr.send_flags = IB_SEND_SIGNALED; | ||
1231 | fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; | ||
1232 | fastreg_wr.wr.fast_reg.page_list = frmr->page_list; | ||
1233 | fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; | ||
1234 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1235 | fastreg_wr.wr.fast_reg.length = frmr->map_len; | ||
1236 | fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; | ||
1237 | fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; | ||
1238 | return svc_rdma_send(xprt, &fastreg_wr); | ||
1239 | } | ||
1240 | |||
1017 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | 1241 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) |
1018 | { | 1242 | { |
1019 | struct ib_send_wr *bad_wr; | 1243 | struct ib_send_wr *bad_wr, *n_wr; |
1244 | int wr_count; | ||
1245 | int i; | ||
1020 | int ret; | 1246 | int ret; |
1021 | 1247 | ||
1022 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | 1248 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) |
1023 | return -ENOTCONN; | 1249 | return -ENOTCONN; |
1024 | 1250 | ||
1025 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | 1251 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); |
1026 | BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != | 1252 | wr_count = 1; |
1027 | wr->opcode); | 1253 | for (n_wr = wr->next; n_wr; n_wr = n_wr->next) |
1254 | wr_count++; | ||
1255 | |||
1028 | /* If the SQ is full, wait until an SQ entry is available */ | 1256 | /* If the SQ is full, wait until an SQ entry is available */ |
1029 | while (1) { | 1257 | while (1) { |
1030 | spin_lock_bh(&xprt->sc_lock); | 1258 | spin_lock_bh(&xprt->sc_lock); |
1031 | if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { | 1259 | if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { |
1032 | spin_unlock_bh(&xprt->sc_lock); | 1260 | spin_unlock_bh(&xprt->sc_lock); |
1033 | atomic_inc(&rdma_stat_sq_starve); | 1261 | atomic_inc(&rdma_stat_sq_starve); |
1034 | 1262 | ||
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | |||
1043 | return 0; | 1271 | return 0; |
1044 | continue; | 1272 | continue; |
1045 | } | 1273 | } |
1046 | /* Bumped used SQ WR count and post */ | 1274 | /* Take a transport ref for each WR posted */ |
1047 | svc_xprt_get(&xprt->sc_xprt); | 1275 | for (i = 0; i < wr_count; i++) |
1276 | svc_xprt_get(&xprt->sc_xprt); | ||
1277 | |||
1278 | /* Bump used SQ WR count and post */ | ||
1279 | atomic_add(wr_count, &xprt->sc_sq_count); | ||
1048 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); | 1280 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); |
1049 | if (!ret) | 1281 | if (ret) { |
1050 | atomic_inc(&xprt->sc_sq_count); | 1282 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
1051 | else { | 1283 | atomic_sub(wr_count, &xprt->sc_sq_count); |
1052 | svc_xprt_put(&xprt->sc_xprt); | 1284 | for (i = 0; i < wr_count; i ++) |
1285 | svc_xprt_put(&xprt->sc_xprt); | ||
1053 | dprintk("svcrdma: failed to post SQ WR rc=%d, " | 1286 | dprintk("svcrdma: failed to post SQ WR rc=%d, " |
1054 | "sc_sq_count=%d, sc_sq_depth=%d\n", | 1287 | "sc_sq_count=%d, sc_sq_depth=%d\n", |
1055 | ret, atomic_read(&xprt->sc_sq_count), | 1288 | ret, atomic_read(&xprt->sc_sq_count), |
1056 | xprt->sc_sq_depth); | 1289 | xprt->sc_sq_depth); |
1057 | } | 1290 | } |
1058 | spin_unlock_bh(&xprt->sc_lock); | 1291 | spin_unlock_bh(&xprt->sc_lock); |
1292 | if (ret) | ||
1293 | wake_up(&xprt->sc_send_wait); | ||
1059 | break; | 1294 | break; |
1060 | } | 1295 | } |
1061 | return ret; | 1296 | return ret; |
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1079 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | 1314 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); |
1080 | 1315 | ||
1081 | /* Prepare SGE for local address */ | 1316 | /* Prepare SGE for local address */ |
1082 | atomic_inc(&xprt->sc_dma_used); | ||
1083 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | 1317 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, |
1084 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); | 1318 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
1085 | sge.lkey = xprt->sc_phys_mr->lkey; | 1319 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { |
1320 | put_page(p); | ||
1321 | return; | ||
1322 | } | ||
1323 | atomic_inc(&xprt->sc_dma_used); | ||
1324 | sge.lkey = xprt->sc_dma_lkey; | ||
1086 | sge.length = length; | 1325 | sge.length = length; |
1087 | 1326 | ||
1088 | ctxt = svc_rdma_get_context(xprt); | 1327 | ctxt = svc_rdma_get_context(xprt); |
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1103 | if (ret) { | 1342 | if (ret) { |
1104 | dprintk("svcrdma: Error %d posting send for protocol error\n", | 1343 | dprintk("svcrdma: Error %d posting send for protocol error\n", |
1105 | ret); | 1344 | ret); |
1345 | ib_dma_unmap_page(xprt->sc_cm_id->device, | ||
1346 | sge.addr, PAGE_SIZE, | ||
1347 | DMA_FROM_DEVICE); | ||
1106 | svc_rdma_put_context(ctxt, 1); | 1348 | svc_rdma_put_context(ctxt, 1); |
1107 | } | 1349 | } |
1108 | } | 1350 | } |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |||
70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
72 | static unsigned int xprt_rdma_inline_write_padding; | 72 | static unsigned int xprt_rdma_inline_write_padding; |
73 | #if !RPCRDMA_PERSISTENT_REGISTRATION | 73 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
74 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ | 74 | int xprt_rdma_pad_optimize = 0; |
75 | #else | ||
76 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; | ||
77 | #endif | ||
78 | 75 | ||
79 | #ifdef RPC_DEBUG | 76 | #ifdef RPC_DEBUG |
80 | 77 | ||
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { | |||
140 | .extra2 = &max_memreg, | 137 | .extra2 = &max_memreg, |
141 | }, | 138 | }, |
142 | { | 139 | { |
140 | .ctl_name = CTL_UNNUMBERED, | ||
141 | .procname = "rdma_pad_optimize", | ||
142 | .data = &xprt_rdma_pad_optimize, | ||
143 | .maxlen = sizeof(unsigned int), | ||
144 | .mode = 0644, | ||
145 | .proc_handler = &proc_dointvec, | ||
146 | }, | ||
147 | { | ||
143 | .ctl_name = 0, | 148 | .ctl_name = 0, |
144 | }, | 149 | }, |
145 | }; | 150 | }; |
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
458 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 463 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
459 | 464 | ||
460 | dprintk("RPC: %s: closing\n", __func__); | 465 | dprintk("RPC: %s: closing\n", __func__); |
466 | if (r_xprt->rx_ep.rep_connected > 0) | ||
467 | xprt->reestablish_timeout = 0; | ||
461 | xprt_disconnect_done(xprt); | 468 | xprt_disconnect_done(xprt); |
462 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 469 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
463 | } | 470 | } |
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) | |||
485 | /* Reconnect */ | 492 | /* Reconnect */ |
486 | schedule_delayed_work(&r_xprt->rdma_connect, | 493 | schedule_delayed_work(&r_xprt->rdma_connect, |
487 | xprt->reestablish_timeout); | 494 | xprt->reestablish_timeout); |
495 | xprt->reestablish_timeout <<= 1; | ||
496 | if (xprt->reestablish_timeout > (30 * HZ)) | ||
497 | xprt->reestablish_timeout = (30 * HZ); | ||
498 | else if (xprt->reestablish_timeout < (5 * HZ)) | ||
499 | xprt->reestablish_timeout = (5 * HZ); | ||
488 | } else { | 500 | } else { |
489 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 501 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
490 | if (!RPC_IS_ASYNC(task)) | 502 | if (!RPC_IS_ASYNC(task)) |
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
591 | } | 603 | } |
592 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 604 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
593 | out: | 605 | out: |
606 | req->rl_connect_cookie = 0; /* our reserved value */ | ||
594 | return req->rl_xdr_buf; | 607 | return req->rl_xdr_buf; |
595 | 608 | ||
596 | outfail: | 609 | outfail: |
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
694 | req->rl_reply->rr_xprt = xprt; | 707 | req->rl_reply->rr_xprt = xprt; |
695 | } | 708 | } |
696 | 709 | ||
697 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { | 710 | /* Must suppress retransmit to maintain credits */ |
698 | xprt_disconnect_done(xprt); | 711 | if (req->rl_connect_cookie == xprt->connect_cookie) |
699 | return -ENOTCONN; /* implies disconnect */ | 712 | goto drop_connection; |
700 | } | 713 | req->rl_connect_cookie = xprt->connect_cookie; |
714 | |||
715 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
716 | goto drop_connection; | ||
701 | 717 | ||
718 | task->tk_bytes_sent += rqst->rq_snd_buf.len; | ||
702 | rqst->rq_bytes_sent = 0; | 719 | rqst->rq_bytes_sent = 0; |
703 | return 0; | 720 | return 0; |
721 | |||
722 | drop_connection: | ||
723 | xprt_disconnect_done(xprt); | ||
724 | return -ENOTCONN; /* implies disconnect */ | ||
704 | } | 725 | } |
705 | 726 | ||
706 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 727 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) | |||
770 | { | 791 | { |
771 | int rc; | 792 | int rc; |
772 | 793 | ||
773 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | 794 | dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
774 | #ifdef RPC_DEBUG | 795 | #ifdef RPC_DEBUG |
775 | if (sunrpc_table_header) { | 796 | if (sunrpc_table_header) { |
776 | unregister_sysctl_table(sunrpc_table_header); | 797 | unregister_sysctl_table(sunrpc_table_header); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
284 | switch (event->event) { | 284 | switch (event->event) { |
285 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
287 | ia->ri_async_rc = 0; | ||
287 | complete(&ia->ri_done); | 288 | complete(&ia->ri_done); |
288 | break; | 289 | break; |
289 | case RDMA_CM_EVENT_ADDR_ERROR: | 290 | case RDMA_CM_EVENT_ADDR_ERROR: |
@@ -338,13 +339,32 @@ connected: | |||
338 | wake_up_all(&ep->rep_connect_wait); | 339 | wake_up_all(&ep->rep_connect_wait); |
339 | break; | 340 | break; |
340 | default: | 341 | default: |
341 | ia->ri_async_rc = -EINVAL; | 342 | dprintk("RPC: %s: unexpected CM event %d\n", |
342 | dprintk("RPC: %s: unexpected CM event %X\n", | ||
343 | __func__, event->event); | 343 | __func__, event->event); |
344 | complete(&ia->ri_done); | ||
345 | break; | 344 | break; |
346 | } | 345 | } |
347 | 346 | ||
347 | #ifdef RPC_DEBUG | ||
348 | if (connstate == 1) { | ||
349 | int ird = attr.max_dest_rd_atomic; | ||
350 | int tird = ep->rep_remote_cma.responder_resources; | ||
351 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
352 | "on %s, memreg %d slots %d ird %d%s\n", | ||
353 | NIPQUAD(addr->sin_addr.s_addr), | ||
354 | ntohs(addr->sin_port), | ||
355 | ia->ri_id->device->name, | ||
356 | ia->ri_memreg_strategy, | ||
357 | xprt->rx_buf.rb_max_requests, | ||
358 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | ||
359 | } else if (connstate < 0) { | ||
360 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
361 | "closed (%d)\n", | ||
362 | NIPQUAD(addr->sin_addr.s_addr), | ||
363 | ntohs(addr->sin_port), | ||
364 | connstate); | ||
365 | } | ||
366 | #endif | ||
367 | |||
348 | return 0; | 368 | return 0; |
349 | } | 369 | } |
350 | 370 | ||
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
355 | struct rdma_cm_id *id; | 375 | struct rdma_cm_id *id; |
356 | int rc; | 376 | int rc; |
357 | 377 | ||
378 | init_completion(&ia->ri_done); | ||
379 | |||
358 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); | 380 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); |
359 | if (IS_ERR(id)) { | 381 | if (IS_ERR(id)) { |
360 | rc = PTR_ERR(id); | 382 | rc = PTR_ERR(id); |
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
363 | return id; | 385 | return id; |
364 | } | 386 | } |
365 | 387 | ||
366 | ia->ri_async_rc = 0; | 388 | ia->ri_async_rc = -ETIMEDOUT; |
367 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 389 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); |
368 | if (rc) { | 390 | if (rc) { |
369 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 391 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
370 | __func__, rc); | 392 | __func__, rc); |
371 | goto out; | 393 | goto out; |
372 | } | 394 | } |
373 | wait_for_completion(&ia->ri_done); | 395 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
396 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
374 | rc = ia->ri_async_rc; | 397 | rc = ia->ri_async_rc; |
375 | if (rc) | 398 | if (rc) |
376 | goto out; | 399 | goto out; |
377 | 400 | ||
378 | ia->ri_async_rc = 0; | 401 | ia->ri_async_rc = -ETIMEDOUT; |
379 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 402 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
380 | if (rc) { | 403 | if (rc) { |
381 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | 404 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
382 | __func__, rc); | 405 | __func__, rc); |
383 | goto out; | 406 | goto out; |
384 | } | 407 | } |
385 | wait_for_completion(&ia->ri_done); | 408 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
409 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
386 | rc = ia->ri_async_rc; | 410 | rc = ia->ri_async_rc; |
387 | if (rc) | 411 | if (rc) |
388 | goto out; | 412 | goto out; |
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) | |||
423 | int | 447 | int |
424 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 448 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
425 | { | 449 | { |
426 | int rc; | 450 | int rc, mem_priv; |
451 | struct ib_device_attr devattr; | ||
427 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 452 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
428 | 453 | ||
429 | init_completion(&ia->ri_done); | ||
430 | |||
431 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 454 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
432 | if (IS_ERR(ia->ri_id)) { | 455 | if (IS_ERR(ia->ri_id)) { |
433 | rc = PTR_ERR(ia->ri_id); | 456 | rc = PTR_ERR(ia->ri_id); |
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
443 | } | 466 | } |
444 | 467 | ||
445 | /* | 468 | /* |
469 | * Query the device to determine if the requested memory | ||
470 | * registration strategy is supported. If it isn't, set the | ||
471 | * strategy to a globally supported model. | ||
472 | */ | ||
473 | rc = ib_query_device(ia->ri_id->device, &devattr); | ||
474 | if (rc) { | ||
475 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
476 | __func__, rc); | ||
477 | goto out2; | ||
478 | } | ||
479 | |||
480 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | ||
481 | ia->ri_have_dma_lkey = 1; | ||
482 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | ||
483 | } | ||
484 | |||
485 | switch (memreg) { | ||
486 | case RPCRDMA_MEMWINDOWS: | ||
487 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
488 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
489 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
490 | "specified but not supported by adapter, " | ||
491 | "using slower RPCRDMA_REGISTER\n", | ||
492 | __func__); | ||
493 | memreg = RPCRDMA_REGISTER; | ||
494 | } | ||
495 | break; | ||
496 | case RPCRDMA_MTHCAFMR: | ||
497 | if (!ia->ri_id->device->alloc_fmr) { | ||
498 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
499 | dprintk("RPC: %s: MTHCAFMR registration " | ||
500 | "specified but not supported by adapter, " | ||
501 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
502 | __func__); | ||
503 | memreg = RPCRDMA_ALLPHYSICAL; | ||
504 | #else | ||
505 | dprintk("RPC: %s: MTHCAFMR registration " | ||
506 | "specified but not supported by adapter, " | ||
507 | "using slower RPCRDMA_REGISTER\n", | ||
508 | __func__); | ||
509 | memreg = RPCRDMA_REGISTER; | ||
510 | #endif | ||
511 | } | ||
512 | break; | ||
513 | case RPCRDMA_FRMR: | ||
514 | /* Requires both frmr reg and local dma lkey */ | ||
515 | if ((devattr.device_cap_flags & | ||
516 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
517 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
518 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
519 | dprintk("RPC: %s: FRMR registration " | ||
520 | "specified but not supported by adapter, " | ||
521 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
522 | __func__); | ||
523 | memreg = RPCRDMA_ALLPHYSICAL; | ||
524 | #else | ||
525 | dprintk("RPC: %s: FRMR registration " | ||
526 | "specified but not supported by adapter, " | ||
527 | "using slower RPCRDMA_REGISTER\n", | ||
528 | __func__); | ||
529 | memreg = RPCRDMA_REGISTER; | ||
530 | #endif | ||
531 | } | ||
532 | break; | ||
533 | } | ||
534 | |||
535 | /* | ||
446 | * Optionally obtain an underlying physical identity mapping in | 536 | * Optionally obtain an underlying physical identity mapping in |
447 | * order to do a memory window-based bind. This base registration | 537 | * order to do a memory window-based bind. This base registration |
448 | * is protected from remote access - that is enabled only by binding | 538 | * is protected from remote access - that is enabled only by binding |
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
450 | * revoked after the corresponding completion similar to a storage | 540 | * revoked after the corresponding completion similar to a storage |
451 | * adapter. | 541 | * adapter. |
452 | */ | 542 | */ |
453 | if (memreg > RPCRDMA_REGISTER) { | 543 | switch (memreg) { |
454 | int mem_priv = IB_ACCESS_LOCAL_WRITE; | 544 | case RPCRDMA_BOUNCEBUFFERS: |
455 | switch (memreg) { | 545 | case RPCRDMA_REGISTER: |
546 | case RPCRDMA_FRMR: | ||
547 | break; | ||
456 | #if RPCRDMA_PERSISTENT_REGISTRATION | 548 | #if RPCRDMA_PERSISTENT_REGISTRATION |
457 | case RPCRDMA_ALLPHYSICAL: | 549 | case RPCRDMA_ALLPHYSICAL: |
458 | mem_priv |= IB_ACCESS_REMOTE_WRITE; | 550 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
459 | mem_priv |= IB_ACCESS_REMOTE_READ; | 551 | IB_ACCESS_REMOTE_WRITE | |
460 | break; | 552 | IB_ACCESS_REMOTE_READ; |
553 | goto register_setup; | ||
461 | #endif | 554 | #endif |
462 | case RPCRDMA_MEMWINDOWS_ASYNC: | 555 | case RPCRDMA_MEMWINDOWS_ASYNC: |
463 | case RPCRDMA_MEMWINDOWS: | 556 | case RPCRDMA_MEMWINDOWS: |
464 | mem_priv |= IB_ACCESS_MW_BIND; | 557 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
465 | break; | 558 | IB_ACCESS_MW_BIND; |
466 | default: | 559 | goto register_setup; |
560 | case RPCRDMA_MTHCAFMR: | ||
561 | if (ia->ri_have_dma_lkey) | ||
467 | break; | 562 | break; |
468 | } | 563 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
564 | register_setup: | ||
469 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 565 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
470 | if (IS_ERR(ia->ri_bind_mem)) { | 566 | if (IS_ERR(ia->ri_bind_mem)) { |
471 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 567 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
475 | memreg = RPCRDMA_REGISTER; | 571 | memreg = RPCRDMA_REGISTER; |
476 | ia->ri_bind_mem = NULL; | 572 | ia->ri_bind_mem = NULL; |
477 | } | 573 | } |
574 | break; | ||
575 | default: | ||
576 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | ||
577 | __func__, memreg); | ||
578 | rc = -EINVAL; | ||
579 | goto out2; | ||
478 | } | 580 | } |
581 | dprintk("RPC: %s: memory registration strategy is %d\n", | ||
582 | __func__, memreg); | ||
479 | 583 | ||
480 | /* Else will do memory reg/dereg for each chunk */ | 584 | /* Else will do memory reg/dereg for each chunk */ |
481 | ia->ri_memreg_strategy = memreg; | 585 | ia->ri_memreg_strategy = memreg; |
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
483 | return 0; | 587 | return 0; |
484 | out2: | 588 | out2: |
485 | rdma_destroy_id(ia->ri_id); | 589 | rdma_destroy_id(ia->ri_id); |
590 | ia->ri_id = NULL; | ||
486 | out1: | 591 | out1: |
487 | return rc; | 592 | return rc; |
488 | } | 593 | } |
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
503 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 608 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
504 | __func__, rc); | 609 | __func__, rc); |
505 | } | 610 | } |
506 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) | 611 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
507 | rdma_destroy_qp(ia->ri_id); | 612 | if (ia->ri_id->qp) |
613 | rdma_destroy_qp(ia->ri_id); | ||
614 | rdma_destroy_id(ia->ri_id); | ||
615 | ia->ri_id = NULL; | ||
616 | } | ||
508 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 617 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { |
509 | rc = ib_dealloc_pd(ia->ri_pd); | 618 | rc = ib_dealloc_pd(ia->ri_pd); |
510 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 619 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", |
511 | __func__, rc); | 620 | __func__, rc); |
512 | } | 621 | } |
513 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) | ||
514 | rdma_destroy_id(ia->ri_id); | ||
515 | } | 622 | } |
516 | 623 | ||
517 | /* | 624 | /* |
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
541 | ep->rep_attr.srq = NULL; | 648 | ep->rep_attr.srq = NULL; |
542 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 649 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
543 | switch (ia->ri_memreg_strategy) { | 650 | switch (ia->ri_memreg_strategy) { |
651 | case RPCRDMA_FRMR: | ||
652 | /* Add room for frmr register and invalidate WRs */ | ||
653 | ep->rep_attr.cap.max_send_wr *= 3; | ||
654 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
655 | return -EINVAL; | ||
656 | break; | ||
544 | case RPCRDMA_MEMWINDOWS_ASYNC: | 657 | case RPCRDMA_MEMWINDOWS_ASYNC: |
545 | case RPCRDMA_MEMWINDOWS: | 658 | case RPCRDMA_MEMWINDOWS: |
546 | /* Add room for mw_binds+unbinds - overkill! */ | 659 | /* Add room for mw_binds+unbinds - overkill! */ |
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
617 | ep->rep_remote_cma.private_data_len = 0; | 730 | ep->rep_remote_cma.private_data_len = 0; |
618 | 731 | ||
619 | /* Client offers RDMA Read but does not initiate */ | 732 | /* Client offers RDMA Read but does not initiate */ |
620 | switch (ia->ri_memreg_strategy) { | 733 | ep->rep_remote_cma.initiator_depth = 0; |
621 | case RPCRDMA_BOUNCEBUFFERS: | 734 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) |
622 | ep->rep_remote_cma.responder_resources = 0; | 735 | ep->rep_remote_cma.responder_resources = 0; |
623 | break; | 736 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
624 | case RPCRDMA_MTHCAFMR: | 737 | ep->rep_remote_cma.responder_resources = 32; |
625 | case RPCRDMA_REGISTER: | 738 | else |
626 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
627 | (RPCRDMA_MAX_DATA_SEGS / 8); | ||
628 | break; | ||
629 | case RPCRDMA_MEMWINDOWS: | ||
630 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
631 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
632 | case RPCRDMA_ALLPHYSICAL: | ||
633 | #endif | ||
634 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
635 | (RPCRDMA_MAX_DATA_SEGS / 2); | ||
636 | break; | ||
637 | default: | ||
638 | break; | ||
639 | } | ||
640 | if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) | ||
641 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 739 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
642 | ep->rep_remote_cma.initiator_depth = 0; | ||
643 | 740 | ||
644 | ep->rep_remote_cma.retry_count = 7; | 741 | ep->rep_remote_cma.retry_count = 7; |
645 | ep->rep_remote_cma.flow_control = 0; | 742 | ep->rep_remote_cma.flow_control = 0; |
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
679 | if (rc) | 776 | if (rc) |
680 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 777 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
681 | " returned %i\n", __func__, rc); | 778 | " returned %i\n", __func__, rc); |
779 | rdma_destroy_qp(ia->ri_id); | ||
780 | ia->ri_id->qp = NULL; | ||
682 | } | 781 | } |
683 | 782 | ||
684 | ep->rep_func = NULL; | ||
685 | |||
686 | /* padding - could be done in rpcrdma_buffer_destroy... */ | 783 | /* padding - could be done in rpcrdma_buffer_destroy... */ |
687 | if (ep->rep_pad_mr) { | 784 | if (ep->rep_pad_mr) { |
688 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); | 785 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); |
689 | ep->rep_pad_mr = NULL; | 786 | ep->rep_pad_mr = NULL; |
690 | } | 787 | } |
691 | 788 | ||
692 | if (ia->ri_id->qp) { | ||
693 | rdma_destroy_qp(ia->ri_id); | ||
694 | ia->ri_id->qp = NULL; | ||
695 | } | ||
696 | |||
697 | rpcrdma_clean_cq(ep->rep_cq); | 789 | rpcrdma_clean_cq(ep->rep_cq); |
698 | rc = ib_destroy_cq(ep->rep_cq); | 790 | rc = ib_destroy_cq(ep->rep_cq); |
699 | if (rc) | 791 | if (rc) |
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
712 | struct rdma_cm_id *id; | 804 | struct rdma_cm_id *id; |
713 | int rc = 0; | 805 | int rc = 0; |
714 | int retry_count = 0; | 806 | int retry_count = 0; |
715 | int reconnect = (ep->rep_connected != 0); | ||
716 | 807 | ||
717 | if (reconnect) { | 808 | if (ep->rep_connected != 0) { |
718 | struct rpcrdma_xprt *xprt; | 809 | struct rpcrdma_xprt *xprt; |
719 | retry: | 810 | retry: |
720 | rc = rpcrdma_ep_disconnect(ep, ia); | 811 | rc = rpcrdma_ep_disconnect(ep, ia); |
@@ -745,6 +836,7 @@ retry: | |||
745 | goto out; | 836 | goto out; |
746 | } | 837 | } |
747 | /* END TEMP */ | 838 | /* END TEMP */ |
839 | rdma_destroy_qp(ia->ri_id); | ||
748 | rdma_destroy_id(ia->ri_id); | 840 | rdma_destroy_id(ia->ri_id); |
749 | ia->ri_id = id; | 841 | ia->ri_id = id; |
750 | } | 842 | } |
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
769 | } | 861 | } |
770 | } | 862 | } |
771 | 863 | ||
772 | /* Theoretically a client initiator_depth > 0 is not needed, | ||
773 | * but many peers fail to complete the connection unless they | ||
774 | * == responder_resources! */ | ||
775 | if (ep->rep_remote_cma.initiator_depth != | ||
776 | ep->rep_remote_cma.responder_resources) | ||
777 | ep->rep_remote_cma.initiator_depth = | ||
778 | ep->rep_remote_cma.responder_resources; | ||
779 | |||
780 | ep->rep_connected = 0; | 864 | ep->rep_connected = 0; |
781 | 865 | ||
782 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 866 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
786 | goto out; | 870 | goto out; |
787 | } | 871 | } |
788 | 872 | ||
789 | if (reconnect) | ||
790 | return 0; | ||
791 | |||
792 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 873 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
793 | 874 | ||
794 | /* | 875 | /* |
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
805 | if (ep->rep_connected <= 0) { | 886 | if (ep->rep_connected <= 0) { |
806 | /* Sometimes, the only way to reliably connect to remote | 887 | /* Sometimes, the only way to reliably connect to remote |
807 | * CMs is to use same nonzero values for ORD and IRD. */ | 888 | * CMs is to use same nonzero values for ORD and IRD. */ |
808 | ep->rep_remote_cma.initiator_depth = | 889 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && |
809 | ep->rep_remote_cma.responder_resources; | 890 | (ep->rep_remote_cma.responder_resources == 0 || |
810 | if (ep->rep_remote_cma.initiator_depth == 0) | 891 | ep->rep_remote_cma.initiator_depth != |
811 | ++ep->rep_remote_cma.initiator_depth; | 892 | ep->rep_remote_cma.responder_resources)) { |
812 | if (ep->rep_remote_cma.responder_resources == 0) | 893 | if (ep->rep_remote_cma.responder_resources == 0) |
813 | ++ep->rep_remote_cma.responder_resources; | 894 | ep->rep_remote_cma.responder_resources = 1; |
814 | if (retry_count++ == 0) | 895 | ep->rep_remote_cma.initiator_depth = |
896 | ep->rep_remote_cma.responder_resources; | ||
815 | goto retry; | 897 | goto retry; |
898 | } | ||
816 | rc = ep->rep_connected; | 899 | rc = ep->rep_connected; |
817 | } else { | 900 | } else { |
818 | dprintk("RPC: %s: connected\n", __func__); | 901 | dprintk("RPC: %s: connected\n", __func__); |
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
863 | char *p; | 946 | char *p; |
864 | size_t len; | 947 | size_t len; |
865 | int i, rc; | 948 | int i, rc; |
949 | struct rpcrdma_mw *r; | ||
866 | 950 | ||
867 | buf->rb_max_requests = cdata->max_requests; | 951 | buf->rb_max_requests = cdata->max_requests; |
868 | spin_lock_init(&buf->rb_lock); | 952 | spin_lock_init(&buf->rb_lock); |
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
873 | * 2. arrays of struct rpcrdma_req to fill in pointers | 957 | * 2. arrays of struct rpcrdma_req to fill in pointers |
874 | * 3. array of struct rpcrdma_rep for replies | 958 | * 3. array of struct rpcrdma_rep for replies |
875 | * 4. padding, if any | 959 | * 4. padding, if any |
876 | * 5. mw's, if any | 960 | * 5. mw's, fmr's or frmr's, if any |
877 | * Send/recv buffers in req/rep need to be registered | 961 | * Send/recv buffers in req/rep need to be registered |
878 | */ | 962 | */ |
879 | 963 | ||
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
881 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 965 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
882 | len += cdata->padding; | 966 | len += cdata->padding; |
883 | switch (ia->ri_memreg_strategy) { | 967 | switch (ia->ri_memreg_strategy) { |
968 | case RPCRDMA_FRMR: | ||
969 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
970 | sizeof(struct rpcrdma_mw); | ||
971 | break; | ||
884 | case RPCRDMA_MTHCAFMR: | 972 | case RPCRDMA_MTHCAFMR: |
885 | /* TBD we are perhaps overallocating here */ | 973 | /* TBD we are perhaps overallocating here */ |
886 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 974 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
927 | * and also reduce unbind-to-bind collision. | 1015 | * and also reduce unbind-to-bind collision. |
928 | */ | 1016 | */ |
929 | INIT_LIST_HEAD(&buf->rb_mws); | 1017 | INIT_LIST_HEAD(&buf->rb_mws); |
1018 | r = (struct rpcrdma_mw *)p; | ||
930 | switch (ia->ri_memreg_strategy) { | 1019 | switch (ia->ri_memreg_strategy) { |
1020 | case RPCRDMA_FRMR: | ||
1021 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
1022 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1023 | RPCRDMA_MAX_SEGS); | ||
1024 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1025 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1026 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1027 | " failed %i\n", __func__, rc); | ||
1028 | goto out; | ||
1029 | } | ||
1030 | r->r.frmr.fr_pgl = | ||
1031 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1032 | RPCRDMA_MAX_SEGS); | ||
1033 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1034 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1035 | dprintk("RPC: %s: " | ||
1036 | "ib_alloc_fast_reg_page_list " | ||
1037 | "failed %i\n", __func__, rc); | ||
1038 | goto out; | ||
1039 | } | ||
1040 | list_add(&r->mw_list, &buf->rb_mws); | ||
1041 | ++r; | ||
1042 | } | ||
1043 | break; | ||
931 | case RPCRDMA_MTHCAFMR: | 1044 | case RPCRDMA_MTHCAFMR: |
932 | { | ||
933 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
934 | struct ib_fmr_attr fa = { | ||
935 | RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT | ||
936 | }; | ||
937 | /* TBD we are perhaps overallocating here */ | 1045 | /* TBD we are perhaps overallocating here */ |
938 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1046 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
1047 | static struct ib_fmr_attr fa = | ||
1048 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
939 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | 1049 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, |
940 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | 1050 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, |
941 | &fa); | 1051 | &fa); |
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
948 | list_add(&r->mw_list, &buf->rb_mws); | 1058 | list_add(&r->mw_list, &buf->rb_mws); |
949 | ++r; | 1059 | ++r; |
950 | } | 1060 | } |
951 | } | ||
952 | break; | 1061 | break; |
953 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1062 | case RPCRDMA_MEMWINDOWS_ASYNC: |
954 | case RPCRDMA_MEMWINDOWS: | 1063 | case RPCRDMA_MEMWINDOWS: |
955 | { | ||
956 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
957 | /* Allocate one extra request's worth, for full cycling */ | 1064 | /* Allocate one extra request's worth, for full cycling */ |
958 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1065 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
959 | r->r.mw = ib_alloc_mw(ia->ri_pd); | 1066 | r->r.mw = ib_alloc_mw(ia->ri_pd); |
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
966 | list_add(&r->mw_list, &buf->rb_mws); | 1073 | list_add(&r->mw_list, &buf->rb_mws); |
967 | ++r; | 1074 | ++r; |
968 | } | 1075 | } |
969 | } | ||
970 | break; | 1076 | break; |
971 | default: | 1077 | default: |
972 | break; | 1078 | break; |
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1046 | { | 1152 | { |
1047 | int rc, i; | 1153 | int rc, i; |
1048 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1154 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1155 | struct rpcrdma_mw *r; | ||
1049 | 1156 | ||
1050 | /* clean up in reverse order from create | 1157 | /* clean up in reverse order from create |
1051 | * 1. recv mr memory (mr free, then kfree) | 1158 | * 1. recv mr memory (mr free, then kfree) |
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1065 | } | 1172 | } |
1066 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1173 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
1067 | while (!list_empty(&buf->rb_mws)) { | 1174 | while (!list_empty(&buf->rb_mws)) { |
1068 | struct rpcrdma_mw *r; | ||
1069 | r = list_entry(buf->rb_mws.next, | 1175 | r = list_entry(buf->rb_mws.next, |
1070 | struct rpcrdma_mw, mw_list); | 1176 | struct rpcrdma_mw, mw_list); |
1071 | list_del(&r->mw_list); | 1177 | list_del(&r->mw_list); |
1072 | switch (ia->ri_memreg_strategy) { | 1178 | switch (ia->ri_memreg_strategy) { |
1179 | case RPCRDMA_FRMR: | ||
1180 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1181 | if (rc) | ||
1182 | dprintk("RPC: %s:" | ||
1183 | " ib_dereg_mr" | ||
1184 | " failed %i\n", | ||
1185 | __func__, rc); | ||
1186 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1187 | break; | ||
1073 | case RPCRDMA_MTHCAFMR: | 1188 | case RPCRDMA_MTHCAFMR: |
1074 | rc = ib_dealloc_fmr(r->r.fmr); | 1189 | rc = ib_dealloc_fmr(r->r.fmr); |
1075 | if (rc) | 1190 | if (rc) |
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1115 | { | 1230 | { |
1116 | struct rpcrdma_req *req; | 1231 | struct rpcrdma_req *req; |
1117 | unsigned long flags; | 1232 | unsigned long flags; |
1233 | int i; | ||
1234 | struct rpcrdma_mw *r; | ||
1118 | 1235 | ||
1119 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1236 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1120 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1237 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1135 | } | 1252 | } |
1136 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1253 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
1137 | if (!list_empty(&buffers->rb_mws)) { | 1254 | if (!list_empty(&buffers->rb_mws)) { |
1138 | int i = RPCRDMA_MAX_SEGS - 1; | 1255 | i = RPCRDMA_MAX_SEGS - 1; |
1139 | do { | 1256 | do { |
1140 | struct rpcrdma_mw *r; | ||
1141 | r = list_entry(buffers->rb_mws.next, | 1257 | r = list_entry(buffers->rb_mws.next, |
1142 | struct rpcrdma_mw, mw_list); | 1258 | struct rpcrdma_mw, mw_list); |
1143 | list_del(&r->mw_list); | 1259 | list_del(&r->mw_list); |
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1171 | req->rl_reply = NULL; | 1287 | req->rl_reply = NULL; |
1172 | } | 1288 | } |
1173 | switch (ia->ri_memreg_strategy) { | 1289 | switch (ia->ri_memreg_strategy) { |
1290 | case RPCRDMA_FRMR: | ||
1174 | case RPCRDMA_MTHCAFMR: | 1291 | case RPCRDMA_MTHCAFMR: |
1175 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1292 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1176 | case RPCRDMA_MEMWINDOWS: | 1293 | case RPCRDMA_MEMWINDOWS: |
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
1252 | va, len, DMA_BIDIRECTIONAL); | 1369 | va, len, DMA_BIDIRECTIONAL); |
1253 | iov->length = len; | 1370 | iov->length = len; |
1254 | 1371 | ||
1255 | if (ia->ri_bind_mem != NULL) { | 1372 | if (ia->ri_have_dma_lkey) { |
1373 | *mrp = NULL; | ||
1374 | iov->lkey = ia->ri_dma_lkey; | ||
1375 | return 0; | ||
1376 | } else if (ia->ri_bind_mem != NULL) { | ||
1256 | *mrp = NULL; | 1377 | *mrp = NULL; |
1257 | iov->lkey = ia->ri_bind_mem->lkey; | 1378 | iov->lkey = ia->ri_bind_mem->lkey; |
1258 | return 0; | 1379 | return 0; |
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
1329 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | 1450 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); |
1330 | } | 1451 | } |
1331 | 1452 | ||
1453 | static int | ||
1454 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1455 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1456 | struct rpcrdma_xprt *r_xprt) | ||
1457 | { | ||
1458 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1459 | struct ib_send_wr frmr_wr, *bad_wr; | ||
1460 | u8 key; | ||
1461 | int len, pageoff; | ||
1462 | int i, rc; | ||
1463 | |||
1464 | pageoff = offset_in_page(seg1->mr_offset); | ||
1465 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1466 | seg1->mr_len += pageoff; | ||
1467 | len = -pageoff; | ||
1468 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1469 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1470 | for (i = 0; i < *nsegs;) { | ||
1471 | rpcrdma_map_one(ia, seg, writing); | ||
1472 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | ||
1473 | len += seg->mr_len; | ||
1474 | ++seg; | ||
1475 | ++i; | ||
1476 | /* Check for holes */ | ||
1477 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1478 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1479 | break; | ||
1480 | } | ||
1481 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1482 | __func__, seg1->mr_chunk.rl_mw, i); | ||
1483 | |||
1484 | /* Bump the key */ | ||
1485 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1486 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1487 | |||
1488 | /* Prepare FRMR WR */ | ||
1489 | memset(&frmr_wr, 0, sizeof frmr_wr); | ||
1490 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | ||
1491 | frmr_wr.send_flags = 0; /* unsignaled */ | ||
1492 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | ||
1493 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | ||
1494 | frmr_wr.wr.fast_reg.page_list_len = i; | ||
1495 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1496 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | ||
1497 | frmr_wr.wr.fast_reg.access_flags = (writing ? | ||
1498 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | ||
1499 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1500 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1501 | |||
1502 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | ||
1503 | |||
1504 | if (rc) { | ||
1505 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1506 | " status %i\n", __func__, rc); | ||
1507 | while (i--) | ||
1508 | rpcrdma_unmap_one(ia, --seg); | ||
1509 | } else { | ||
1510 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1511 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1512 | seg1->mr_nsegs = i; | ||
1513 | seg1->mr_len = len; | ||
1514 | } | ||
1515 | *nsegs = i; | ||
1516 | return rc; | ||
1517 | } | ||
1518 | |||
1519 | static int | ||
1520 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1521 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1522 | { | ||
1523 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1524 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1525 | int rc; | ||
1526 | |||
1527 | while (seg1->mr_nsegs--) | ||
1528 | rpcrdma_unmap_one(ia, seg++); | ||
1529 | |||
1530 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1531 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1532 | invalidate_wr.send_flags = 0; /* unsignaled */ | ||
1533 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1534 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1535 | |||
1536 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1537 | if (rc) | ||
1538 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
1539 | " status %i\n", __func__, rc); | ||
1540 | return rc; | ||
1541 | } | ||
1542 | |||
1543 | static int | ||
1544 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1545 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1546 | { | ||
1547 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1548 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1549 | int len, pageoff, i, rc; | ||
1550 | |||
1551 | pageoff = offset_in_page(seg1->mr_offset); | ||
1552 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1553 | seg1->mr_len += pageoff; | ||
1554 | len = -pageoff; | ||
1555 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1556 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1557 | for (i = 0; i < *nsegs;) { | ||
1558 | rpcrdma_map_one(ia, seg, writing); | ||
1559 | physaddrs[i] = seg->mr_dma; | ||
1560 | len += seg->mr_len; | ||
1561 | ++seg; | ||
1562 | ++i; | ||
1563 | /* Check for holes */ | ||
1564 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1565 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1566 | break; | ||
1567 | } | ||
1568 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1569 | physaddrs, i, seg1->mr_dma); | ||
1570 | if (rc) { | ||
1571 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1572 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1573 | len, (unsigned long long)seg1->mr_dma, | ||
1574 | pageoff, i, rc); | ||
1575 | while (i--) | ||
1576 | rpcrdma_unmap_one(ia, --seg); | ||
1577 | } else { | ||
1578 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1579 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1580 | seg1->mr_nsegs = i; | ||
1581 | seg1->mr_len = len; | ||
1582 | } | ||
1583 | *nsegs = i; | ||
1584 | return rc; | ||
1585 | } | ||
1586 | |||
1587 | static int | ||
1588 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1589 | struct rpcrdma_ia *ia) | ||
1590 | { | ||
1591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1592 | LIST_HEAD(l); | ||
1593 | int rc; | ||
1594 | |||
1595 | list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1596 | rc = ib_unmap_fmr(&l); | ||
1597 | while (seg1->mr_nsegs--) | ||
1598 | rpcrdma_unmap_one(ia, seg++); | ||
1599 | if (rc) | ||
1600 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1601 | " status %i\n", __func__, rc); | ||
1602 | return rc; | ||
1603 | } | ||
1604 | |||
1605 | static int | ||
1606 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1607 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1608 | struct rpcrdma_xprt *r_xprt) | ||
1609 | { | ||
1610 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1611 | IB_ACCESS_REMOTE_READ); | ||
1612 | struct ib_mw_bind param; | ||
1613 | int rc; | ||
1614 | |||
1615 | *nsegs = 1; | ||
1616 | rpcrdma_map_one(ia, seg, writing); | ||
1617 | param.mr = ia->ri_bind_mem; | ||
1618 | param.wr_id = 0ULL; /* no send cookie */ | ||
1619 | param.addr = seg->mr_dma; | ||
1620 | param.length = seg->mr_len; | ||
1621 | param.send_flags = 0; | ||
1622 | param.mw_access_flags = mem_priv; | ||
1623 | |||
1624 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1625 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
1626 | if (rc) { | ||
1627 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1628 | "%u@0x%llx status %i\n", | ||
1629 | __func__, seg->mr_len, | ||
1630 | (unsigned long long)seg->mr_dma, rc); | ||
1631 | rpcrdma_unmap_one(ia, seg); | ||
1632 | } else { | ||
1633 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1634 | seg->mr_base = param.addr; | ||
1635 | seg->mr_nsegs = 1; | ||
1636 | } | ||
1637 | return rc; | ||
1638 | } | ||
1639 | |||
1640 | static int | ||
1641 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1642 | struct rpcrdma_ia *ia, | ||
1643 | struct rpcrdma_xprt *r_xprt, void **r) | ||
1644 | { | ||
1645 | struct ib_mw_bind param; | ||
1646 | LIST_HEAD(l); | ||
1647 | int rc; | ||
1648 | |||
1649 | BUG_ON(seg->mr_nsegs != 1); | ||
1650 | param.mr = ia->ri_bind_mem; | ||
1651 | param.addr = 0ULL; /* unbind */ | ||
1652 | param.length = 0; | ||
1653 | param.mw_access_flags = 0; | ||
1654 | if (*r) { | ||
1655 | param.wr_id = (u64) (unsigned long) *r; | ||
1656 | param.send_flags = IB_SEND_SIGNALED; | ||
1657 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1658 | } else { | ||
1659 | param.wr_id = 0ULL; | ||
1660 | param.send_flags = 0; | ||
1661 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1662 | } | ||
1663 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
1664 | rpcrdma_unmap_one(ia, seg); | ||
1665 | if (rc) | ||
1666 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1667 | " status %i\n", __func__, rc); | ||
1668 | else | ||
1669 | *r = NULL; /* will upcall on completion */ | ||
1670 | return rc; | ||
1671 | } | ||
1672 | |||
1673 | static int | ||
1674 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
1675 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1676 | { | ||
1677 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1678 | IB_ACCESS_REMOTE_READ); | ||
1679 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1680 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1681 | int len, i, rc = 0; | ||
1682 | |||
1683 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1684 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1685 | for (len = 0, i = 0; i < *nsegs;) { | ||
1686 | rpcrdma_map_one(ia, seg, writing); | ||
1687 | ipb[i].addr = seg->mr_dma; | ||
1688 | ipb[i].size = seg->mr_len; | ||
1689 | len += seg->mr_len; | ||
1690 | ++seg; | ||
1691 | ++i; | ||
1692 | /* Check for holes */ | ||
1693 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1694 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1695 | break; | ||
1696 | } | ||
1697 | seg1->mr_base = seg1->mr_dma; | ||
1698 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1699 | ipb, i, mem_priv, &seg1->mr_base); | ||
1700 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1701 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1702 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1703 | "%u@0x%llx (%d)... status %i\n", | ||
1704 | __func__, len, | ||
1705 | (unsigned long long)seg1->mr_dma, i, rc); | ||
1706 | while (i--) | ||
1707 | rpcrdma_unmap_one(ia, --seg); | ||
1708 | } else { | ||
1709 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1710 | seg1->mr_nsegs = i; | ||
1711 | seg1->mr_len = len; | ||
1712 | } | ||
1713 | *nsegs = i; | ||
1714 | return rc; | ||
1715 | } | ||
1716 | |||
1717 | static int | ||
1718 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
1719 | struct rpcrdma_ia *ia) | ||
1720 | { | ||
1721 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1722 | int rc; | ||
1723 | |||
1724 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
1725 | seg1->mr_chunk.rl_mr = NULL; | ||
1726 | while (seg1->mr_nsegs--) | ||
1727 | rpcrdma_unmap_one(ia, seg++); | ||
1728 | if (rc) | ||
1729 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1730 | " status %i\n", __func__, rc); | ||
1731 | return rc; | ||
1732 | } | ||
1733 | |||
1332 | int | 1734 | int |
1333 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1735 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
1334 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1736 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
1335 | { | 1737 | { |
1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1738 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1337 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1338 | IB_ACCESS_REMOTE_READ); | ||
1339 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1340 | int i; | ||
1341 | int rc = 0; | 1739 | int rc = 0; |
1342 | 1740 | ||
1343 | switch (ia->ri_memreg_strategy) { | 1741 | switch (ia->ri_memreg_strategy) { |
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1352 | break; | 1750 | break; |
1353 | #endif | 1751 | #endif |
1354 | 1752 | ||
1355 | /* Registration using fast memory registration */ | 1753 | /* Registration using frmr registration */ |
1754 | case RPCRDMA_FRMR: | ||
1755 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
1756 | break; | ||
1757 | |||
1758 | /* Registration using fmr memory registration */ | ||
1356 | case RPCRDMA_MTHCAFMR: | 1759 | case RPCRDMA_MTHCAFMR: |
1357 | { | 1760 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
1358 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1359 | int len, pageoff = offset_in_page(seg->mr_offset); | ||
1360 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1361 | seg1->mr_len += pageoff; | ||
1362 | len = -pageoff; | ||
1363 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1364 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1365 | for (i = 0; i < nsegs;) { | ||
1366 | rpcrdma_map_one(ia, seg, writing); | ||
1367 | physaddrs[i] = seg->mr_dma; | ||
1368 | len += seg->mr_len; | ||
1369 | ++seg; | ||
1370 | ++i; | ||
1371 | /* Check for holes */ | ||
1372 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1373 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1374 | break; | ||
1375 | } | ||
1376 | nsegs = i; | ||
1377 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1378 | physaddrs, nsegs, seg1->mr_dma); | ||
1379 | if (rc) { | ||
1380 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1381 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1382 | len, (unsigned long long)seg1->mr_dma, | ||
1383 | pageoff, nsegs, rc); | ||
1384 | while (nsegs--) | ||
1385 | rpcrdma_unmap_one(ia, --seg); | ||
1386 | } else { | ||
1387 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1388 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1389 | seg1->mr_nsegs = nsegs; | ||
1390 | seg1->mr_len = len; | ||
1391 | } | ||
1392 | } | ||
1393 | break; | 1761 | break; |
1394 | 1762 | ||
1395 | /* Registration using memory windows */ | 1763 | /* Registration using memory windows */ |
1396 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1764 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1397 | case RPCRDMA_MEMWINDOWS: | 1765 | case RPCRDMA_MEMWINDOWS: |
1398 | { | 1766 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); |
1399 | struct ib_mw_bind param; | ||
1400 | rpcrdma_map_one(ia, seg, writing); | ||
1401 | param.mr = ia->ri_bind_mem; | ||
1402 | param.wr_id = 0ULL; /* no send cookie */ | ||
1403 | param.addr = seg->mr_dma; | ||
1404 | param.length = seg->mr_len; | ||
1405 | param.send_flags = 0; | ||
1406 | param.mw_access_flags = mem_priv; | ||
1407 | |||
1408 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1409 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1410 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
1411 | if (rc) { | ||
1412 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1413 | "%u@0x%llx status %i\n", | ||
1414 | __func__, seg->mr_len, | ||
1415 | (unsigned long long)seg->mr_dma, rc); | ||
1416 | rpcrdma_unmap_one(ia, seg); | ||
1417 | } else { | ||
1418 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1419 | seg->mr_base = param.addr; | ||
1420 | seg->mr_nsegs = 1; | ||
1421 | nsegs = 1; | ||
1422 | } | ||
1423 | } | ||
1424 | break; | 1767 | break; |
1425 | 1768 | ||
1426 | /* Default registration each time */ | 1769 | /* Default registration each time */ |
1427 | default: | 1770 | default: |
1428 | { | 1771 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); |
1429 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1430 | int len = 0; | ||
1431 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1432 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1433 | for (i = 0; i < nsegs;) { | ||
1434 | rpcrdma_map_one(ia, seg, writing); | ||
1435 | ipb[i].addr = seg->mr_dma; | ||
1436 | ipb[i].size = seg->mr_len; | ||
1437 | len += seg->mr_len; | ||
1438 | ++seg; | ||
1439 | ++i; | ||
1440 | /* Check for holes */ | ||
1441 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1442 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1443 | break; | ||
1444 | } | ||
1445 | nsegs = i; | ||
1446 | seg1->mr_base = seg1->mr_dma; | ||
1447 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1448 | ipb, nsegs, mem_priv, &seg1->mr_base); | ||
1449 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1450 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1451 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1452 | "%u@0x%llx (%d)... status %i\n", | ||
1453 | __func__, len, | ||
1454 | (unsigned long long)seg1->mr_dma, nsegs, rc); | ||
1455 | while (nsegs--) | ||
1456 | rpcrdma_unmap_one(ia, --seg); | ||
1457 | } else { | ||
1458 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1459 | seg1->mr_nsegs = nsegs; | ||
1460 | seg1->mr_len = len; | ||
1461 | } | ||
1462 | } | ||
1463 | break; | 1772 | break; |
1464 | } | 1773 | } |
1465 | if (rc) | 1774 | if (rc) |
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1473 | struct rpcrdma_xprt *r_xprt, void *r) | 1782 | struct rpcrdma_xprt *r_xprt, void *r) |
1474 | { | 1783 | { |
1475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1784 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1476 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1477 | int nsegs = seg->mr_nsegs, rc; | 1785 | int nsegs = seg->mr_nsegs, rc; |
1478 | 1786 | ||
1479 | switch (ia->ri_memreg_strategy) { | 1787 | switch (ia->ri_memreg_strategy) { |
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1486 | break; | 1794 | break; |
1487 | #endif | 1795 | #endif |
1488 | 1796 | ||
1797 | case RPCRDMA_FRMR: | ||
1798 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
1799 | break; | ||
1800 | |||
1489 | case RPCRDMA_MTHCAFMR: | 1801 | case RPCRDMA_MTHCAFMR: |
1490 | { | 1802 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1491 | LIST_HEAD(l); | ||
1492 | list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1493 | rc = ib_unmap_fmr(&l); | ||
1494 | while (seg1->mr_nsegs--) | ||
1495 | rpcrdma_unmap_one(ia, seg++); | ||
1496 | } | ||
1497 | if (rc) | ||
1498 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1499 | " status %i\n", __func__, rc); | ||
1500 | break; | 1803 | break; |
1501 | 1804 | ||
1502 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1805 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1503 | case RPCRDMA_MEMWINDOWS: | 1806 | case RPCRDMA_MEMWINDOWS: |
1504 | { | 1807 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); |
1505 | struct ib_mw_bind param; | ||
1506 | BUG_ON(nsegs != 1); | ||
1507 | param.mr = ia->ri_bind_mem; | ||
1508 | param.addr = 0ULL; /* unbind */ | ||
1509 | param.length = 0; | ||
1510 | param.mw_access_flags = 0; | ||
1511 | if (r) { | ||
1512 | param.wr_id = (u64) (unsigned long) r; | ||
1513 | param.send_flags = IB_SEND_SIGNALED; | ||
1514 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1515 | } else { | ||
1516 | param.wr_id = 0ULL; | ||
1517 | param.send_flags = 0; | ||
1518 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1519 | } | ||
1520 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1521 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
1522 | rpcrdma_unmap_one(ia, seg); | ||
1523 | } | ||
1524 | if (rc) | ||
1525 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1526 | " status %i\n", __func__, rc); | ||
1527 | else | ||
1528 | r = NULL; /* will upcall on completion */ | ||
1529 | break; | 1808 | break; |
1530 | 1809 | ||
1531 | default: | 1810 | default: |
1532 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | 1811 | rc = rpcrdma_deregister_default_external(seg, ia); |
1533 | seg1->mr_chunk.rl_mr = NULL; | ||
1534 | while (seg1->mr_nsegs--) | ||
1535 | rpcrdma_unmap_one(ia, seg++); | ||
1536 | if (rc) | ||
1537 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1538 | " status %i\n", __func__, rc); | ||
1539 | break; | 1812 | break; |
1540 | } | 1813 | } |
1541 | if (r) { | 1814 | if (r) { |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -51,6 +51,9 @@ | |||
51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | 51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ |
52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | 52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ |
53 | 53 | ||
54 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ | ||
55 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
56 | |||
54 | /* | 57 | /* |
55 | * Interface Adapter -- one per transport instance | 58 | * Interface Adapter -- one per transport instance |
56 | */ | 59 | */ |
@@ -58,6 +61,8 @@ struct rpcrdma_ia { | |||
58 | struct rdma_cm_id *ri_id; | 61 | struct rdma_cm_id *ri_id; |
59 | struct ib_pd *ri_pd; | 62 | struct ib_pd *ri_pd; |
60 | struct ib_mr *ri_bind_mem; | 63 | struct ib_mr *ri_bind_mem; |
64 | u32 ri_dma_lkey; | ||
65 | int ri_have_dma_lkey; | ||
61 | struct completion ri_done; | 66 | struct completion ri_done; |
62 | int ri_async_rc; | 67 | int ri_async_rc; |
63 | enum rpcrdma_memreg ri_memreg_strategy; | 68 | enum rpcrdma_memreg ri_memreg_strategy; |
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
156 | union { | 161 | union { |
157 | struct ib_mw *mw; | 162 | struct ib_mw *mw; |
158 | struct ib_fmr *fmr; | 163 | struct ib_fmr *fmr; |
164 | struct { | ||
165 | struct ib_fast_reg_page_list *fr_pgl; | ||
166 | struct ib_mr *fr_mr; | ||
167 | } frmr; | ||
159 | } r; | 168 | } r; |
160 | struct list_head mw_list; | 169 | struct list_head mw_list; |
161 | } *rl_mw; | 170 | } *rl_mw; |
@@ -175,6 +184,7 @@ struct rpcrdma_req { | |||
175 | size_t rl_size; /* actual length of buffer */ | 184 | size_t rl_size; /* actual length of buffer */ |
176 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 185 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
177 | unsigned int rl_nchunks; /* non-zero if chunks */ | 186 | unsigned int rl_nchunks; /* non-zero if chunks */ |
187 | unsigned int rl_connect_cookie; /* retry detection */ | ||
178 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 188 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
179 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 189 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
180 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ | 190 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ |
@@ -198,7 +208,7 @@ struct rpcrdma_buffer { | |||
198 | atomic_t rb_credits; /* most recent server credits */ | 208 | atomic_t rb_credits; /* most recent server credits */ |
199 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | 209 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ |
200 | int rb_max_requests;/* client max requests */ | 210 | int rb_max_requests;/* client max requests */ |
201 | struct list_head rb_mws; /* optional memory windows/fmrs */ | 211 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
202 | int rb_send_index; | 212 | int rb_send_index; |
203 | struct rpcrdma_req **rb_send_bufs; | 213 | struct rpcrdma_req **rb_send_bufs; |
204 | int rb_recv_index; | 214 | int rb_recv_index; |
@@ -273,6 +283,11 @@ struct rpcrdma_xprt { | |||
273 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) | 283 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) |
274 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 284 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
275 | 285 | ||
286 | /* Setting this to 0 ensures interoperability with early servers. | ||
287 | * Setting this to 1 enhances certain unaligned read/write performance. | ||
288 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | ||
289 | extern int xprt_rdma_pad_optimize; | ||
290 | |||
276 | /* | 291 | /* |
277 | * Interface Adapter calls - xprtrdma/verbs.c | 292 | * Interface Adapter calls - xprtrdma/verbs.c |
278 | */ | 293 | */ |