aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2015-04-14 09:54:52 -0400
committerIlya Dryomov <idryomov@gmail.com>2015-04-22 11:33:43 -0400
commit958a27658d94cf212caeb0ffb04ee0b0bc89cc40 (patch)
tree2aff94f8b656b5526c8cfef1a8994edbb0c960fb /net/ceph
parent45002267e8d2699bf9b022315bee3dd13b044843 (diff)
crush: straw2 bucket type with an efficient 64-bit crush_ln()
This is an improved straw bucket that correctly avoids any data movement between items A and B when neither A nor B's weights are changed. Said differently, if we adjust the weight of item C (including adding it anew or removing it completely), we will only see inputs move to or from C, never between other items in the bucket.

Notably, there is no intermediate scaling factor that needs to be calculated. The mapping function is a simple function of the item weights.

The below commits were squashed together into this one (mostly to avoid adding and then yanking ~6000 lines' worth of crush_ln_table):

- crush: add a straw2 bucket type
- crush: add crush_ln to calculate nature log efficently
- crush: improve straw2 adjustment slightly
- crush: change crush_ln to provide 32 more digits
- crush: fix crush_get_bucket_item_weight and bucket destroy for straw2
- crush/mapper: fix divide-by-0 in straw2 (with div64_s64() for draw = ln / w and INT64_MIN -> S64_MIN - need to create a proper compat.h in ceph.git)

Reflects ceph.git commits 242293c908e923d474910f2b8203fa3b41eb5a53, 32a1ead92efcd351822d22a5fc37d159c65c1338, 6289912418c4a3597a11778bcf29ed5415117ad9, 35fcb04e2945717cf5cfe150b9fa89cb3d2303a1, 6445d9ee7290938de1e4ee9563912a6ab6d8ee5f, b5921d55d16796e12d66ad2c4add7305f9ce2353.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/crush/crush.c14
-rw-r--r--net/ceph/crush/crush_ln_table.h166
-rw-r--r--net/ceph/crush/mapper.c101
-rw-r--r--net/ceph/osdmap.c25
4 files changed, 306 insertions, 0 deletions
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
17 case CRUSH_BUCKET_LIST: return "list"; 17 case CRUSH_BUCKET_LIST: return "list";
18 case CRUSH_BUCKET_TREE: return "tree"; 18 case CRUSH_BUCKET_TREE: return "tree";
19 case CRUSH_BUCKET_STRAW: return "straw"; 19 case CRUSH_BUCKET_STRAW: return "straw";
20 case CRUSH_BUCKET_STRAW2: return "straw2";
20 default: return "unknown"; 21 default: return "unknown";
21 } 22 }
22} 23}
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
40 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)]; 41 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
41 case CRUSH_BUCKET_STRAW: 42 case CRUSH_BUCKET_STRAW:
42 return ((struct crush_bucket_straw *)b)->item_weights[p]; 43 return ((struct crush_bucket_straw *)b)->item_weights[p];
44 case CRUSH_BUCKET_STRAW2:
45 return ((struct crush_bucket_straw2 *)b)->item_weights[p];
43 } 46 }
44 return 0; 47 return 0;
45} 48}
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
77 kfree(b); 80 kfree(b);
78} 81}
79 82
83void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
84{
85 kfree(b->item_weights);
86 kfree(b->h.perm);
87 kfree(b->h.items);
88 kfree(b);
89}
90
80void crush_destroy_bucket(struct crush_bucket *b) 91void crush_destroy_bucket(struct crush_bucket *b)
81{ 92{
82 switch (b->alg) { 93 switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
92 case CRUSH_BUCKET_STRAW: 103 case CRUSH_BUCKET_STRAW:
93 crush_destroy_bucket_straw((struct crush_bucket_straw *)b); 104 crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
94 break; 105 break;
106 case CRUSH_BUCKET_STRAW2:
107 crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
108 break;
95 } 109 }
96} 110}
97 111
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2015 Intel Corporation All Rights Reserved
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13#if defined(__linux__)
14#include <linux/types.h>
15#elif defined(__FreeBSD__)
16#include <sys/types.h>
17#endif
18
19#ifndef CEPH_CRUSH_LN_H
20#define CEPH_CRUSH_LN_H
21
22
23// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
24// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
25
26static int64_t __RH_LH_tbl[128*2+2] = {
27 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
28 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
29 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
30 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
31 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
32 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
33 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
34 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
35 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
36 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
37 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
38 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
39 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
40 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
41 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
42 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
43 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
44 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
45 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
46 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
47 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
48 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
49 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
50 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
51 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
52 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
53 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
54 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
55 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
56 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
57 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
58 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
59 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
60 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
61 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
62 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
63 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
64 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
65 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
66 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
67 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
68 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
69 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
70 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
71 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
72 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
73 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
74 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
75 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
76 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
77 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
78 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
79 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
80 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
81 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
82 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
83 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
84 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
85 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
86 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
87 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
88 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
89 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
90 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
91 0x0000800000000000ll, 0x0000ffff00000000ll,
92 };
93
94
95 // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
96static int64_t __LL_tbl[256] = {
97 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
98 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
99 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
100 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
101 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
102 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
103 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
104 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
105 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
106 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
107 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
108 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
109 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
110 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
111 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
112 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
113 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
114 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
115 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
116 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
117 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
118 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
119 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
120 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
121 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
122 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
123 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
124 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
125 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
126 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
127 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
128 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
129 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
130 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
131 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
132 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
133 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
134 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
135 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
136 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
137 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
138 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
139 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
140 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
141 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
142 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
143 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
144 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
145 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
146 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
147 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
148 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
149 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
150 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
151 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
152 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
153 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
154 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
155 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
156 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
157 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
158 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
159 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
160 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
161};
162
163
164
165
166#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 91c41fe83113..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,6 +20,7 @@
20 20
21#include <linux/crush/crush.h> 21#include <linux/crush/crush.h>
22#include <linux/crush/hash.h> 22#include <linux/crush/hash.h>
23#include "crush_ln_table.h"
23 24
24/* 25/*
25 * Implement the core CRUSH mapping algorithm. 26 * Implement the core CRUSH mapping algorithm.
@@ -237,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
237 return bucket->h.items[high]; 238 return bucket->h.items[high];
238} 239}
239 240
241// compute 2^44*log2(input+1)
242uint64_t crush_ln(unsigned xin)
243{
244 unsigned x=xin, x1;
245 int iexpon, index1, index2;
246 uint64_t RH, LH, LL, xl64, result;
247
248 x++;
249
250 // normalize input
251 iexpon = 15;
252 while(!(x&0x18000)) { x<<=1; iexpon--; }
253
254 index1 = (x>>8)<<1;
255 // RH ~ 2^56/index1
256 RH = __RH_LH_tbl[index1 - 256];
257 // LH ~ 2^48 * log2(index1/256)
258 LH = __RH_LH_tbl[index1 + 1 - 256];
259
260 // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
261 xl64 = (int64_t)x * RH;
262 xl64 >>= 48;
263 x1 = xl64;
264
265 result = iexpon;
266 result <<= (12 + 32);
267
268 index2 = x1 & 0xff;
269 // LL ~ 2^48*log2(1.0+index2/2^15)
270 LL = __LL_tbl[index2];
271
272 LH = LH + LL;
273
274 LH >>= (48-12 - 32);
275 result += LH;
276
277 return result;
278}
279
280
281/*
282 * straw2
283 *
284 * for reference, see:
285 *
286 * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
287 *
288 */
289
290static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
291 int x, int r)
292{
293 unsigned i, high = 0;
294 unsigned u;
295 unsigned w;
296 __s64 ln, draw, high_draw = 0;
297
298 for (i = 0; i < bucket->h.size; i++) {
299 w = bucket->item_weights[i];
300 if (w) {
301 u = crush_hash32_3(bucket->h.hash, x,
302 bucket->h.items[i], r);
303 u &= 0xffff;
304
305 /*
306 * for some reason slightly less than 0x10000 produces
307 * a slightly more accurate distribution... probably a
308 * rounding effect.
309 *
310 * the natural log lookup table maps [0,0xffff]
311 * (corresponding to real numbers [1/0x10000, 1] to
312 * [0, 0xffffffffffff] (corresponding to real numbers
313 * [-11.090355,0]).
314 */
315 ln = crush_ln(u) - 0x1000000000000ll;
316
317 /*
318 * divide by 16.16 fixed-point weight. note
319 * that the ln value is negative, so a larger
320 * weight means a larger (less negative) value
321 * for draw.
322 */
323 draw = div64_s64(ln, w);
324 } else {
325 draw = S64_MIN;
326 }
327
328 if (i == 0 || draw > high_draw) {
329 high = i;
330 high_draw = draw;
331 }
332 }
333 return bucket->h.items[high];
334}
335
336
240static int crush_bucket_choose(struct crush_bucket *in, int x, int r) 337static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
241{ 338{
242 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); 339 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -254,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
254 case CRUSH_BUCKET_STRAW: 351 case CRUSH_BUCKET_STRAW:
255 return bucket_straw_choose((struct crush_bucket_straw *)in, 352 return bucket_straw_choose((struct crush_bucket_straw *)in,
256 x, r); 353 x, r);
354 case CRUSH_BUCKET_STRAW2:
355 return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
356 x, r);
257 default: 357 default:
258 dprintk("unknown bucket %d alg %d\n", in->id, in->alg); 358 dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
259 return in->items[0]; 359 return in->items[0];
260 } 360 }
261} 361}
262 362
363
263/* 364/*
264 * true if device is marked "out" (failed, fully offloaded) 365 * true if device is marked "out" (failed, fully offloaded)
265 * of the cluster 366 * of the cluster
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
122 return -EINVAL; 122 return -EINVAL;
123} 123}
124 124
125static int crush_decode_straw2_bucket(void **p, void *end,
126 struct crush_bucket_straw2 *b)
127{
128 int j;
129 dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
130 b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
131 if (b->item_weights == NULL)
132 return -ENOMEM;
133 ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
134 for (j = 0; j < b->h.size; j++)
135 b->item_weights[j] = ceph_decode_32(p);
136 return 0;
137bad:
138 return -EINVAL;
139}
140
125static int skip_name_map(void **p, void *end) 141static int skip_name_map(void **p, void *end)
126{ 142{
127 int len; 143 int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
204 case CRUSH_BUCKET_STRAW: 220 case CRUSH_BUCKET_STRAW:
205 size = sizeof(struct crush_bucket_straw); 221 size = sizeof(struct crush_bucket_straw);
206 break; 222 break;
223 case CRUSH_BUCKET_STRAW2:
224 size = sizeof(struct crush_bucket_straw2);
225 break;
207 default: 226 default:
208 err = -EINVAL; 227 err = -EINVAL;
209 goto bad; 228 goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
261 if (err < 0) 280 if (err < 0)
262 goto bad; 281 goto bad;
263 break; 282 break;
283 case CRUSH_BUCKET_STRAW2:
284 err = crush_decode_straw2_bucket(p, end,
285 (struct crush_bucket_straw2 *)b);
286 if (err < 0)
287 goto bad;
288 break;
264 } 289 }
265 } 290 }
266 291