diff options
Diffstat (limited to 'include/linux/dim.h')
| -rw-r--r-- | include/linux/dim.h | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/include/linux/dim.h b/include/linux/dim.h new file mode 100644 index 000000000000..d3a0fbfff2bb --- /dev/null +++ b/include/linux/dim.h | |||
| @@ -0,0 +1,389 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ | ||
| 2 | /* Copyright (c) 2019 Mellanox Technologies. */ | ||
| 3 | |||
| 4 | #ifndef DIM_H | ||
| 5 | #define DIM_H | ||
| 6 | |||
| 7 | #include <linux/module.h> | ||
| 8 | |||
| 9 | /** | ||
| 10 | * Number of events between DIM iterations. | ||
| 11 | * Causes a moderation of the algorithm run. | ||
| 12 | */ | ||
| 13 | #define DIM_NEVENTS 64 | ||
| 14 | |||
/**
 * IS_SIGNIFICANT_DIFF() - does the difference between two values justify
 * taking an action?
 * @val: newly measured value
 * @ref: reference value that @val is compared against
 *
 * A difference of more than 10% of @ref is considered significant.
 * A zero @ref evaluates to false instead of dividing by zero.
 * Both arguments are evaluated more than once, so they must not have
 * side effects.
 */
#define IS_SIGNIFICANT_DIFF(val, ref) \
	((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
| 21 | |||
| 22 | /** | ||
| 23 | * Calculate the gap between two values. | ||
| 24 | * Take wrap-around and variable size into consideration. | ||
| 25 | */ | ||
| 26 | #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ | ||
| 27 | & (BIT_ULL(bits) - 1)) | ||
| 28 | |||
| 29 | /** | ||
| 30 | * Structure for CQ moderation values. | ||
| 31 | * Used for communications between DIM and its consumer. | ||
| 32 | * | ||
| 33 | * @usec: CQ timer suggestion (by DIM) | ||
| 34 | * @pkts: CQ packet counter suggestion (by DIM) | ||
| 35 | * @cq_period_mode: CQ priod count mode (from CQE/EQE) | ||
| 36 | */ | ||
| 37 | struct dim_cq_moder { | ||
| 38 | u16 usec; | ||
| 39 | u16 pkts; | ||
| 40 | u16 comps; | ||
| 41 | u8 cq_period_mode; | ||
| 42 | }; | ||
| 43 | |||
| 44 | /** | ||
| 45 | * Structure for DIM sample data. | ||
| 46 | * Used for communications between DIM and its consumer. | ||
| 47 | * | ||
| 48 | * @time: Sample timestamp | ||
| 49 | * @pkt_ctr: Number of packets | ||
| 50 | * @byte_ctr: Number of bytes | ||
| 51 | * @event_ctr: Number of events | ||
| 52 | */ | ||
| 53 | struct dim_sample { | ||
| 54 | ktime_t time; | ||
| 55 | u32 pkt_ctr; | ||
| 56 | u32 byte_ctr; | ||
| 57 | u16 event_ctr; | ||
| 58 | u32 comp_ctr; | ||
| 59 | }; | ||
| 60 | |||
/**
 * struct dim_stats - DIM stats
 * @ppms: Packets per msec
 * @bpms: Bytes per msec
 * @epms: Events per msec
 * @cpms: Completions per msec
 * @cpe_ratio: Ratio of completions to events
 *
 * Used for holding current measured rates.
 */
struct dim_stats {
	int ppms;
	int bpms;
	int epms;
	int cpms;
	int cpe_ratio;
};
| 76 | |||
| 77 | /** | ||
| 78 | * Main structure for dynamic interrupt moderation (DIM). | ||
| 79 | * Used for holding all information about a specific DIM instance. | ||
| 80 | * | ||
| 81 | * @state: Algorithm state (see below) | ||
| 82 | * @prev_stats: Measured rates from previous iteration (for comparison) | ||
| 83 | * @start_sample: Sampled data at start of current iteration | ||
| 84 | * @work: Work to perform on action required | ||
| 85 | * @priv: A pointer to the struct that points to dim | ||
| 86 | * @profile_ix: Current moderation profile | ||
| 87 | * @mode: CQ period count mode | ||
| 88 | * @tune_state: Algorithm tuning state (see below) | ||
| 89 | * @steps_right: Number of steps taken towards higher moderation | ||
| 90 | * @steps_left: Number of steps taken towards lower moderation | ||
| 91 | * @tired: Parking depth counter | ||
| 92 | */ | ||
| 93 | struct dim { | ||
| 94 | u8 state; | ||
| 95 | struct dim_stats prev_stats; | ||
| 96 | struct dim_sample start_sample; | ||
| 97 | struct dim_sample measuring_sample; | ||
| 98 | struct work_struct work; | ||
| 99 | void *priv; | ||
| 100 | u8 profile_ix; | ||
| 101 | u8 mode; | ||
| 102 | u8 tune_state; | ||
| 103 | u8 steps_right; | ||
| 104 | u8 steps_left; | ||
| 105 | u8 tired; | ||
| 106 | }; | ||
| 107 | |||
/**
 * enum dim_cq_period_mode - modes for CQ period count
 * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
 * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
 * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
 *
 * The enum carries the tag its documentation refers to; the enumerator
 * values are unchanged.
 */
enum dim_cq_period_mode {
	DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
	DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
	DIM_CQ_PERIOD_NUM_MODES
};
| 122 | |||
/**
 * enum dim_state - DIM algorithm states
 * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
 * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
 * need to perform an action
 * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
 *
 * These will determine if the algorithm is in a valid state to start an
 * iteration.
 */
enum dim_state {
	DIM_START_MEASURE,
	DIM_MEASURE_IN_PROGRESS,
	DIM_APPLY_NEW_PROFILE,
};
| 139 | |||
/**
 * enum dim_tune_state - DIM algorithm tune states
 * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
 * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
 * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
 * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
 *
 * These will determine which action the algorithm should perform.
 */
enum dim_tune_state {
	DIM_PARKING_ON_TOP,
	DIM_PARKING_TIRED,
	DIM_GOING_RIGHT,
	DIM_GOING_LEFT,
};
| 157 | |||
/**
 * enum dim_stats_state - DIM algorithm statistics states
 * @DIM_STATS_WORSE: Current iteration shows worse performance than before
 * @DIM_STATS_SAME: Current iteration shows the same performance as before
 * @DIM_STATS_BETTER: Current iteration shows better performance than before
 *
 * These will determine the verdict of current iteration.
 */
enum dim_stats_state {
	DIM_STATS_WORSE,
	DIM_STATS_SAME,
	DIM_STATS_BETTER,
};
| 173 | |||
/**
 * enum dim_step_result - DIM algorithm step results
 * @DIM_STEPPED: Performed a regular step
 * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
 * tired parking
 * @DIM_ON_EDGE: Stepped to the most left/right profile
 *
 * These describe the result of a step.
 */
enum dim_step_result {
	DIM_STEPPED,
	DIM_TOO_TIRED,
	DIM_ON_EDGE,
};
| 190 | |||
| 191 | /** | ||
| 192 | * dim_on_top - check if current state is a good place to stop (top location) | ||
| 193 | * @dim: DIM context | ||
| 194 | * | ||
| 195 | * Check if current profile is a good place to park at. | ||
| 196 | * This will result in reducing the DIM checks frequency as we assume we | ||
| 197 | * probably shouldn't change profiles, unless the traffic pattern has changed. | ||
| 198 | */ | ||
| 199 | bool dim_on_top(struct dim *dim); | ||
| 200 | |||
| 201 | /** | ||
| 202 | * dim_turn - change profile altering direction | ||
| 203 | * @dim: DIM context | ||
| 204 | * | ||
| 205 | * Go left if we were going right and vice-versa. | ||
| 206 | * Do nothing if currently parking. | ||
| 207 | */ | ||
| 208 | void dim_turn(struct dim *dim); | ||
| 209 | |||
| 210 | /** | ||
| 211 | * dim_park_on_top - enter a parking state on a top location | ||
| 212 | * @dim: DIM context | ||
| 213 | * | ||
| 214 | * Enter parking state. | ||
| 215 | * Clear all movement history. | ||
| 216 | */ | ||
| 217 | void dim_park_on_top(struct dim *dim); | ||
| 218 | |||
| 219 | /** | ||
| 220 | * dim_park_tired - enter a tired parking state | ||
| 221 | * @dim: DIM context | ||
| 222 | * | ||
| 223 | * Enter parking state. | ||
| 224 | * Clear all movement history and cause DIM checks frequency to reduce. | ||
| 225 | */ | ||
| 226 | void dim_park_tired(struct dim *dim); | ||
| 227 | |||
| 228 | /** | ||
| 229 | * dim_calc_stats - calculate the difference between two samples | ||
| 230 | * @start: start sample | ||
| 231 | * @end: end sample | ||
| 232 | * @curr_stats: delta between samples | ||
| 233 | * | ||
| 234 | * Calculate the delta between two samples (in data rates). | ||
| 235 | * Takes into consideration counter wrap-around. | ||
| 236 | */ | ||
| 237 | void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, | ||
| 238 | struct dim_stats *curr_stats); | ||
| 239 | |||
| 240 | /** | ||
| 241 | * dim_update_sample - set a sample's fields with give values | ||
| 242 | * @event_ctr: number of events to set | ||
| 243 | * @packets: number of packets to set | ||
| 244 | * @bytes: number of bytes to set | ||
| 245 | * @s: DIM sample | ||
| 246 | */ | ||
| 247 | static inline void | ||
| 248 | dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) | ||
| 249 | { | ||
| 250 | s->time = ktime_get(); | ||
| 251 | s->pkt_ctr = packets; | ||
| 252 | s->byte_ctr = bytes; | ||
| 253 | s->event_ctr = event_ctr; | ||
| 254 | } | ||
| 255 | |||
| 256 | /** | ||
| 257 | * dim_update_sample_with_comps - set a sample's fields with given | ||
| 258 | * values including the completion parameter | ||
| 259 | * @event_ctr: number of events to set | ||
| 260 | * @packets: number of packets to set | ||
| 261 | * @bytes: number of bytes to set | ||
| 262 | * @comps: number of completions to set | ||
| 263 | * @s: DIM sample | ||
| 264 | */ | ||
| 265 | static inline void | ||
| 266 | dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, | ||
| 267 | struct dim_sample *s) | ||
| 268 | { | ||
| 269 | dim_update_sample(event_ctr, packets, bytes, s); | ||
| 270 | s->comp_ctr = comps; | ||
| 271 | } | ||
| 272 | |||
| 273 | /* Net DIM */ | ||
| 274 | |||
| 275 | /* | ||
| 276 | * Net DIM profiles: | ||
| 277 | * There is a different set of profiles for each CQ period mode. | ||
| 278 | * There is a different set of profiles for RX/TX CQs. | ||
| 279 | * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES | ||
| 280 | */ | ||
| 281 | #define NET_DIM_PARAMS_NUM_PROFILES 5 | ||
| 282 | #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 | ||
| 283 | #define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 | ||
| 284 | #define NET_DIM_DEF_PROFILE_CQE 1 | ||
| 285 | #define NET_DIM_DEF_PROFILE_EQE 1 | ||
| 286 | |||
| 287 | #define NET_DIM_RX_EQE_PROFILES { \ | ||
| 288 | {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 289 | {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 290 | {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 291 | {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 292 | {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 293 | } | ||
| 294 | |||
| 295 | #define NET_DIM_RX_CQE_PROFILES { \ | ||
| 296 | {2, 256}, \ | ||
| 297 | {8, 128}, \ | ||
| 298 | {16, 64}, \ | ||
| 299 | {32, 64}, \ | ||
| 300 | {64, 64} \ | ||
| 301 | } | ||
| 302 | |||
| 303 | #define NET_DIM_TX_EQE_PROFILES { \ | ||
| 304 | {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 305 | {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 306 | {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 307 | {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ | ||
| 308 | {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ | ||
| 309 | } | ||
| 310 | |||
| 311 | #define NET_DIM_TX_CQE_PROFILES { \ | ||
| 312 | {5, 128}, \ | ||
| 313 | {8, 64}, \ | ||
| 314 | {16, 32}, \ | ||
| 315 | {32, 32}, \ | ||
| 316 | {64, 32} \ | ||
| 317 | } | ||
| 318 | |||
| 319 | static const struct dim_cq_moder | ||
| 320 | rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { | ||
| 321 | NET_DIM_RX_EQE_PROFILES, | ||
| 322 | NET_DIM_RX_CQE_PROFILES, | ||
| 323 | }; | ||
| 324 | |||
| 325 | static const struct dim_cq_moder | ||
| 326 | tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { | ||
| 327 | NET_DIM_TX_EQE_PROFILES, | ||
| 328 | NET_DIM_TX_CQE_PROFILES, | ||
| 329 | }; | ||
| 330 | |||
| 331 | /** | ||
| 332 | * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile | ||
| 333 | * @cq_period_mode: CQ period mode | ||
| 334 | * @ix: Profile index | ||
| 335 | */ | ||
| 336 | struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix); | ||
| 337 | |||
| 338 | /** | ||
| 339 | * net_dim_get_def_rx_moderation - provide the default RX moderation | ||
| 340 | * @cq_period_mode: CQ period mode | ||
| 341 | */ | ||
| 342 | struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode); | ||
| 343 | |||
| 344 | /** | ||
| 345 | * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile | ||
| 346 | * @cq_period_mode: CQ period mode | ||
| 347 | * @ix: Profile index | ||
| 348 | */ | ||
| 349 | struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix); | ||
| 350 | |||
| 351 | /** | ||
| 352 | * net_dim_get_def_tx_moderation - provide the default TX moderation | ||
| 353 | * @cq_period_mode: CQ period mode | ||
| 354 | */ | ||
| 355 | struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); | ||
| 356 | |||
| 357 | /** | ||
| 358 | * net_dim - main DIM algorithm entry point | ||
| 359 | * @dim: DIM instance information | ||
| 360 | * @end_sample: Current data measurement | ||
| 361 | * | ||
| 362 | * Called by the consumer. | ||
| 363 | * This is the main logic of the algorithm, where data is processed in order to decide on next | ||
| 364 | * required action. | ||
| 365 | */ | ||
| 366 | void net_dim(struct dim *dim, struct dim_sample end_sample); | ||
| 367 | |||
| 368 | /* RDMA DIM */ | ||
| 369 | |||
| 370 | /* | ||
| 371 | * RDMA DIM profile: | ||
| 372 | * profile size must be of RDMA_DIM_PARAMS_NUM_PROFILES. | ||
| 373 | */ | ||
| 374 | #define RDMA_DIM_PARAMS_NUM_PROFILES 9 | ||
| 375 | #define RDMA_DIM_START_PROFILE 0 | ||
| 376 | |||
| 377 | /** | ||
| 378 | * rdma_dim - Runs the adaptive moderation. | ||
| 379 | * @dim: The moderation struct. | ||
| 380 | * @completions: The number of completions collected in this round. | ||
| 381 | * | ||
| 382 | * Each call to rdma_dim takes the latest amount of completions that | ||
| 383 | * have been collected and counts them as a new event. | ||
| 384 | * Once enough events have been collected the algorithm decides a new | ||
| 385 | * moderation level. | ||
| 386 | */ | ||
| 387 | void rdma_dim(struct dim *dim, u64 completions); | ||
| 388 | |||
| 389 | #endif /* DIM_H */ | ||
