diff options
author | Steven Rostedt <srostedt@redhat.com> | 2011-01-27 22:54:33 -0500 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2011-02-07 20:56:19 -0500 |
commit | 61e9dea20e1ada886cc49a9ec6fe3c6ac0de7324 (patch) | |
tree | 8f12ee74970d57550806a6f3e905cfca8d67601a /kernel/trace | |
parent | f76690afd05e3e163149310bdcd30234f93b3a7a (diff) |
tracing/filter: Use a tree instead of stack for filter_match_preds()
Currently the filter_match_preds() requires a stack to push
and pop the preds to determine if the filter matches the record or not.
This has two drawbacks:
1) It requires a stack to store state information. As this is done
in fast paths we can't allocate the storage for this stack, and
we can't use a global as it must be re-entrant. The stack is stored
on the kernel stack and this greatly limits how many preds we
may allow.
2) All conditions are calculated even when a short circuit exists.
a || b will always calculate a and b even though a was determined
to be true.
Using a tree we can walk a constant structure that will save
the state as we go. The algorithm is simply:
pred = root;
do {
switch (move) {
case MOVE_DOWN:
if (OR or AND) {
pred = left;
continue;
}
if (pred == root)
break;
match = pred->fn();
pred = pred->parent;
move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
continue;
case MOVE_UP_FROM_LEFT:
/* Only OR or AND can be a parent */
if (match && OR || !match && AND) {
/* short circuit */
if (pred == root)
break;
pred = pred->parent;
move = left child ?
MOVE_UP_FROM_LEFT :
MOVE_UP_FROM_RIGHT;
continue;
}
pred = pred->right;
move = MOVE_DOWN;
continue;
case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = pred->parent;
move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
continue;
}
done = 1;
} while (!done);
This way there's no strict limit to how many preds we allow
and it also will short circuit the logical operations when possible.
Cc: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 231 |
2 files changed, 194 insertions, 46 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 254d04a84ec3..bba34a72c780 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -664,6 +664,7 @@ struct event_filter { | |||
664 | int n_preds; /* Number assigned */ | 664 | int n_preds; /* Number assigned */ |
665 | int a_preds; /* allocated */ | 665 | int a_preds; /* allocated */ |
666 | struct filter_pred *preds; | 666 | struct filter_pred *preds; |
667 | struct filter_pred *root; | ||
667 | char *filter_string; | 668 | char *filter_string; |
668 | }; | 669 | }; |
669 | 670 | ||
@@ -675,6 +676,9 @@ struct event_subsystem { | |||
675 | int nr_events; | 676 | int nr_events; |
676 | }; | 677 | }; |
677 | 678 | ||
679 | #define FILTER_PRED_INVALID ((unsigned short)-1) | ||
680 | #define FILTER_PRED_IS_RIGHT (1 << 15) | ||
681 | |||
678 | struct filter_pred; | 682 | struct filter_pred; |
679 | struct regex; | 683 | struct regex; |
680 | 684 | ||
@@ -704,7 +708,10 @@ struct filter_pred { | |||
704 | int offset; | 708 | int offset; |
705 | int not; | 709 | int not; |
706 | int op; | 710 | int op; |
707 | int pop_n; | 711 | unsigned short index; |
712 | unsigned short parent; | ||
713 | unsigned short left; | ||
714 | unsigned short right; | ||
708 | }; | 715 | }; |
709 | 716 | ||
710 | extern struct list_head ftrace_common_fields; | 717 | extern struct list_head ftrace_common_fields; |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2f5458e244a3..10390491b6d0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -123,6 +123,11 @@ struct filter_parse_state { | |||
123 | } operand; | 123 | } operand; |
124 | }; | 124 | }; |
125 | 125 | ||
126 | struct pred_stack { | ||
127 | struct filter_pred **preds; | ||
128 | int index; | ||
129 | }; | ||
130 | |||
126 | #define DEFINE_COMPARISON_PRED(type) \ | 131 | #define DEFINE_COMPARISON_PRED(type) \ |
127 | static int filter_pred_##type(struct filter_pred *pred, void *event) \ | 132 | static int filter_pred_##type(struct filter_pred *pred, void *event) \ |
128 | { \ | 133 | { \ |
@@ -357,52 +362,95 @@ static void filter_build_regex(struct filter_pred *pred) | |||
357 | pred->not ^= not; | 362 | pred->not ^= not; |
358 | } | 363 | } |
359 | 364 | ||
365 | enum move_type { | ||
366 | MOVE_DOWN, | ||
367 | MOVE_UP_FROM_LEFT, | ||
368 | MOVE_UP_FROM_RIGHT | ||
369 | }; | ||
370 | |||
371 | static struct filter_pred * | ||
372 | get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, | ||
373 | int index, enum move_type *move) | ||
374 | { | ||
375 | if (pred->parent & FILTER_PRED_IS_RIGHT) | ||
376 | *move = MOVE_UP_FROM_RIGHT; | ||
377 | else | ||
378 | *move = MOVE_UP_FROM_LEFT; | ||
379 | pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT]; | ||
380 | |||
381 | return pred; | ||
382 | } | ||
383 | |||
360 | /* return 1 if event matches, 0 otherwise (discard) */ | 384 | /* return 1 if event matches, 0 otherwise (discard) */ |
361 | int filter_match_preds(struct event_filter *filter, void *rec) | 385 | int filter_match_preds(struct event_filter *filter, void *rec) |
362 | { | 386 | { |
363 | int match = -1, top = 0, val1 = 0, val2 = 0; | 387 | int match = -1; |
364 | int stack[MAX_FILTER_PRED]; | 388 | enum move_type move = MOVE_DOWN; |
365 | struct filter_pred *preds; | 389 | struct filter_pred *preds; |
366 | struct filter_pred *pred; | 390 | struct filter_pred *pred; |
391 | struct filter_pred *root; | ||
367 | int n_preds = ACCESS_ONCE(filter->n_preds); | 392 | int n_preds = ACCESS_ONCE(filter->n_preds); |
368 | int i; | 393 | int done = 0; |
369 | 394 | ||
370 | /* no filter is considered a match */ | 395 | /* no filter is considered a match */ |
371 | if (!n_preds) | 396 | if (!n_preds) |
372 | return 1; | 397 | return 1; |
373 | 398 | ||
374 | /* | 399 | /* |
375 | * n_preds and filter->preds is protect with preemption disabled. | 400 | * n_preds, root and filter->preds are protect with preemption disabled. |
376 | */ | 401 | */ |
377 | preds = rcu_dereference_sched(filter->preds); | 402 | preds = rcu_dereference_sched(filter->preds); |
403 | root = rcu_dereference_sched(filter->root); | ||
404 | if (!root) | ||
405 | return 1; | ||
378 | 406 | ||
379 | for (i = 0; i < n_preds; i++) { | 407 | pred = root; |
380 | pred = &preds[i]; | 408 | |
381 | if (!pred->pop_n) { | 409 | /* match is currently meaningless */ |
410 | match = -1; | ||
411 | |||
412 | do { | ||
413 | switch (move) { | ||
414 | case MOVE_DOWN: | ||
415 | /* only AND and OR have children */ | ||
416 | if (pred->left != FILTER_PRED_INVALID) { | ||
417 | /* keep going to leaf node */ | ||
418 | pred = &preds[pred->left]; | ||
419 | continue; | ||
420 | } | ||
382 | match = pred->fn(pred, rec); | 421 | match = pred->fn(pred, rec); |
383 | stack[top++] = match; | 422 | /* If this pred is the only pred */ |
423 | if (pred == root) | ||
424 | break; | ||
425 | pred = get_pred_parent(pred, preds, | ||
426 | pred->parent, &move); | ||
427 | continue; | ||
428 | case MOVE_UP_FROM_LEFT: | ||
429 | /* Check for short circuits */ | ||
430 | if ((match && pred->op == OP_OR) || | ||
431 | (!match && pred->op == OP_AND)) { | ||
432 | if (pred == root) | ||
433 | break; | ||
434 | pred = get_pred_parent(pred, preds, | ||
435 | pred->parent, &move); | ||
436 | continue; | ||
437 | } | ||
438 | /* now go down the right side of the tree. */ | ||
439 | pred = &preds[pred->right]; | ||
440 | move = MOVE_DOWN; | ||
441 | continue; | ||
442 | case MOVE_UP_FROM_RIGHT: | ||
443 | /* We finished this equation. */ | ||
444 | if (pred == root) | ||
445 | break; | ||
446 | pred = get_pred_parent(pred, preds, | ||
447 | pred->parent, &move); | ||
384 | continue; | 448 | continue; |
385 | } | 449 | } |
386 | if (pred->pop_n > top) { | 450 | done = 1; |
387 | WARN_ON_ONCE(1); | 451 | } while (!done); |
388 | return 0; | ||
389 | } | ||
390 | val1 = stack[--top]; | ||
391 | val2 = stack[--top]; | ||
392 | switch (pred->op) { | ||
393 | case OP_AND: | ||
394 | match = val1 && val2; | ||
395 | break; | ||
396 | case OP_OR: | ||
397 | match = val1 || val2; | ||
398 | break; | ||
399 | default: | ||
400 | WARN_ONCE(1, "filter op is not AND or OR"); | ||
401 | } | ||
402 | stack[top++] = match; | ||
403 | } | ||
404 | 452 | ||
405 | return stack[--top]; | 453 | return match; |
406 | } | 454 | } |
407 | EXPORT_SYMBOL_GPL(filter_match_preds); | 455 | EXPORT_SYMBOL_GPL(filter_match_preds); |
408 | 456 | ||
@@ -539,10 +587,58 @@ static void filter_clear_pred(struct filter_pred *pred) | |||
539 | pred->regex.len = 0; | 587 | pred->regex.len = 0; |
540 | } | 588 | } |
541 | 589 | ||
542 | static int filter_set_pred(struct filter_pred *dest, | 590 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) |
591 | { | ||
592 | stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); | ||
593 | if (!stack->preds) | ||
594 | return -ENOMEM; | ||
595 | stack->index = n_preds; | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | static void __free_pred_stack(struct pred_stack *stack) | ||
600 | { | ||
601 | kfree(stack->preds); | ||
602 | stack->index = 0; | ||
603 | } | ||
604 | |||
605 | static int __push_pred_stack(struct pred_stack *stack, | ||
606 | struct filter_pred *pred) | ||
607 | { | ||
608 | int index = stack->index; | ||
609 | |||
610 | if (WARN_ON(index == 0)) | ||
611 | return -ENOSPC; | ||
612 | |||
613 | stack->preds[--index] = pred; | ||
614 | stack->index = index; | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static struct filter_pred * | ||
619 | __pop_pred_stack(struct pred_stack *stack) | ||
620 | { | ||
621 | struct filter_pred *pred; | ||
622 | int index = stack->index; | ||
623 | |||
624 | pred = stack->preds[index++]; | ||
625 | if (!pred) | ||
626 | return NULL; | ||
627 | |||
628 | stack->index = index; | ||
629 | return pred; | ||
630 | } | ||
631 | |||
632 | static int filter_set_pred(struct event_filter *filter, | ||
633 | int idx, | ||
634 | struct pred_stack *stack, | ||
543 | struct filter_pred *src, | 635 | struct filter_pred *src, |
544 | filter_pred_fn_t fn) | 636 | filter_pred_fn_t fn) |
545 | { | 637 | { |
638 | struct filter_pred *dest = &filter->preds[idx]; | ||
639 | struct filter_pred *left; | ||
640 | struct filter_pred *right; | ||
641 | |||
546 | *dest = *src; | 642 | *dest = *src; |
547 | if (src->field_name) { | 643 | if (src->field_name) { |
548 | dest->field_name = kstrdup(src->field_name, GFP_KERNEL); | 644 | dest->field_name = kstrdup(src->field_name, GFP_KERNEL); |
@@ -550,8 +646,25 @@ static int filter_set_pred(struct filter_pred *dest, | |||
550 | return -ENOMEM; | 646 | return -ENOMEM; |
551 | } | 647 | } |
552 | dest->fn = fn; | 648 | dest->fn = fn; |
649 | dest->index = idx; | ||
553 | 650 | ||
554 | return 0; | 651 | if (dest->op == OP_OR || dest->op == OP_AND) { |
652 | right = __pop_pred_stack(stack); | ||
653 | left = __pop_pred_stack(stack); | ||
654 | if (!left || !right) | ||
655 | return -EINVAL; | ||
656 | dest->left = left->index; | ||
657 | dest->right = right->index; | ||
658 | left->parent = dest->index; | ||
659 | right->parent = dest->index | FILTER_PRED_IS_RIGHT; | ||
660 | } else | ||
661 | /* | ||
662 | * Make dest->left invalid to be used as a quick | ||
663 | * way to know this is a leaf node. | ||
664 | */ | ||
665 | dest->left = FILTER_PRED_INVALID; | ||
666 | |||
667 | return __push_pred_stack(stack, dest); | ||
555 | } | 668 | } |
556 | 669 | ||
557 | static void __free_preds(struct event_filter *filter) | 670 | static void __free_preds(struct event_filter *filter) |
@@ -574,6 +687,7 @@ static void reset_preds(struct event_filter *filter) | |||
574 | int i; | 687 | int i; |
575 | 688 | ||
576 | filter->n_preds = 0; | 689 | filter->n_preds = 0; |
690 | filter->root = NULL; | ||
577 | if (!filter->preds) | 691 | if (!filter->preds) |
578 | return; | 692 | return; |
579 | 693 | ||
@@ -707,6 +821,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, | |||
707 | struct ftrace_event_call *call, | 821 | struct ftrace_event_call *call, |
708 | struct event_filter *filter, | 822 | struct event_filter *filter, |
709 | struct filter_pred *pred, | 823 | struct filter_pred *pred, |
824 | struct pred_stack *stack, | ||
710 | filter_pred_fn_t fn) | 825 | filter_pred_fn_t fn) |
711 | { | 826 | { |
712 | int idx, err; | 827 | int idx, err; |
@@ -718,7 +833,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, | |||
718 | 833 | ||
719 | idx = filter->n_preds; | 834 | idx = filter->n_preds; |
720 | filter_clear_pred(&filter->preds[idx]); | 835 | filter_clear_pred(&filter->preds[idx]); |
721 | err = filter_set_pred(&filter->preds[idx], pred, fn); | 836 | err = filter_set_pred(filter, idx, stack, pred, fn); |
722 | if (err) | 837 | if (err) |
723 | return err; | 838 | return err; |
724 | 839 | ||
@@ -803,6 +918,7 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
803 | struct ftrace_event_call *call, | 918 | struct ftrace_event_call *call, |
804 | struct event_filter *filter, | 919 | struct event_filter *filter, |
805 | struct filter_pred *pred, | 920 | struct filter_pred *pred, |
921 | struct pred_stack *stack, | ||
806 | bool dry_run) | 922 | bool dry_run) |
807 | { | 923 | { |
808 | struct ftrace_event_field *field; | 924 | struct ftrace_event_field *field; |
@@ -812,13 +928,10 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
812 | 928 | ||
813 | fn = pred->fn = filter_pred_none; | 929 | fn = pred->fn = filter_pred_none; |
814 | 930 | ||
815 | if (pred->op == OP_AND) { | 931 | if (pred->op == OP_AND) |
816 | pred->pop_n = 2; | ||
817 | goto add_pred_fn; | 932 | goto add_pred_fn; |
818 | } else if (pred->op == OP_OR) { | 933 | else if (pred->op == OP_OR) |
819 | pred->pop_n = 2; | ||
820 | goto add_pred_fn; | 934 | goto add_pred_fn; |
821 | } | ||
822 | 935 | ||
823 | field = find_event_field(call, pred->field_name); | 936 | field = find_event_field(call, pred->field_name); |
824 | if (!field) { | 937 | if (!field) { |
@@ -867,7 +980,7 @@ static int filter_add_pred(struct filter_parse_state *ps, | |||
867 | 980 | ||
868 | add_pred_fn: | 981 | add_pred_fn: |
869 | if (!dry_run) | 982 | if (!dry_run) |
870 | return filter_add_pred_fn(ps, call, filter, pred, fn); | 983 | return filter_add_pred_fn(ps, call, filter, pred, stack, fn); |
871 | return 0; | 984 | return 0; |
872 | } | 985 | } |
873 | 986 | ||
@@ -1248,6 +1361,7 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1248 | char *operand1 = NULL, *operand2 = NULL; | 1361 | char *operand1 = NULL, *operand2 = NULL; |
1249 | struct filter_pred *pred; | 1362 | struct filter_pred *pred; |
1250 | struct postfix_elt *elt; | 1363 | struct postfix_elt *elt; |
1364 | struct pred_stack stack = { }; /* init to NULL */ | ||
1251 | int err; | 1365 | int err; |
1252 | int n_preds = 0; | 1366 | int n_preds = 0; |
1253 | 1367 | ||
@@ -1262,9 +1376,12 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1262 | return err; | 1376 | return err; |
1263 | 1377 | ||
1264 | if (!dry_run) { | 1378 | if (!dry_run) { |
1265 | err = __alloc_preds(filter, n_preds); | 1379 | err = __alloc_pred_stack(&stack, n_preds); |
1266 | if (err) | 1380 | if (err) |
1267 | return err; | 1381 | return err; |
1382 | err = __alloc_preds(filter, n_preds); | ||
1383 | if (err) | ||
1384 | goto fail; | ||
1268 | } | 1385 | } |
1269 | 1386 | ||
1270 | n_preds = 0; | 1387 | n_preds = 0; |
@@ -1276,14 +1393,16 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1276 | operand2 = elt->operand; | 1393 | operand2 = elt->operand; |
1277 | else { | 1394 | else { |
1278 | parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); | 1395 | parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); |
1279 | return -EINVAL; | 1396 | err = -EINVAL; |
1397 | goto fail; | ||
1280 | } | 1398 | } |
1281 | continue; | 1399 | continue; |
1282 | } | 1400 | } |
1283 | 1401 | ||
1284 | if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { | 1402 | if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { |
1285 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | 1403 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); |
1286 | return -ENOSPC; | 1404 | err = -ENOSPC; |
1405 | goto fail; | ||
1287 | } | 1406 | } |
1288 | 1407 | ||
1289 | if (elt->op == OP_AND || elt->op == OP_OR) { | 1408 | if (elt->op == OP_AND || elt->op == OP_OR) { |
@@ -1293,22 +1412,44 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1293 | 1412 | ||
1294 | if (!operand1 || !operand2) { | 1413 | if (!operand1 || !operand2) { |
1295 | parse_error(ps, FILT_ERR_MISSING_FIELD, 0); | 1414 | parse_error(ps, FILT_ERR_MISSING_FIELD, 0); |
1296 | return -EINVAL; | 1415 | err = -EINVAL; |
1416 | goto fail; | ||
1297 | } | 1417 | } |
1298 | 1418 | ||
1299 | pred = create_pred(elt->op, operand1, operand2); | 1419 | pred = create_pred(elt->op, operand1, operand2); |
1300 | add_pred: | 1420 | add_pred: |
1301 | if (!pred) | 1421 | if (!pred) { |
1302 | return -ENOMEM; | 1422 | err = -ENOMEM; |
1303 | err = filter_add_pred(ps, call, filter, pred, dry_run); | 1423 | goto fail; |
1424 | } | ||
1425 | err = filter_add_pred(ps, call, filter, pred, &stack, dry_run); | ||
1304 | filter_free_pred(pred); | 1426 | filter_free_pred(pred); |
1305 | if (err) | 1427 | if (err) |
1306 | return err; | 1428 | goto fail; |
1307 | 1429 | ||
1308 | operand1 = operand2 = NULL; | 1430 | operand1 = operand2 = NULL; |
1309 | } | 1431 | } |
1310 | 1432 | ||
1311 | return 0; | 1433 | if (!dry_run) { |
1434 | /* We should have one item left on the stack */ | ||
1435 | pred = __pop_pred_stack(&stack); | ||
1436 | if (!pred) | ||
1437 | return -EINVAL; | ||
1438 | /* This item is where we start from in matching */ | ||
1439 | filter->root = pred; | ||
1440 | /* Make sure the stack is empty */ | ||
1441 | pred = __pop_pred_stack(&stack); | ||
1442 | if (WARN_ON(pred)) { | ||
1443 | err = -EINVAL; | ||
1444 | filter->root = NULL; | ||
1445 | goto fail; | ||
1446 | } | ||
1447 | } | ||
1448 | |||
1449 | err = 0; | ||
1450 | fail: | ||
1451 | __free_pred_stack(&stack); | ||
1452 | return err; | ||
1312 | } | 1453 | } |
1313 | 1454 | ||
1314 | static int replace_system_preds(struct event_subsystem *system, | 1455 | static int replace_system_preds(struct event_subsystem *system, |