aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2011-01-27 22:54:33 -0500
committerSteven Rostedt <rostedt@goodmis.org>2011-02-07 20:56:19 -0500
commit61e9dea20e1ada886cc49a9ec6fe3c6ac0de7324 (patch)
tree8f12ee74970d57550806a6f3e905cfca8d67601a
parentf76690afd05e3e163149310bdcd30234f93b3a7a (diff)
tracing/filter: Use a tree instead of stack for filter_match_preds()
Currently the filter_match_preds() requires a stack to push and pop the preds to determine if the filter matches the record or not. This has two drawbacks: 1) It requires a stack to store state information. As this is done in fast paths we can't allocate the storage for this stack, and we can't use a global as it must be re-entrant. The stack is stored on the kernel stack and this greatly limits how many preds we may allow. 2) All conditions are calculated even when a short circuit exists. a || b will always calculate a and b even though a was determined to be true. Using a tree we can walk a constant structure that will save the state as we go. The algorithm is simply: pred = root; do { switch (move) { case MOVE_DOWN: if (OR or AND) { pred = left; continue; } if (pred == root) break; match = pred->fn(); pred = pred->parent; move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; case MOVE_UP_FROM_LEFT: /* Only OR or AND can be a parent */ if (match && OR || !match && AND) { /* short circuit */ if (pred == root) break; pred = pred->parent; move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; } pred = pred->right; move = MOVE_DOWN; continue; case MOVE_UP_FROM_RIGHT: if (pred == root) break; pred = pred->parent; move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; } done = 1; } while (!done); This way there's no strict limit to how many preds we allow and it also will short circuit the logical operations when possible. Cc: Tom Zanussi <tzanussi@gmail.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r--kernel/trace/trace.h9
-rw-r--r--kernel/trace/trace_events_filter.c231
2 files changed, 194 insertions, 46 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 254d04a84ec3..bba34a72c780 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -664,6 +664,7 @@ struct event_filter {
664 int n_preds; /* Number assigned */ 664 int n_preds; /* Number assigned */
665 int a_preds; /* allocated */ 665 int a_preds; /* allocated */
666 struct filter_pred *preds; 666 struct filter_pred *preds;
667 struct filter_pred *root;
667 char *filter_string; 668 char *filter_string;
668}; 669};
669 670
@@ -675,6 +676,9 @@ struct event_subsystem {
675 int nr_events; 676 int nr_events;
676}; 677};
677 678
679#define FILTER_PRED_INVALID ((unsigned short)-1)
680#define FILTER_PRED_IS_RIGHT (1 << 15)
681
678struct filter_pred; 682struct filter_pred;
679struct regex; 683struct regex;
680 684
@@ -704,7 +708,10 @@ struct filter_pred {
704 int offset; 708 int offset;
705 int not; 709 int not;
706 int op; 710 int op;
707 int pop_n; 711 unsigned short index;
712 unsigned short parent;
713 unsigned short left;
714 unsigned short right;
708}; 715};
709 716
710extern struct list_head ftrace_common_fields; 717extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2f5458e244a3..10390491b6d0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,6 +123,11 @@ struct filter_parse_state {
123 } operand; 123 } operand;
124}; 124};
125 125
126struct pred_stack {
127 struct filter_pred **preds;
128 int index;
129};
130
126#define DEFINE_COMPARISON_PRED(type) \ 131#define DEFINE_COMPARISON_PRED(type) \
127static int filter_pred_##type(struct filter_pred *pred, void *event) \ 132static int filter_pred_##type(struct filter_pred *pred, void *event) \
128{ \ 133{ \
@@ -357,52 +362,95 @@ static void filter_build_regex(struct filter_pred *pred)
357 pred->not ^= not; 362 pred->not ^= not;
358} 363}
359 364
365enum move_type {
366 MOVE_DOWN,
367 MOVE_UP_FROM_LEFT,
368 MOVE_UP_FROM_RIGHT
369};
370
371static struct filter_pred *
372get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
373 int index, enum move_type *move)
374{
375 if (pred->parent & FILTER_PRED_IS_RIGHT)
376 *move = MOVE_UP_FROM_RIGHT;
377 else
378 *move = MOVE_UP_FROM_LEFT;
379 pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
380
381 return pred;
382}
383
360/* return 1 if event matches, 0 otherwise (discard) */ 384/* return 1 if event matches, 0 otherwise (discard) */
361int filter_match_preds(struct event_filter *filter, void *rec) 385int filter_match_preds(struct event_filter *filter, void *rec)
362{ 386{
363 int match = -1, top = 0, val1 = 0, val2 = 0; 387 int match = -1;
364 int stack[MAX_FILTER_PRED]; 388 enum move_type move = MOVE_DOWN;
365 struct filter_pred *preds; 389 struct filter_pred *preds;
366 struct filter_pred *pred; 390 struct filter_pred *pred;
391 struct filter_pred *root;
367 int n_preds = ACCESS_ONCE(filter->n_preds); 392 int n_preds = ACCESS_ONCE(filter->n_preds);
368 int i; 393 int done = 0;
369 394
370 /* no filter is considered a match */ 395 /* no filter is considered a match */
371 if (!n_preds) 396 if (!n_preds)
372 return 1; 397 return 1;
373 398
374 /* 399 /*
375 * n_preds and filter->preds is protect with preemption disabled. 400 * n_preds, root and filter->preds are protect with preemption disabled.
376 */ 401 */
377 preds = rcu_dereference_sched(filter->preds); 402 preds = rcu_dereference_sched(filter->preds);
403 root = rcu_dereference_sched(filter->root);
404 if (!root)
405 return 1;
378 406
379 for (i = 0; i < n_preds; i++) { 407 pred = root;
380 pred = &preds[i]; 408
381 if (!pred->pop_n) { 409 /* match is currently meaningless */
410 match = -1;
411
412 do {
413 switch (move) {
414 case MOVE_DOWN:
415 /* only AND and OR have children */
416 if (pred->left != FILTER_PRED_INVALID) {
417 /* keep going to leaf node */
418 pred = &preds[pred->left];
419 continue;
420 }
382 match = pred->fn(pred, rec); 421 match = pred->fn(pred, rec);
383 stack[top++] = match; 422 /* If this pred is the only pred */
423 if (pred == root)
424 break;
425 pred = get_pred_parent(pred, preds,
426 pred->parent, &move);
427 continue;
428 case MOVE_UP_FROM_LEFT:
429 /* Check for short circuits */
430 if ((match && pred->op == OP_OR) ||
431 (!match && pred->op == OP_AND)) {
432 if (pred == root)
433 break;
434 pred = get_pred_parent(pred, preds,
435 pred->parent, &move);
436 continue;
437 }
438 /* now go down the right side of the tree. */
439 pred = &preds[pred->right];
440 move = MOVE_DOWN;
441 continue;
442 case MOVE_UP_FROM_RIGHT:
443 /* We finished this equation. */
444 if (pred == root)
445 break;
446 pred = get_pred_parent(pred, preds,
447 pred->parent, &move);
384 continue; 448 continue;
385 } 449 }
386 if (pred->pop_n > top) { 450 done = 1;
387 WARN_ON_ONCE(1); 451 } while (!done);
388 return 0;
389 }
390 val1 = stack[--top];
391 val2 = stack[--top];
392 switch (pred->op) {
393 case OP_AND:
394 match = val1 && val2;
395 break;
396 case OP_OR:
397 match = val1 || val2;
398 break;
399 default:
400 WARN_ONCE(1, "filter op is not AND or OR");
401 }
402 stack[top++] = match;
403 }
404 452
405 return stack[--top]; 453 return match;
406} 454}
407EXPORT_SYMBOL_GPL(filter_match_preds); 455EXPORT_SYMBOL_GPL(filter_match_preds);
408 456
@@ -539,10 +587,58 @@ static void filter_clear_pred(struct filter_pred *pred)
539 pred->regex.len = 0; 587 pred->regex.len = 0;
540} 588}
541 589
542static int filter_set_pred(struct filter_pred *dest, 590static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
591{
592 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
593 if (!stack->preds)
594 return -ENOMEM;
595 stack->index = n_preds;
596 return 0;
597}
598
599static void __free_pred_stack(struct pred_stack *stack)
600{
601 kfree(stack->preds);
602 stack->index = 0;
603}
604
605static int __push_pred_stack(struct pred_stack *stack,
606 struct filter_pred *pred)
607{
608 int index = stack->index;
609
610 if (WARN_ON(index == 0))
611 return -ENOSPC;
612
613 stack->preds[--index] = pred;
614 stack->index = index;
615 return 0;
616}
617
618static struct filter_pred *
619__pop_pred_stack(struct pred_stack *stack)
620{
621 struct filter_pred *pred;
622 int index = stack->index;
623
624 pred = stack->preds[index++];
625 if (!pred)
626 return NULL;
627
628 stack->index = index;
629 return pred;
630}
631
632static int filter_set_pred(struct event_filter *filter,
633 int idx,
634 struct pred_stack *stack,
543 struct filter_pred *src, 635 struct filter_pred *src,
544 filter_pred_fn_t fn) 636 filter_pred_fn_t fn)
545{ 637{
638 struct filter_pred *dest = &filter->preds[idx];
639 struct filter_pred *left;
640 struct filter_pred *right;
641
546 *dest = *src; 642 *dest = *src;
547 if (src->field_name) { 643 if (src->field_name) {
548 dest->field_name = kstrdup(src->field_name, GFP_KERNEL); 644 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,8 +646,25 @@ static int filter_set_pred(struct filter_pred *dest,
550 return -ENOMEM; 646 return -ENOMEM;
551 } 647 }
552 dest->fn = fn; 648 dest->fn = fn;
649 dest->index = idx;
553 650
554 return 0; 651 if (dest->op == OP_OR || dest->op == OP_AND) {
652 right = __pop_pred_stack(stack);
653 left = __pop_pred_stack(stack);
654 if (!left || !right)
655 return -EINVAL;
656 dest->left = left->index;
657 dest->right = right->index;
658 left->parent = dest->index;
659 right->parent = dest->index | FILTER_PRED_IS_RIGHT;
660 } else
661 /*
662 * Make dest->left invalid to be used as a quick
663 * way to know this is a leaf node.
664 */
665 dest->left = FILTER_PRED_INVALID;
666
667 return __push_pred_stack(stack, dest);
555} 668}
556 669
557static void __free_preds(struct event_filter *filter) 670static void __free_preds(struct event_filter *filter)
@@ -574,6 +687,7 @@ static void reset_preds(struct event_filter *filter)
574 int i; 687 int i;
575 688
576 filter->n_preds = 0; 689 filter->n_preds = 0;
690 filter->root = NULL;
577 if (!filter->preds) 691 if (!filter->preds)
578 return; 692 return;
579 693
@@ -707,6 +821,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
707 struct ftrace_event_call *call, 821 struct ftrace_event_call *call,
708 struct event_filter *filter, 822 struct event_filter *filter,
709 struct filter_pred *pred, 823 struct filter_pred *pred,
824 struct pred_stack *stack,
710 filter_pred_fn_t fn) 825 filter_pred_fn_t fn)
711{ 826{
712 int idx, err; 827 int idx, err;
@@ -718,7 +833,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
718 833
719 idx = filter->n_preds; 834 idx = filter->n_preds;
720 filter_clear_pred(&filter->preds[idx]); 835 filter_clear_pred(&filter->preds[idx]);
721 err = filter_set_pred(&filter->preds[idx], pred, fn); 836 err = filter_set_pred(filter, idx, stack, pred, fn);
722 if (err) 837 if (err)
723 return err; 838 return err;
724 839
@@ -803,6 +918,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
803 struct ftrace_event_call *call, 918 struct ftrace_event_call *call,
804 struct event_filter *filter, 919 struct event_filter *filter,
805 struct filter_pred *pred, 920 struct filter_pred *pred,
921 struct pred_stack *stack,
806 bool dry_run) 922 bool dry_run)
807{ 923{
808 struct ftrace_event_field *field; 924 struct ftrace_event_field *field;
@@ -812,13 +928,10 @@ static int filter_add_pred(struct filter_parse_state *ps,
812 928
813 fn = pred->fn = filter_pred_none; 929 fn = pred->fn = filter_pred_none;
814 930
815 if (pred->op == OP_AND) { 931 if (pred->op == OP_AND)
816 pred->pop_n = 2;
817 goto add_pred_fn; 932 goto add_pred_fn;
818 } else if (pred->op == OP_OR) { 933 else if (pred->op == OP_OR)
819 pred->pop_n = 2;
820 goto add_pred_fn; 934 goto add_pred_fn;
821 }
822 935
823 field = find_event_field(call, pred->field_name); 936 field = find_event_field(call, pred->field_name);
824 if (!field) { 937 if (!field) {
@@ -867,7 +980,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
867 980
868add_pred_fn: 981add_pred_fn:
869 if (!dry_run) 982 if (!dry_run)
870 return filter_add_pred_fn(ps, call, filter, pred, fn); 983 return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
871 return 0; 984 return 0;
872} 985}
873 986
@@ -1248,6 +1361,7 @@ static int replace_preds(struct ftrace_event_call *call,
1248 char *operand1 = NULL, *operand2 = NULL; 1361 char *operand1 = NULL, *operand2 = NULL;
1249 struct filter_pred *pred; 1362 struct filter_pred *pred;
1250 struct postfix_elt *elt; 1363 struct postfix_elt *elt;
1364 struct pred_stack stack = { }; /* init to NULL */
1251 int err; 1365 int err;
1252 int n_preds = 0; 1366 int n_preds = 0;
1253 1367
@@ -1262,9 +1376,12 @@ static int replace_preds(struct ftrace_event_call *call,
1262 return err; 1376 return err;
1263 1377
1264 if (!dry_run) { 1378 if (!dry_run) {
1265 err = __alloc_preds(filter, n_preds); 1379 err = __alloc_pred_stack(&stack, n_preds);
1266 if (err) 1380 if (err)
1267 return err; 1381 return err;
1382 err = __alloc_preds(filter, n_preds);
1383 if (err)
1384 goto fail;
1268 } 1385 }
1269 1386
1270 n_preds = 0; 1387 n_preds = 0;
@@ -1276,14 +1393,16 @@ static int replace_preds(struct ftrace_event_call *call,
1276 operand2 = elt->operand; 1393 operand2 = elt->operand;
1277 else { 1394 else {
1278 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); 1395 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
1279 return -EINVAL; 1396 err = -EINVAL;
1397 goto fail;
1280 } 1398 }
1281 continue; 1399 continue;
1282 } 1400 }
1283 1401
1284 if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { 1402 if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
1285 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 1403 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1286 return -ENOSPC; 1404 err = -ENOSPC;
1405 goto fail;
1287 } 1406 }
1288 1407
1289 if (elt->op == OP_AND || elt->op == OP_OR) { 1408 if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1293,22 +1412,44 @@ static int replace_preds(struct ftrace_event_call *call,
1293 1412
1294 if (!operand1 || !operand2) { 1413 if (!operand1 || !operand2) {
1295 parse_error(ps, FILT_ERR_MISSING_FIELD, 0); 1414 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1296 return -EINVAL; 1415 err = -EINVAL;
1416 goto fail;
1297 } 1417 }
1298 1418
1299 pred = create_pred(elt->op, operand1, operand2); 1419 pred = create_pred(elt->op, operand1, operand2);
1300add_pred: 1420add_pred:
1301 if (!pred) 1421 if (!pred) {
1302 return -ENOMEM; 1422 err = -ENOMEM;
1303 err = filter_add_pred(ps, call, filter, pred, dry_run); 1423 goto fail;
1424 }
1425 err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
1304 filter_free_pred(pred); 1426 filter_free_pred(pred);
1305 if (err) 1427 if (err)
1306 return err; 1428 goto fail;
1307 1429
1308 operand1 = operand2 = NULL; 1430 operand1 = operand2 = NULL;
1309 } 1431 }
1310 1432
1311 return 0; 1433 if (!dry_run) {
1434 /* We should have one item left on the stack */
1435 pred = __pop_pred_stack(&stack);
1436 if (!pred)
1437 return -EINVAL;
1438 /* This item is where we start from in matching */
1439 filter->root = pred;
1440 /* Make sure the stack is empty */
1441 pred = __pop_pred_stack(&stack);
1442 if (WARN_ON(pred)) {
1443 err = -EINVAL;
1444 filter->root = NULL;
1445 goto fail;
1446 }
1447 }
1448
1449 err = 0;
1450fail:
1451 __free_pred_stack(&stack);
1452 return err;
1312} 1453}
1313 1454
1314static int replace_system_preds(struct event_subsystem *system, 1455static int replace_system_preds(struct event_subsystem *system,