Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  590
1 file changed, 426 insertions(+), 164 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..f765465bffe4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,6 +23,8 @@
23#include <asm/local.h> 23#include <asm/local.h>
24#include "trace.h" 24#include "trace.h"
25 25
26static void update_pages_handler(struct work_struct *work);
27
26/* 28/*
27 * The ring buffer header is special. We must manually up keep it. 29 * The ring buffer header is special. We must manually up keep it.
28 */ 30 */
@@ -449,6 +451,7 @@ struct ring_buffer_per_cpu {
449 raw_spinlock_t reader_lock; /* serialize readers */ 451 raw_spinlock_t reader_lock; /* serialize readers */
450 arch_spinlock_t lock; 452 arch_spinlock_t lock;
451 struct lock_class_key lock_key; 453 struct lock_class_key lock_key;
454 unsigned int nr_pages;
452 struct list_head *pages; 455 struct list_head *pages;
453 struct buffer_page *head_page; /* read from head */ 456 struct buffer_page *head_page; /* read from head */
454 struct buffer_page *tail_page; /* write to tail */ 457 struct buffer_page *tail_page; /* write to tail */
@@ -466,13 +469,18 @@ struct ring_buffer_per_cpu {
466 unsigned long read_bytes; 469 unsigned long read_bytes;
467 u64 write_stamp; 470 u64 write_stamp;
468 u64 read_stamp; 471 u64 read_stamp;
472 /* ring buffer pages to update, > 0 to add, < 0 to remove */
473 int nr_pages_to_update;
474 struct list_head new_pages; /* new pages to add */
475 struct work_struct update_pages_work;
476 struct completion update_done;
469}; 477};
470 478
471struct ring_buffer { 479struct ring_buffer {
472 unsigned pages;
473 unsigned flags; 480 unsigned flags;
474 int cpus; 481 int cpus;
475 atomic_t record_disabled; 482 atomic_t record_disabled;
483 atomic_t resize_disabled;
476 cpumask_var_t cpumask; 484 cpumask_var_t cpumask;
477 485
478 struct lock_class_key *reader_lock_key; 486 struct lock_class_key *reader_lock_key;
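The new per-cpu fields above carry a deferred resize: nr_pages_to_update holds the signed page delta, new_pages collects pre-allocated pages, and update_pages_work/update_done let the update run on the CPU that owns the buffer. A condensed sketch of how the resize path later in this patch drives them (illustrative only, not part of the patch; new_nr_pages and cpu are assumed locals):

	/* Sketch: record the delta for this CPU and let the worker apply it. */
	cpu_buffer->nr_pages_to_update = new_nr_pages - cpu_buffer->nr_pages;

	if (cpu_online(cpu)) {
		/* apply the change on the CPU that owns this buffer */
		schedule_work_on(cpu, &cpu_buffer->update_pages_work);
		wait_for_completion(&cpu_buffer->update_done);
	} else {
		/* an offline CPU has no writers to race with */
		rb_update_pages(cpu_buffer);
	}
	cpu_buffer->nr_pages_to_update = 0;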
@@ -937,6 +945,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
937 struct list_head *head = cpu_buffer->pages; 945 struct list_head *head = cpu_buffer->pages;
938 struct buffer_page *bpage, *tmp; 946 struct buffer_page *bpage, *tmp;
939 947
948 /* Reset the head page if it exists */
949 if (cpu_buffer->head_page)
950 rb_set_head_page(cpu_buffer);
951
940 rb_head_page_deactivate(cpu_buffer); 952 rb_head_page_deactivate(cpu_buffer);
941 953
942 if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) 954 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
@@ -963,14 +975,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
963 return 0; 975 return 0;
964} 976}
965 977
966static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, 978static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
967 unsigned nr_pages)
968{ 979{
980 int i;
969 struct buffer_page *bpage, *tmp; 981 struct buffer_page *bpage, *tmp;
970 LIST_HEAD(pages);
971 unsigned i;
972
973 WARN_ON(!nr_pages);
974 982
975 for (i = 0; i < nr_pages; i++) { 983 for (i = 0; i < nr_pages; i++) {
976 struct page *page; 984 struct page *page;
@@ -981,15 +989,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
981 */ 989 */
982 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 990 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
983 GFP_KERNEL | __GFP_NORETRY, 991 GFP_KERNEL | __GFP_NORETRY,
984 cpu_to_node(cpu_buffer->cpu)); 992 cpu_to_node(cpu));
985 if (!bpage) 993 if (!bpage)
986 goto free_pages; 994 goto free_pages;
987 995
988 rb_check_bpage(cpu_buffer, bpage); 996 list_add(&bpage->list, pages);
989 997
990 list_add(&bpage->list, &pages); 998 page = alloc_pages_node(cpu_to_node(cpu),
991
992 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
993 GFP_KERNEL | __GFP_NORETRY, 0); 999 GFP_KERNEL | __GFP_NORETRY, 0);
994 if (!page) 1000 if (!page)
995 goto free_pages; 1001 goto free_pages;
@@ -997,6 +1003,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 rb_init_page(bpage->page); 1003 rb_init_page(bpage->page);
998 } 1004 }
999 1005
1006 return 0;
1007
1008free_pages:
1009 list_for_each_entry_safe(bpage, tmp, pages, list) {
1010 list_del_init(&bpage->list);
1011 free_buffer_page(bpage);
1012 }
1013
1014 return -ENOMEM;
1015}
1016
1017static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1018 unsigned nr_pages)
1019{
1020 LIST_HEAD(pages);
1021
1022 WARN_ON(!nr_pages);
1023
1024 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1025 return -ENOMEM;
1026
1000 /* 1027 /*
1001 * The ring buffer page list is a circular list that does not 1028 * The ring buffer page list is a circular list that does not
1002 * start and end with a list head. All page list items point to 1029 * start and end with a list head. All page list items point to
@@ -1005,20 +1032,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1005 cpu_buffer->pages = pages.next; 1032 cpu_buffer->pages = pages.next;
1006 list_del(&pages); 1033 list_del(&pages);
1007 1034
1035 cpu_buffer->nr_pages = nr_pages;
1036
1008 rb_check_pages(cpu_buffer); 1037 rb_check_pages(cpu_buffer);
1009 1038
1010 return 0; 1039 return 0;
1011
1012 free_pages:
1013 list_for_each_entry_safe(bpage, tmp, &pages, list) {
1014 list_del_init(&bpage->list);
1015 free_buffer_page(bpage);
1016 }
1017 return -ENOMEM;
1018} 1040}
1019 1041
1020static struct ring_buffer_per_cpu * 1042static struct ring_buffer_per_cpu *
1021rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 1043rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1022{ 1044{
1023 struct ring_buffer_per_cpu *cpu_buffer; 1045 struct ring_buffer_per_cpu *cpu_buffer;
1024 struct buffer_page *bpage; 1046 struct buffer_page *bpage;
@@ -1035,6 +1057,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1035 raw_spin_lock_init(&cpu_buffer->reader_lock); 1057 raw_spin_lock_init(&cpu_buffer->reader_lock);
1036 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); 1058 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1037 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 1059 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1060 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1061 init_completion(&cpu_buffer->update_done);
1038 1062
1039 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1063 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1040 GFP_KERNEL, cpu_to_node(cpu)); 1064 GFP_KERNEL, cpu_to_node(cpu));
@@ -1051,8 +1075,9 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1051 rb_init_page(bpage->page); 1075 rb_init_page(bpage->page);
1052 1076
1053 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1078 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1054 1079
1055 ret = rb_allocate_pages(cpu_buffer, buffer->pages); 1080 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1056 if (ret < 0) 1081 if (ret < 0)
1057 goto fail_free_reader; 1082 goto fail_free_reader;
1058 1083
@@ -1113,7 +1138,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1113{ 1138{
1114 struct ring_buffer *buffer; 1139 struct ring_buffer *buffer;
1115 int bsize; 1140 int bsize;
1116 int cpu; 1141 int cpu, nr_pages;
1117 1142
1118 /* keep it in its own cache line */ 1143 /* keep it in its own cache line */
1119 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), 1144 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1149,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1124 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) 1149 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1125 goto fail_free_buffer; 1150 goto fail_free_buffer;
1126 1151
1127 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1152 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1128 buffer->flags = flags; 1153 buffer->flags = flags;
1129 buffer->clock = trace_clock_local; 1154 buffer->clock = trace_clock_local;
1130 buffer->reader_lock_key = key; 1155 buffer->reader_lock_key = key;
1131 1156
1132 /* need at least two pages */ 1157 /* need at least two pages */
1133 if (buffer->pages < 2) 1158 if (nr_pages < 2)
1134 buffer->pages = 2; 1159 nr_pages = 2;
1135 1160
1136 /* 1161 /*
1137 * In case of non-hotplug cpu, if the ring-buffer is allocated 1162 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1179,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1154 1179
1155 for_each_buffer_cpu(buffer, cpu) { 1180 for_each_buffer_cpu(buffer, cpu) {
1156 buffer->buffers[cpu] = 1181 buffer->buffers[cpu] =
1157 rb_allocate_cpu_buffer(buffer, cpu); 1182 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1158 if (!buffer->buffers[cpu]) 1183 if (!buffer->buffers[cpu])
1159 goto fail_free_buffers; 1184 goto fail_free_buffers;
1160 } 1185 }
@@ -1222,58 +1247,221 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
1222 1247
1223static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 1248static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1224 1249
1225static void 1250static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1226rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1227{ 1251{
1228 struct buffer_page *bpage; 1252 return local_read(&bpage->entries) & RB_WRITE_MASK;
1229 struct list_head *p; 1253}
1230 unsigned i; 1254
1255static inline unsigned long rb_page_write(struct buffer_page *bpage)
1256{
1257 return local_read(&bpage->write) & RB_WRITE_MASK;
1258}
1259
1260static int
1261rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1262{
1263 struct list_head *tail_page, *to_remove, *next_page;
1264 struct buffer_page *to_remove_page, *tmp_iter_page;
1265 struct buffer_page *last_page, *first_page;
1266 unsigned int nr_removed;
1267 unsigned long head_bit;
1268 int page_entries;
1269
1270 head_bit = 0;
1231 1271
1232 raw_spin_lock_irq(&cpu_buffer->reader_lock); 1272 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1233 rb_head_page_deactivate(cpu_buffer); 1273 atomic_inc(&cpu_buffer->record_disabled);
1274 /*
1275 * We don't race with the readers since we have acquired the reader
1276 * lock. We also don't race with writers after disabling recording.
1277 * This makes it easy to figure out the first and the last page to be
1278 * removed from the list. We unlink all the pages in between including
1279 * the first and last pages. This is done in a busy loop so that we
1280 * lose the least number of traces.
1281 * The pages are freed after we restart recording and unlock readers.
1282 */
1283 tail_page = &cpu_buffer->tail_page->list;
1234 1284
1235 for (i = 0; i < nr_pages; i++) { 1285 /*
1236 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1286 * tail page might be on reader page, we remove the next page
1237 goto out; 1287 * from the ring buffer
1238 p = cpu_buffer->pages->next; 1288 */
1239 bpage = list_entry(p, struct buffer_page, list); 1289 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1240 list_del_init(&bpage->list); 1290 tail_page = rb_list_head(tail_page->next);
1241 free_buffer_page(bpage); 1291 to_remove = tail_page;
1292
1293 /* start of pages to remove */
1294 first_page = list_entry(rb_list_head(to_remove->next),
1295 struct buffer_page, list);
1296
1297 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1298 to_remove = rb_list_head(to_remove)->next;
1299 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1242 } 1300 }
1243 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1244 goto out;
1245 1301
1246 rb_reset_cpu(cpu_buffer); 1302 next_page = rb_list_head(to_remove)->next;
1247 rb_check_pages(cpu_buffer);
1248 1303
1249out: 1304 /*
1305 * Now we remove all pages between tail_page and next_page.
1306 * Make sure that we have head_bit value preserved for the
1307 * next page
1308 */
1309 tail_page->next = (struct list_head *)((unsigned long)next_page |
1310 head_bit);
1311 next_page = rb_list_head(next_page);
1312 next_page->prev = tail_page;
1313
1314 /* make sure pages points to a valid page in the ring buffer */
1315 cpu_buffer->pages = next_page;
1316
1317 /* update head page */
1318 if (head_bit)
1319 cpu_buffer->head_page = list_entry(next_page,
1320 struct buffer_page, list);
1321
1322 /*
1323 * change read pointer to make sure any read iterators reset
1324 * themselves
1325 */
1326 cpu_buffer->read = 0;
1327
1328 /* pages are removed, resume tracing and then free the pages */
1329 atomic_dec(&cpu_buffer->record_disabled);
1250 raw_spin_unlock_irq(&cpu_buffer->reader_lock); 1330 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1331
1332 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1333
1334 /* last buffer page to remove */
1335 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1336 list);
1337 tmp_iter_page = first_page;
1338
1339 do {
1340 to_remove_page = tmp_iter_page;
1341 rb_inc_page(cpu_buffer, &tmp_iter_page);
1342
1343 /* update the counters */
1344 page_entries = rb_page_entries(to_remove_page);
1345 if (page_entries) {
1346 /*
1347 * If something was added to this page, it was full
1348 * since it is not the tail page. So we deduct the
1349 * bytes consumed in ring buffer from here.
1350 * Increment overrun to account for the lost events.
1351 */
1352 local_add(page_entries, &cpu_buffer->overrun);
1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1354 }
1355
1356 /*
1357 * We have already removed references to this list item, just
1358 * free up the buffer_page and its page
1359 */
1360 free_buffer_page(to_remove_page);
1361 nr_removed--;
1362
1363 } while (to_remove_page != last_page);
1364
1365 RB_WARN_ON(cpu_buffer, nr_removed);
1366
1367 return nr_removed == 0;
1251} 1368}
1252 1369
1253static void 1370static int
1254rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 1371rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1255 struct list_head *pages, unsigned nr_pages)
1256{ 1372{
1257 struct buffer_page *bpage; 1373 struct list_head *pages = &cpu_buffer->new_pages;
1258 struct list_head *p; 1374 int retries, success;
1259 unsigned i;
1260 1375
1261 raw_spin_lock_irq(&cpu_buffer->reader_lock); 1376 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1262 rb_head_page_deactivate(cpu_buffer); 1377 /*
1378 * We are holding the reader lock, so the reader page won't be swapped
1379 * in the ring buffer. Now we are racing with the writer trying to
1380 * move head page and the tail page.
1381 * We are going to adapt the reader page update process where:
1382 * 1. We first splice the start and end of list of new pages between
1383 * the head page and its previous page.
1384 * 2. We cmpxchg the prev_page->next to point from head page to the
1385 * start of new pages list.
1386 * 3. Finally, we update the head->prev to the end of new list.
1387 *
1388 * We will try this process 10 times, to make sure that we don't keep
1389 * spinning.
1390 */
1391 retries = 10;
1392 success = 0;
1393 while (retries--) {
1394 struct list_head *head_page, *prev_page, *r;
1395 struct list_head *last_page, *first_page;
1396 struct list_head *head_page_with_bit;
1263 1397
1264 for (i = 0; i < nr_pages; i++) { 1398 head_page = &rb_set_head_page(cpu_buffer)->list;
1265 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1399 prev_page = head_page->prev;
1266 goto out; 1400
1267 p = pages->next; 1401 first_page = pages->next;
1268 bpage = list_entry(p, struct buffer_page, list); 1402 last_page = pages->prev;
1269 list_del_init(&bpage->list); 1403
1270 list_add_tail(&bpage->list, cpu_buffer->pages); 1404 head_page_with_bit = (struct list_head *)
1405 ((unsigned long)head_page | RB_PAGE_HEAD);
1406
1407 last_page->next = head_page_with_bit;
1408 first_page->prev = prev_page;
1409
1410 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1411
1412 if (r == head_page_with_bit) {
1413 /*
1414 * yay, we replaced the page pointer to our new list,
1415 * now, we just have to update to head page's prev
1416 * pointer to point to end of list
1417 */
1418 head_page->prev = last_page;
1419 success = 1;
1420 break;
1421 }
1271 } 1422 }
1272 rb_reset_cpu(cpu_buffer);
1273 rb_check_pages(cpu_buffer);
1274 1423
1275out: 1424 if (success)
1425 INIT_LIST_HEAD(pages);
1426 /*
1427 * If we weren't successful in adding in new pages, warn and stop
1428 * tracing
1429 */
1430 RB_WARN_ON(cpu_buffer, !success);
1276 raw_spin_unlock_irq(&cpu_buffer->reader_lock); 1431 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1432
1433 /* free pages if they weren't inserted */
1434 if (!success) {
1435 struct buffer_page *bpage, *tmp;
1436 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1437 list) {
1438 list_del_init(&bpage->list);
1439 free_buffer_page(bpage);
1440 }
1441 }
1442 return success;
1443}
1444
1445static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1446{
1447 int success;
1448
1449 if (cpu_buffer->nr_pages_to_update > 0)
1450 success = rb_insert_pages(cpu_buffer);
1451 else
1452 success = rb_remove_pages(cpu_buffer,
1453 -cpu_buffer->nr_pages_to_update);
1454
1455 if (success)
1456 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1457}
1458
1459static void update_pages_handler(struct work_struct *work)
1460{
1461 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1462 struct ring_buffer_per_cpu, update_pages_work);
1463 rb_update_pages(cpu_buffer);
1464 complete(&cpu_buffer->update_done);
1277} 1465}
1278 1466
1279/** 1467/**
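The insertion path above is the subtle part: new pages are spliced in next to the head page without stopping writers, using a single cmpxchg on the previous page's next pointer, which also carries the RB_PAGE_HEAD flag. A stripped-down sketch of the three-step splice described in the rb_insert_pages() comment (illustrative only, not part of the patch):

	/* Sketch: splice cpu_buffer->new_pages between prev_page and head_page. */
	struct list_head *head_page  = &rb_set_head_page(cpu_buffer)->list;
	struct list_head *prev_page  = head_page->prev;
	struct list_head *first_page = cpu_buffer->new_pages.next;
	struct list_head *last_page  = cpu_buffer->new_pages.prev;
	struct list_head *head_with_bit = (struct list_head *)
			((unsigned long)head_page | RB_PAGE_HEAD);

	/* 1. point the ends of the new list at the ring */
	last_page->next  = head_with_bit;
	first_page->prev = prev_page;

	/* 2. atomically redirect prev_page->next; fails if a writer moved head */
	if (cmpxchg(&prev_page->next, head_with_bit, first_page) == head_with_bit) {
		/* 3. only now fix the back pointer of the old head */
		head_page->prev = last_page;
	}

If the cmpxchg loses the race, the loop in rb_insert_pages() re-reads the head page and retries, up to ten times.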
@@ -1283,16 +1471,14 @@ out:
1283 * 1471 *
1284 * Minimum size is 2 * BUF_PAGE_SIZE. 1472 * Minimum size is 2 * BUF_PAGE_SIZE.
1285 * 1473 *
1286 * Returns -1 on failure. 1474 * Returns 0 on success and < 0 on failure.
1287 */ 1475 */
1288int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) 1476int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1477 int cpu_id)
1289{ 1478{
1290 struct ring_buffer_per_cpu *cpu_buffer; 1479 struct ring_buffer_per_cpu *cpu_buffer;
1291 unsigned nr_pages, rm_pages, new_pages; 1480 unsigned nr_pages;
1292 struct buffer_page *bpage, *tmp; 1481 int cpu, err = 0;
1293 unsigned long buffer_size;
1294 LIST_HEAD(pages);
1295 int i, cpu;
1296 1482
1297 /* 1483 /*
1298 * Always succeed at resizing a non-existent buffer: 1484 * Always succeed at resizing a non-existent buffer:
@@ -1300,115 +1486,161 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1300 if (!buffer) 1486 if (!buffer)
1301 return size; 1487 return size;
1302 1488
1489 /* Make sure the requested buffer exists */
1490 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1491 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1492 return size;
1493
1303 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 1494 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1304 size *= BUF_PAGE_SIZE; 1495 size *= BUF_PAGE_SIZE;
1305 buffer_size = buffer->pages * BUF_PAGE_SIZE;
1306 1496
1307 /* we need a minimum of two pages */ 1497 /* we need a minimum of two pages */
1308 if (size < BUF_PAGE_SIZE * 2) 1498 if (size < BUF_PAGE_SIZE * 2)
1309 size = BUF_PAGE_SIZE * 2; 1499 size = BUF_PAGE_SIZE * 2;
1310 1500
1311 if (size == buffer_size) 1501 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1312 return size;
1313
1314 atomic_inc(&buffer->record_disabled);
1315 1502
1316 /* Make sure all writers are done with this buffer. */ 1503 /*
1317 synchronize_sched(); 1504 * Don't succeed if resizing is disabled, as a reader might be
1505 * manipulating the ring buffer and is expecting a sane state while
1506 * this is true.
1507 */
1508 if (atomic_read(&buffer->resize_disabled))
1509 return -EBUSY;
1318 1510
1511 /* prevent another thread from changing buffer sizes */
1319 mutex_lock(&buffer->mutex); 1512 mutex_lock(&buffer->mutex);
1320 get_online_cpus();
1321
1322 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1323 1513
1324 if (size < buffer_size) { 1514 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1515 /* calculate the pages to update */
1516 for_each_buffer_cpu(buffer, cpu) {
1517 cpu_buffer = buffer->buffers[cpu];
1325 1518
1326 /* easy case, just free pages */ 1519 cpu_buffer->nr_pages_to_update = nr_pages -
1327 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) 1520 cpu_buffer->nr_pages;
1328 goto out_fail; 1521 /*
1522 * nothing more to do for removing pages or no update
1523 */
1524 if (cpu_buffer->nr_pages_to_update <= 0)
1525 continue;
1526 /*
1527 * to add pages, make sure all new pages can be
1528 * allocated without receiving ENOMEM
1529 */
1530 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1531 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1532 &cpu_buffer->new_pages, cpu)) {
1533 /* not enough memory for new pages */
1534 err = -ENOMEM;
1535 goto out_err;
1536 }
1537 }
1329 1538
1330 rm_pages = buffer->pages - nr_pages; 1539 get_online_cpus();
1540 /*
1541 * Fire off all the required work handlers
1542 * We can't schedule on offline CPUs, but it's not necessary
1543 * since we can change their buffer sizes without any race.
1544 */
1545 for_each_buffer_cpu(buffer, cpu) {
1546 cpu_buffer = buffer->buffers[cpu];
1547 if (!cpu_buffer->nr_pages_to_update)
1548 continue;
1549
1550 if (cpu_online(cpu))
1551 schedule_work_on(cpu,
1552 &cpu_buffer->update_pages_work);
1553 else
1554 rb_update_pages(cpu_buffer);
1555 }
1331 1556
1557 /* wait for all the updates to complete */
1332 for_each_buffer_cpu(buffer, cpu) { 1558 for_each_buffer_cpu(buffer, cpu) {
1333 cpu_buffer = buffer->buffers[cpu]; 1559 cpu_buffer = buffer->buffers[cpu];
1334 rb_remove_pages(cpu_buffer, rm_pages); 1560 if (!cpu_buffer->nr_pages_to_update)
1561 continue;
1562
1563 if (cpu_online(cpu))
1564 wait_for_completion(&cpu_buffer->update_done);
1565 cpu_buffer->nr_pages_to_update = 0;
1335 } 1566 }
1336 goto out;
1337 }
1338 1567
1339 /* 1568 put_online_cpus();
1340 * This is a bit more difficult. We only want to add pages 1569 } else {
1341 * when we can allocate enough for all CPUs. We do this 1570 cpu_buffer = buffer->buffers[cpu_id];
1342 * by allocating all the pages and storing them on a local
1343 * link list. If we succeed in our allocation, then we
1344 * add these pages to the cpu_buffers. Otherwise we just free
1345 * them all and return -ENOMEM;
1346 */
1347 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
1348 goto out_fail;
1349 1571
1350 new_pages = nr_pages - buffer->pages; 1572 if (nr_pages == cpu_buffer->nr_pages)
1573 goto out;
1351 1574
1352 for_each_buffer_cpu(buffer, cpu) { 1575 cpu_buffer->nr_pages_to_update = nr_pages -
1353 for (i = 0; i < new_pages; i++) { 1576 cpu_buffer->nr_pages;
1354 struct page *page; 1577
1355 /* 1578 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1356 * __GFP_NORETRY flag makes sure that the allocation 1579 if (cpu_buffer->nr_pages_to_update > 0 &&
1357 * fails gracefully without invoking oom-killer and 1580 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1358 * the system is not destabilized. 1581 &cpu_buffer->new_pages, cpu_id)) {
1359 */ 1582 err = -ENOMEM;
1360 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1583 goto out_err;
1361 cache_line_size()),
1362 GFP_KERNEL | __GFP_NORETRY,
1363 cpu_to_node(cpu));
1364 if (!bpage)
1365 goto free_pages;
1366 list_add(&bpage->list, &pages);
1367 page = alloc_pages_node(cpu_to_node(cpu),
1368 GFP_KERNEL | __GFP_NORETRY, 0);
1369 if (!page)
1370 goto free_pages;
1371 bpage->page = page_address(page);
1372 rb_init_page(bpage->page);
1373 } 1584 }
1374 }
1375 1585
1376 for_each_buffer_cpu(buffer, cpu) { 1586 get_online_cpus();
1377 cpu_buffer = buffer->buffers[cpu];
1378 rb_insert_pages(cpu_buffer, &pages, new_pages);
1379 }
1380 1587
1381 if (RB_WARN_ON(buffer, !list_empty(&pages))) 1588 if (cpu_online(cpu_id)) {
1382 goto out_fail; 1589 schedule_work_on(cpu_id,
1590 &cpu_buffer->update_pages_work);
1591 wait_for_completion(&cpu_buffer->update_done);
1592 } else
1593 rb_update_pages(cpu_buffer);
1594
1595 cpu_buffer->nr_pages_to_update = 0;
1596 put_online_cpus();
1597 }
1383 1598
1384 out: 1599 out:
1385 buffer->pages = nr_pages; 1600 /*
1386 put_online_cpus(); 1601 * The ring buffer resize can happen with the ring buffer
1602 * enabled, so that the update disturbs the tracing as little
1603 * as possible. But if the buffer is disabled, we do not need
1604 * to worry about that, and we can take the time to verify
1605 * that the buffer is not corrupt.
1606 */
1607 if (atomic_read(&buffer->record_disabled)) {
1608 atomic_inc(&buffer->record_disabled);
1609 /*
1610 * Even though the buffer was disabled, we must make sure
1611 * that it is truly disabled before calling rb_check_pages.
1612 * There could have been a race between checking
1613 * record_disable and incrementing it.
1614 */
1615 synchronize_sched();
1616 for_each_buffer_cpu(buffer, cpu) {
1617 cpu_buffer = buffer->buffers[cpu];
1618 rb_check_pages(cpu_buffer);
1619 }
1620 atomic_dec(&buffer->record_disabled);
1621 }
1622
1387 mutex_unlock(&buffer->mutex); 1623 mutex_unlock(&buffer->mutex);
1624 return size;
1388 1625
1389 atomic_dec(&buffer->record_disabled); 1626 out_err:
1627 for_each_buffer_cpu(buffer, cpu) {
1628 struct buffer_page *bpage, *tmp;
1390 1629
1391 return size; 1630 cpu_buffer = buffer->buffers[cpu];
1631 cpu_buffer->nr_pages_to_update = 0;
1392 1632
1393 free_pages: 1633 if (list_empty(&cpu_buffer->new_pages))
1394 list_for_each_entry_safe(bpage, tmp, &pages, list) { 1634 continue;
1395 list_del_init(&bpage->list);
1396 free_buffer_page(bpage);
1397 }
1398 put_online_cpus();
1399 mutex_unlock(&buffer->mutex);
1400 atomic_dec(&buffer->record_disabled);
1401 return -ENOMEM;
1402 1635
1403 /* 1636 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1404 * Something went totally wrong, and we are too paranoid 1637 list) {
1405 * to even clean up the mess. 1638 list_del_init(&bpage->list);
1406 */ 1639 free_buffer_page(bpage);
1407 out_fail: 1640 }
1408 put_online_cpus(); 1641 }
1409 mutex_unlock(&buffer->mutex); 1642 mutex_unlock(&buffer->mutex);
1410 atomic_dec(&buffer->record_disabled); 1643 return err;
1411 return -1;
1412} 1644}
1413EXPORT_SYMBOL_GPL(ring_buffer_resize); 1645EXPORT_SYMBOL_GPL(ring_buffer_resize);
1414 1646
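With the new third argument, callers can resize either every per-cpu buffer or a single CPU's buffer. A hedged usage sketch, assuming buffer is an existing struct ring_buffer * and the byte sizes are arbitrary (illustrative only, not part of the patch):

	int ret;

	/* Resize every per-cpu buffer to roughly 1 MB of data pages. */
	ret = ring_buffer_resize(buffer, 1024 * 1024, RING_BUFFER_ALL_CPUS);
	if (ret < 0)
		pr_warn("ring buffer resize failed: %d\n", ret);

	/* Or shrink only CPU 2; the other CPUs keep their current size. */
	ret = ring_buffer_resize(buffer, 8192, 2);

Note that in this patch the success path still returns the page-aligned size rather than 0, so testing for a negative return, as above, is the safe check. Since page counts are now tracked per CPU, ring_buffer_swap_cpu() below also compares the two per-cpu nr_pages values instead of a global one.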
@@ -1447,21 +1679,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
1447 return __rb_page_index(iter->head_page, iter->head); 1679 return __rb_page_index(iter->head_page, iter->head);
1448} 1680}
1449 1681
1450static inline unsigned long rb_page_write(struct buffer_page *bpage)
1451{
1452 return local_read(&bpage->write) & RB_WRITE_MASK;
1453}
1454
1455static inline unsigned rb_page_commit(struct buffer_page *bpage) 1682static inline unsigned rb_page_commit(struct buffer_page *bpage)
1456{ 1683{
1457 return local_read(&bpage->page->commit); 1684 return local_read(&bpage->page->commit);
1458} 1685}
1459 1686
1460static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1461{
1462 return local_read(&bpage->entries) & RB_WRITE_MASK;
1463}
1464
1465/* Size is determined by what has been committed */ 1687/* Size is determined by what has been committed */
1466static inline unsigned rb_page_size(struct buffer_page *bpage) 1688static inline unsigned rb_page_size(struct buffer_page *bpage)
1467{ 1689{
@@ -1510,7 +1732,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1510 * assign the commit to the tail. 1732 * assign the commit to the tail.
1511 */ 1733 */
1512 again: 1734 again:
1513 max_count = cpu_buffer->buffer->pages * 100; 1735 max_count = cpu_buffer->nr_pages * 100;
1514 1736
1515 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 1737 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1516 if (RB_WARN_ON(cpu_buffer, !(--max_count))) 1738 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3486,6 +3708,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3486 3708
3487 iter->cpu_buffer = cpu_buffer; 3709 iter->cpu_buffer = cpu_buffer;
3488 3710
3711 atomic_inc(&buffer->resize_disabled);
3489 atomic_inc(&cpu_buffer->record_disabled); 3712 atomic_inc(&cpu_buffer->record_disabled);
3490 3713
3491 return iter; 3714 return iter;
@@ -3548,7 +3771,14 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
3548{ 3771{
3549 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 3772 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3550 3773
3774 /*
3775 * Ring buffer is disabled from recording, here's a good place
3776 * to check the integrity of the ring buffer.
3777 */
3778 rb_check_pages(cpu_buffer);
3779
3551 atomic_dec(&cpu_buffer->record_disabled); 3780 atomic_dec(&cpu_buffer->record_disabled);
3781 atomic_dec(&cpu_buffer->buffer->resize_disabled);
3552 kfree(iter); 3782 kfree(iter);
3553} 3783}
3554EXPORT_SYMBOL_GPL(ring_buffer_read_finish); 3784EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
@@ -3588,9 +3818,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
3588 * ring_buffer_size - return the size of the ring buffer (in bytes) 3818 * ring_buffer_size - return the size of the ring buffer (in bytes)
3589 * @buffer: The ring buffer. 3819 * @buffer: The ring buffer.
3590 */ 3820 */
3591unsigned long ring_buffer_size(struct ring_buffer *buffer) 3821unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
3592{ 3822{
3593 return BUF_PAGE_SIZE * buffer->pages; 3823 /*
3824 * Earlier, this method returned
3825 * BUF_PAGE_SIZE * buffer->nr_pages
3826 * Since the nr_pages field is now removed, we have converted this to
3827 * return the per cpu buffer value.
3828 */
3829 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3830 return 0;
3831
3832 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3594} 3833}
3595EXPORT_SYMBOL_GPL(ring_buffer_size); 3834EXPORT_SYMBOL_GPL(ring_buffer_size);
3596 3835
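Because ring_buffer_size() is now per CPU, code that previously relied on one global size has to aggregate explicitly; CPUs outside the buffer's cpumask simply contribute 0. A small sketch of totalling the size across online CPUs (illustrative only, not part of the patch):

	unsigned long total = 0;
	int cpu;

	for_each_online_cpu(cpu)
		total += ring_buffer_size(buffer, cpu);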
@@ -3611,6 +3850,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3611 cpu_buffer->commit_page = cpu_buffer->head_page; 3850 cpu_buffer->commit_page = cpu_buffer->head_page;
3612 3851
3613 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 3852 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
3853 INIT_LIST_HEAD(&cpu_buffer->new_pages);
3614 local_set(&cpu_buffer->reader_page->write, 0); 3854 local_set(&cpu_buffer->reader_page->write, 0);
3615 local_set(&cpu_buffer->reader_page->entries, 0); 3855 local_set(&cpu_buffer->reader_page->entries, 0);
3616 local_set(&cpu_buffer->reader_page->page->commit, 0); 3856 local_set(&cpu_buffer->reader_page->page->commit, 0);
@@ -3647,8 +3887,12 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3647 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3887 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3648 return; 3888 return;
3649 3889
3890 atomic_inc(&buffer->resize_disabled);
3650 atomic_inc(&cpu_buffer->record_disabled); 3891 atomic_inc(&cpu_buffer->record_disabled);
3651 3892
3893 /* Make sure all commits have finished */
3894 synchronize_sched();
3895
3652 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3896 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3653 3897
3654 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) 3898 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
@@ -3664,6 +3908,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3664 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3908 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3665 3909
3666 atomic_dec(&cpu_buffer->record_disabled); 3910 atomic_dec(&cpu_buffer->record_disabled);
3911 atomic_dec(&buffer->resize_disabled);
3667} 3912}
3668EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 3913EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
3669 3914
@@ -3765,8 +4010,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3765 !cpumask_test_cpu(cpu, buffer_b->cpumask)) 4010 !cpumask_test_cpu(cpu, buffer_b->cpumask))
3766 goto out; 4011 goto out;
3767 4012
4013 cpu_buffer_a = buffer_a->buffers[cpu];
4014 cpu_buffer_b = buffer_b->buffers[cpu];
4015
3768 /* At least make sure the two buffers are somewhat the same */ 4016 /* At least make sure the two buffers are somewhat the same */
3769 if (buffer_a->pages != buffer_b->pages) 4017 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
3770 goto out; 4018 goto out;
3771 4019
3772 ret = -EAGAIN; 4020 ret = -EAGAIN;
@@ -3780,9 +4028,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3780 if (atomic_read(&buffer_b->record_disabled)) 4028 if (atomic_read(&buffer_b->record_disabled))
3781 goto out; 4029 goto out;
3782 4030
3783 cpu_buffer_a = buffer_a->buffers[cpu];
3784 cpu_buffer_b = buffer_b->buffers[cpu];
3785
3786 if (atomic_read(&cpu_buffer_a->record_disabled)) 4031 if (atomic_read(&cpu_buffer_a->record_disabled))
3787 goto out; 4032 goto out;
3788 4033
@@ -4071,6 +4316,8 @@ static int rb_cpu_notify(struct notifier_block *self,
4071 struct ring_buffer *buffer = 4316 struct ring_buffer *buffer =
4072 container_of(self, struct ring_buffer, cpu_notify); 4317 container_of(self, struct ring_buffer, cpu_notify);
4073 long cpu = (long)hcpu; 4318 long cpu = (long)hcpu;
4319 int cpu_i, nr_pages_same;
4320 unsigned int nr_pages;
4074 4321
4075 switch (action) { 4322 switch (action) {
4076 case CPU_UP_PREPARE: 4323 case CPU_UP_PREPARE:
@@ -4078,8 +4325,23 @@ static int rb_cpu_notify(struct notifier_block *self,
4078 if (cpumask_test_cpu(cpu, buffer->cpumask)) 4325 if (cpumask_test_cpu(cpu, buffer->cpumask))
4079 return NOTIFY_OK; 4326 return NOTIFY_OK;
4080 4327
4328 nr_pages = 0;
4329 nr_pages_same = 1;
4330 /* check if all cpu sizes are same */
4331 for_each_buffer_cpu(buffer, cpu_i) {
4332 /* fill in the size from first enabled cpu */
4333 if (nr_pages == 0)
4334 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4335 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4336 nr_pages_same = 0;
4337 break;
4338 }
4339 }
4340 /* allocate minimum pages, user can later expand it */
4341 if (!nr_pages_same)
4342 nr_pages = 2;
4081 buffer->buffers[cpu] = 4343 buffer->buffers[cpu] =
4082 rb_allocate_cpu_buffer(buffer, cpu); 4344 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4083 if (!buffer->buffers[cpu]) { 4345 if (!buffer->buffers[cpu]) {
4084 WARN(1, "failed to allocate ring buffer on CPU %ld\n", 4346 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4085 cpu); 4347 cpu);