Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--  kernel/trace/ring_buffer.c  585
1 file changed, 421 insertions(+), 164 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..6420cda62336 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,6 +23,8 @@
 #include <asm/local.h>
 #include "trace.h"
 
+static void update_pages_handler(struct work_struct *work);
+
 /*
  * The ring buffer header is special. We must manually up keep it.
  */
@@ -449,6 +451,7 @@ struct ring_buffer_per_cpu {
 	raw_spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
+	unsigned int			nr_pages;
 	struct list_head		*pages;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
@@ -466,13 +469,18 @@ struct ring_buffer_per_cpu {
 	unsigned long			read_bytes;
 	u64				write_stamp;
 	u64				read_stamp;
+	/* ring buffer pages to update, > 0 to add, < 0 to remove */
+	int				nr_pages_to_update;
+	struct list_head		new_pages; /* new pages to add */
+	struct work_struct		update_pages_work;
+	struct completion		update_done;
 };
 
 struct ring_buffer {
-	unsigned			pages;
 	unsigned			flags;
 	int				cpus;
 	atomic_t			record_disabled;
+	atomic_t			resize_disabled;
 	cpumask_var_t			cpumask;
 
 	struct lock_class_key		*reader_lock_key;
@@ -937,6 +945,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	struct list_head *head = cpu_buffer->pages;
 	struct buffer_page *bpage, *tmp;
 
+	/* Reset the head page if it exists */
+	if (cpu_buffer->head_page)
+		rb_set_head_page(cpu_buffer);
+
 	rb_head_page_deactivate(cpu_buffer);
 
 	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
@@ -963,14 +975,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	return 0;
 }
 
-static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
-			     unsigned nr_pages)
+static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
 {
+	int i;
 	struct buffer_page *bpage, *tmp;
-	LIST_HEAD(pages);
-	unsigned i;
-
-	WARN_ON(!nr_pages);
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
@@ -981,15 +989,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		 */
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 				    GFP_KERNEL | __GFP_NORETRY,
-				    cpu_to_node(cpu_buffer->cpu));
+				    cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
 
-		rb_check_bpage(cpu_buffer, bpage);
+		list_add(&bpage->list, pages);
 
-		list_add(&bpage->list, &pages);
-
-		page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+		page = alloc_pages_node(cpu_to_node(cpu),
 					GFP_KERNEL | __GFP_NORETRY, 0);
 		if (!page)
 			goto free_pages;
@@ -997,6 +1003,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		rb_init_page(bpage->page);
 	}
 
+	return 0;
+
+free_pages:
+	list_for_each_entry_safe(bpage, tmp, pages, list) {
+		list_del_init(&bpage->list);
+		free_buffer_page(bpage);
+	}
+
+	return -ENOMEM;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	LIST_HEAD(pages);
+
+	WARN_ON(!nr_pages);
+
+	if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+		return -ENOMEM;
+
 	/*
 	 * The ring buffer page list is a circular list that does not
 	 * start and end with a list head. All page list items point to
@@ -1005,20 +1032,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	cpu_buffer->pages = pages.next;
 	list_del(&pages);
 
+	cpu_buffer->nr_pages = nr_pages;
+
 	rb_check_pages(cpu_buffer);
 
 	return 0;
-
- free_pages:
-	list_for_each_entry_safe(bpage, tmp, &pages, list) {
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
-	}
-	return -ENOMEM;
 }
 
 static struct ring_buffer_per_cpu *
-rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct buffer_page *bpage;
@@ -1035,6 +1057,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	raw_spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
+	init_completion(&cpu_buffer->update_done);
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1052,7 +1076,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 
-	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+	ret = rb_allocate_pages(cpu_buffer, nr_pages);
 	if (ret < 0)
 		goto fail_free_reader;
 
@@ -1113,7 +1137,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 {
 	struct ring_buffer *buffer;
 	int bsize;
-	int cpu;
+	int cpu, nr_pages;
 
 	/* keep it in its own cache line */
 	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1148,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
 		goto fail_free_buffer;
 
-	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
-	if (buffer->pages < 2)
-		buffer->pages = 2;
+	if (nr_pages < 2)
+		nr_pages = 2;
 
 	/*
 	 * In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1178,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 
 	for_each_buffer_cpu(buffer, cpu) {
 		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, cpu);
+			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 		if (!buffer->buffers[cpu])
 			goto fail_free_buffers;
 	}
@@ -1222,58 +1246,222 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
-static void
-rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+static inline unsigned long rb_page_entries(struct buffer_page *bpage)
 {
-	struct buffer_page *bpage;
-	struct list_head *p;
-	unsigned i;
+	return local_read(&bpage->entries) & RB_WRITE_MASK;
+}
+
+static inline unsigned long rb_page_write(struct buffer_page *bpage)
+{
+	return local_read(&bpage->write) & RB_WRITE_MASK;
+}
+
+static int
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
+{
+	struct list_head *tail_page, *to_remove, *next_page;
+	struct buffer_page *to_remove_page, *tmp_iter_page;
+	struct buffer_page *last_page, *first_page;
+	unsigned int nr_removed;
+	unsigned long head_bit;
+	int page_entries;
+
+	head_bit = 0;
 
 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
-	rb_head_page_deactivate(cpu_buffer);
+	atomic_inc(&cpu_buffer->record_disabled);
+	/*
+	 * We don't race with the readers since we have acquired the reader
+	 * lock. We also don't race with writers after disabling recording.
+	 * This makes it easy to figure out the first and the last page to be
+	 * removed from the list. We unlink all the pages in between including
+	 * the first and last pages. This is done in a busy loop so that we
+	 * lose the least number of traces.
+	 * The pages are freed after we restart recording and unlock readers.
+	 */
+	tail_page = &cpu_buffer->tail_page->list;
 
-	for (i = 0; i < nr_pages; i++) {
-		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			goto out;
-		p = cpu_buffer->pages->next;
-		bpage = list_entry(p, struct buffer_page, list);
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
+	/*
+	 * tail page might be on reader page, we remove the next page
+	 * from the ring buffer
+	 */
+	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+		tail_page = rb_list_head(tail_page->next);
+	to_remove = tail_page;
+
+	/* start of pages to remove */
+	first_page = list_entry(rb_list_head(to_remove->next),
+				struct buffer_page, list);
+
+	for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
+		to_remove = rb_list_head(to_remove)->next;
+		head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
 	}
-	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		goto out;
 
-	rb_reset_cpu(cpu_buffer);
-	rb_check_pages(cpu_buffer);
+	next_page = rb_list_head(to_remove)->next;
 
-out:
+	/*
+	 * Now we remove all pages between tail_page and next_page.
+	 * Make sure that we have head_bit value preserved for the
+	 * next page
+	 */
+	tail_page->next = (struct list_head *)((unsigned long)next_page |
+						head_bit);
+	next_page = rb_list_head(next_page);
+	next_page->prev = tail_page;
+
+	/* make sure pages points to a valid page in the ring buffer */
+	cpu_buffer->pages = next_page;
+
+	/* update head page */
+	if (head_bit)
+		cpu_buffer->head_page = list_entry(next_page,
+						struct buffer_page, list);
+
+	/*
+	 * change read pointer to make sure any read iterators reset
+	 * themselves
+	 */
+	cpu_buffer->read = 0;
+
+	/* pages are removed, resume tracing and then free the pages */
+	atomic_dec(&cpu_buffer->record_disabled);
 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+	RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
+
+	/* last buffer page to remove */
+	last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
+				list);
+	tmp_iter_page = first_page;
+
+	do {
+		to_remove_page = tmp_iter_page;
+		rb_inc_page(cpu_buffer, &tmp_iter_page);
+
+		/* update the counters */
+		page_entries = rb_page_entries(to_remove_page);
+		if (page_entries) {
+			/*
+			 * If something was added to this page, it was full
+			 * since it is not the tail page. So we deduct the
+			 * bytes consumed in ring buffer from here.
+			 * No need to update overruns, since this page is
+			 * deleted from ring buffer and its entries are
+			 * already accounted for.
+			 */
+			local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+		}
+
+		/*
+		 * We have already removed references to this list item, just
+		 * free up the buffer_page and its page
+		 */
+		free_buffer_page(to_remove_page);
+		nr_removed--;
+
+	} while (to_remove_page != last_page);
+
+	RB_WARN_ON(cpu_buffer, nr_removed);
+
+	return nr_removed == 0;
 }
 
-static void
-rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
-		struct list_head *pages, unsigned nr_pages)
+static int
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct buffer_page *bpage;
-	struct list_head *p;
-	unsigned i;
+	struct list_head *pages = &cpu_buffer->new_pages;
+	int retries, success;
 
 	raw_spin_lock_irq(&cpu_buffer->reader_lock);
-	rb_head_page_deactivate(cpu_buffer);
+	/*
+	 * We are holding the reader lock, so the reader page won't be swapped
+	 * in the ring buffer. Now we are racing with the writer trying to
+	 * move head page and the tail page.
+	 * We are going to adapt the reader page update process where:
+	 * 1. We first splice the start and end of list of new pages between
+	 *    the head page and its previous page.
+	 * 2. We cmpxchg the prev_page->next to point from head page to the
+	 *    start of new pages list.
+	 * 3. Finally, we update the head->prev to the end of new list.
+	 *
+	 * We will try this process 10 times, to make sure that we don't keep
+	 * spinning.
+	 */
+	retries = 10;
+	success = 0;
+	while (retries--) {
+		struct list_head *head_page, *prev_page, *r;
+		struct list_head *last_page, *first_page;
+		struct list_head *head_page_with_bit;
 
-	for (i = 0; i < nr_pages; i++) {
-		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			goto out;
-		p = pages->next;
-		bpage = list_entry(p, struct buffer_page, list);
-		list_del_init(&bpage->list);
-		list_add_tail(&bpage->list, cpu_buffer->pages);
+		head_page = &rb_set_head_page(cpu_buffer)->list;
+		prev_page = head_page->prev;
+
+		first_page = pages->next;
+		last_page = pages->prev;
+
+		head_page_with_bit = (struct list_head *)
+				     ((unsigned long)head_page | RB_PAGE_HEAD);
+
+		last_page->next = head_page_with_bit;
+		first_page->prev = prev_page;
+
+		r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
+
+		if (r == head_page_with_bit) {
+			/*
+			 * yay, we replaced the page pointer to our new list,
+			 * now, we just have to update to head page's prev
+			 * pointer to point to end of list
+			 */
+			head_page->prev = last_page;
+			success = 1;
+			break;
+		}
 	}
-	rb_reset_cpu(cpu_buffer);
-	rb_check_pages(cpu_buffer);
 
-out:
+	if (success)
+		INIT_LIST_HEAD(pages);
+	/*
+	 * If we weren't successful in adding in new pages, warn and stop
+	 * tracing
+	 */
+	RB_WARN_ON(cpu_buffer, !success);
 	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+	/* free pages if they weren't inserted */
+	if (!success) {
+		struct buffer_page *bpage, *tmp;
+		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+					 list) {
+			list_del_init(&bpage->list);
+			free_buffer_page(bpage);
+		}
+	}
+	return success;
+}
+
+static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	int success;
+
+	if (cpu_buffer->nr_pages_to_update > 0)
+		success = rb_insert_pages(cpu_buffer);
+	else
+		success = rb_remove_pages(cpu_buffer,
+					-cpu_buffer->nr_pages_to_update);
+
+	if (success)
+		cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
+}
+
+static void update_pages_handler(struct work_struct *work)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
+			struct ring_buffer_per_cpu, update_pages_work);
+	rb_update_pages(cpu_buffer);
+	complete(&cpu_buffer->update_done);
 }
 
 /**
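The splice in rb_insert_pages() above relies on the ring buffer's tagged-pointer convention: flags such as RB_PAGE_HEAD live in the low bits of an aligned ->next pointer, and the new pages are published with a single cmpxchg on prev_page->next. The following userspace-style sketch shows only that idea; it is not code from this patch, node/FLAG_HEAD/splice_before_head() are illustrative names, and the real function additionally fixes up ->prev pointers and retries up to ten times as its comment explains.

#include <stdatomic.h>
#include <stdint.h>

#define FLAG_MASK	3UL	/* low bits are free because nodes are aligned */
#define FLAG_HEAD	1UL	/* stands in for RB_PAGE_HEAD */

struct node {
	_Atomic uintptr_t next;	/* pointer value ORed with flag bits */
};

/* Strip the flag bits before dereferencing (what rb_list_head() does). */
static inline struct node *node_ptr(uintptr_t tagged)
{
	return (struct node *)(tagged & ~FLAG_MASK);
}

/*
 * One attempt to splice the chain first..last in front of the current head:
 * the chain's tail keeps pointing at the flagged head, and a single
 * compare-and-swap moves prev->next from "head | FLAG_HEAD" to "first".
 * Returns nonzero on success, 0 if a concurrent writer moved the head first.
 */
static int splice_before_head(struct node *prev, struct node *head,
			      struct node *first, struct node *last)
{
	uintptr_t head_with_bit = (uintptr_t)head | FLAG_HEAD;
	uintptr_t expected = head_with_bit;

	atomic_store(&last->next, head_with_bit);
	return atomic_compare_exchange_strong(&prev->next, &expected,
					      (uintptr_t)first);
}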
@@ -1283,16 +1471,14 @@ out:
  *
  * Minimum size is 2 * BUF_PAGE_SIZE.
  *
- * Returns -1 on failure.
+ * Returns 0 on success and < 0 on failure.
  */
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+			int cpu_id)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	unsigned nr_pages, rm_pages, new_pages;
-	struct buffer_page *bpage, *tmp;
-	unsigned long buffer_size;
-	LIST_HEAD(pages);
-	int i, cpu;
+	unsigned nr_pages;
+	int cpu, err = 0;
 
 	/*
 	 * Always succeed at resizing a non-existent buffer:
@@ -1302,113 +1488,154 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
-	buffer_size = buffer->pages * BUF_PAGE_SIZE;
 
 	/* we need a minimum of two pages */
 	if (size < BUF_PAGE_SIZE * 2)
 		size = BUF_PAGE_SIZE * 2;
 
-	if (size == buffer_size)
-		return size;
-
-	atomic_inc(&buffer->record_disabled);
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
-	/* Make sure all writers are done with this buffer. */
-	synchronize_sched();
+	/*
+	 * Don't succeed if resizing is disabled, as a reader might be
+	 * manipulating the ring buffer and is expecting a sane state while
+	 * this is true.
+	 */
+	if (atomic_read(&buffer->resize_disabled))
+		return -EBUSY;
 
+	/* prevent another thread from changing buffer sizes */
 	mutex_lock(&buffer->mutex);
-	get_online_cpus();
-
-	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
-	if (size < buffer_size) {
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		/* calculate the pages to update */
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
 
-		/* easy case, just free pages */
-		if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
-			goto out_fail;
+			cpu_buffer->nr_pages_to_update = nr_pages -
+							cpu_buffer->nr_pages;
+			/*
+			 * nothing more to do for removing pages or no update
+			 */
+			if (cpu_buffer->nr_pages_to_update <= 0)
+				continue;
+			/*
+			 * to add pages, make sure all new pages can be
+			 * allocated without receiving ENOMEM
+			 */
+			INIT_LIST_HEAD(&cpu_buffer->new_pages);
+			if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+						&cpu_buffer->new_pages, cpu)) {
+				/* not enough memory for new pages */
+				err = -ENOMEM;
+				goto out_err;
+			}
+		}
 
-		rm_pages = buffer->pages - nr_pages;
+		get_online_cpus();
+		/*
+		 * Fire off all the required work handlers
+		 * We can't schedule on offline CPUs, but it's not necessary
+		 * since we can change their buffer sizes without any race.
+		 */
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			if (!cpu_buffer->nr_pages_to_update)
+				continue;
+
+			if (cpu_online(cpu))
+				schedule_work_on(cpu,
+						&cpu_buffer->update_pages_work);
+			else
+				rb_update_pages(cpu_buffer);
+		}
 
+		/* wait for all the updates to complete */
 		for_each_buffer_cpu(buffer, cpu) {
 			cpu_buffer = buffer->buffers[cpu];
-			rb_remove_pages(cpu_buffer, rm_pages);
+			if (!cpu_buffer->nr_pages_to_update)
+				continue;
+
+			if (cpu_online(cpu))
+				wait_for_completion(&cpu_buffer->update_done);
+			cpu_buffer->nr_pages_to_update = 0;
 		}
-		goto out;
-	}
 
-	/*
-	 * This is a bit more difficult. We only want to add pages
-	 * when we can allocate enough for all CPUs. We do this
-	 * by allocating all the pages and storing them on a local
-	 * link list. If we succeed in our allocation, then we
-	 * add these pages to the cpu_buffers. Otherwise we just free
-	 * them all and return -ENOMEM;
-	 */
-	if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
-		goto out_fail;
+		put_online_cpus();
+	} else {
+		cpu_buffer = buffer->buffers[cpu_id];
 
-	new_pages = nr_pages - buffer->pages;
+		if (nr_pages == cpu_buffer->nr_pages)
+			goto out;
 
-	for_each_buffer_cpu(buffer, cpu) {
-		for (i = 0; i < new_pages; i++) {
-			struct page *page;
-			/*
-			 * __GFP_NORETRY flag makes sure that the allocation
-			 * fails gracefully without invoking oom-killer and
-			 * the system is not destabilized.
-			 */
-			bpage = kzalloc_node(ALIGN(sizeof(*bpage),
-						  cache_line_size()),
-					    GFP_KERNEL | __GFP_NORETRY,
-					    cpu_to_node(cpu));
-			if (!bpage)
-				goto free_pages;
-			list_add(&bpage->list, &pages);
-			page = alloc_pages_node(cpu_to_node(cpu),
-						GFP_KERNEL | __GFP_NORETRY, 0);
-			if (!page)
-				goto free_pages;
-			bpage->page = page_address(page);
-			rb_init_page(bpage->page);
+		cpu_buffer->nr_pages_to_update = nr_pages -
+						cpu_buffer->nr_pages;
+
+		INIT_LIST_HEAD(&cpu_buffer->new_pages);
+		if (cpu_buffer->nr_pages_to_update > 0 &&
+			__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+					    &cpu_buffer->new_pages, cpu_id)) {
+			err = -ENOMEM;
+			goto out_err;
 		}
-	}
 
-	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
-		rb_insert_pages(cpu_buffer, &pages, new_pages);
-	}
+		get_online_cpus();
 
-	if (RB_WARN_ON(buffer, !list_empty(&pages)))
-		goto out_fail;
+		if (cpu_online(cpu_id)) {
+			schedule_work_on(cpu_id,
+					 &cpu_buffer->update_pages_work);
+			wait_for_completion(&cpu_buffer->update_done);
+		} else
+			rb_update_pages(cpu_buffer);
+
+		cpu_buffer->nr_pages_to_update = 0;
+		put_online_cpus();
+	}
 
  out:
-	buffer->pages = nr_pages;
-	put_online_cpus();
+	/*
+	 * The ring buffer resize can happen with the ring buffer
+	 * enabled, so that the update disturbs the tracing as little
+	 * as possible. But if the buffer is disabled, we do not need
+	 * to worry about that, and we can take the time to verify
+	 * that the buffer is not corrupt.
+	 */
+	if (atomic_read(&buffer->record_disabled)) {
+		atomic_inc(&buffer->record_disabled);
+		/*
+		 * Even though the buffer was disabled, we must make sure
+		 * that it is truly disabled before calling rb_check_pages.
+		 * There could have been a race between checking
+		 * record_disable and incrementing it.
+		 */
+		synchronize_sched();
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			rb_check_pages(cpu_buffer);
+		}
+		atomic_dec(&buffer->record_disabled);
+	}
+
 	mutex_unlock(&buffer->mutex);
+	return size;
 
-	atomic_dec(&buffer->record_disabled);
+ out_err:
+	for_each_buffer_cpu(buffer, cpu) {
+		struct buffer_page *bpage, *tmp;
 
-	return size;
+		cpu_buffer = buffer->buffers[cpu];
+		cpu_buffer->nr_pages_to_update = 0;
 
- free_pages:
-	list_for_each_entry_safe(bpage, tmp, &pages, list) {
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
-	}
-	put_online_cpus();
-	mutex_unlock(&buffer->mutex);
-	atomic_dec(&buffer->record_disabled);
-	return -ENOMEM;
+		if (list_empty(&cpu_buffer->new_pages))
+			continue;
 
-	/*
-	 * Something went totally wrong, and we are too paranoid
-	 * to even clean up the mess.
-	 */
- out_fail:
-	put_online_cpus();
+		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+					list) {
+			list_del_init(&bpage->list);
+			free_buffer_page(bpage);
+		}
+	}
 	mutex_unlock(&buffer->mutex);
-	atomic_dec(&buffer->record_disabled);
-	return -1;
+	return err;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
 
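With this hunk, ring_buffer_resize() takes a cpu_id argument and can act on one per-CPU buffer or on all of them via RING_BUFFER_ALL_CPUS. A hypothetical caller might look like the sketch below; example_resize() and the sizes are made up, and it assumes RING_BUFFER_ALL_CPUS is visible through <linux/ring_buffer.h> as the companion header change in this series suggests.

#include <linux/ring_buffer.h>

/* Illustrative only: grow every per-CPU buffer, then shrink CPU 0. */
static int example_resize(struct ring_buffer *buffer)
{
	int ret;

	/* ask for ~1 MB per CPU; the core rounds up to whole buffer pages */
	ret = ring_buffer_resize(buffer, 1 << 20, RING_BUFFER_ALL_CPUS);
	if (ret < 0)
		return ret;	/* e.g. -EBUSY while a reader holds resize_disabled */

	/* requests below two pages are clamped to the documented minimum */
	ret = ring_buffer_resize(buffer, 4096, 0);
	return ret < 0 ? ret : 0;
}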
@@ -1447,21 +1674,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	return __rb_page_index(iter->head_page, iter->head);
 }
 
-static inline unsigned long rb_page_write(struct buffer_page *bpage)
-{
-	return local_read(&bpage->write) & RB_WRITE_MASK;
-}
-
 static inline unsigned rb_page_commit(struct buffer_page *bpage)
 {
 	return local_read(&bpage->page->commit);
 }
 
-static inline unsigned long rb_page_entries(struct buffer_page *bpage)
-{
-	return local_read(&bpage->entries) & RB_WRITE_MASK;
-}
-
 /* Size is determined by what has been committed */
 static inline unsigned rb_page_size(struct buffer_page *bpage)
 {
@@ -1510,7 +1727,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 	 * assign the commit to the tail.
 	 */
  again:
-	max_count = cpu_buffer->buffer->pages * 100;
+	max_count = cpu_buffer->nr_pages * 100;
 
 	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
 		if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3486,6 +3703,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 
 	iter->cpu_buffer = cpu_buffer;
 
+	atomic_inc(&buffer->resize_disabled);
 	atomic_inc(&cpu_buffer->record_disabled);
 
 	return iter;
@@ -3548,7 +3766,14 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
+	/*
+	 * Ring buffer is disabled from recording, here's a good place
+	 * to check the integrity of the ring buffer.
+	 */
+	rb_check_pages(cpu_buffer);
+
 	atomic_dec(&cpu_buffer->record_disabled);
+	atomic_dec(&cpu_buffer->buffer->resize_disabled);
 	kfree(iter);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
@@ -3588,9 +3813,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
  * ring_buffer_size - return the size of the ring buffer (in bytes)
  * @buffer: The ring buffer.
  */
-unsigned long ring_buffer_size(struct ring_buffer *buffer)
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
 {
-	return BUF_PAGE_SIZE * buffer->pages;
+	/*
+	 * Earlier, this method returned
+	 *	BUF_PAGE_SIZE * buffer->nr_pages
+	 * Since the nr_pages field is now removed, we have converted this to
+	 * return the per cpu buffer value.
+	 */
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_size);
 
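Since ring_buffer_size() now reports a single CPU, a caller that previously relied on one global figure has to aggregate the per-CPU values itself. A minimal sketch, assuming the online CPUs are the ones of interest (example_total_size() is not part of the patch):

#include <linux/cpumask.h>
#include <linux/ring_buffer.h>

/* Illustrative only: recover an aggregate figure from the per-CPU API. */
static unsigned long example_total_size(struct ring_buffer *buffer)
{
	unsigned long bytes = 0;
	int cpu;

	/* ring_buffer_size() returns 0 for CPUs the buffer does not cover */
	for_each_online_cpu(cpu)
		bytes += ring_buffer_size(buffer, cpu);

	return bytes;
}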
@@ -3611,6 +3845,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->commit_page = cpu_buffer->head_page;
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+	INIT_LIST_HEAD(&cpu_buffer->new_pages);
 	local_set(&cpu_buffer->reader_page->write, 0);
 	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
@@ -3647,8 +3882,12 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	atomic_inc(&buffer->resize_disabled);
 	atomic_inc(&cpu_buffer->record_disabled);
 
+	/* Make sure all commits have finished */
+	synchronize_sched();
+
 	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
@@ -3664,6 +3903,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	atomic_dec(&cpu_buffer->record_disabled);
+	atomic_dec(&buffer->resize_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -3765,8 +4005,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	    !cpumask_test_cpu(cpu, buffer_b->cpumask))
 		goto out;
 
+	cpu_buffer_a = buffer_a->buffers[cpu];
+	cpu_buffer_b = buffer_b->buffers[cpu];
+
 	/* At least make sure the two buffers are somewhat the same */
-	if (buffer_a->pages != buffer_b->pages)
+	if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
 		goto out;
 
 	ret = -EAGAIN;
@@ -3780,9 +4023,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	if (atomic_read(&buffer_b->record_disabled))
 		goto out;
 
-	cpu_buffer_a = buffer_a->buffers[cpu];
-	cpu_buffer_b = buffer_b->buffers[cpu];
-
 	if (atomic_read(&cpu_buffer_a->record_disabled))
 		goto out;
 
@@ -4071,6 +4311,8 @@ static int rb_cpu_notify(struct notifier_block *self,
 	struct ring_buffer *buffer =
 		container_of(self, struct ring_buffer, cpu_notify);
 	long cpu = (long)hcpu;
+	int cpu_i, nr_pages_same;
+	unsigned int nr_pages;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -4078,8 +4320,23 @@ static int rb_cpu_notify(struct notifier_block *self,
 		if (cpumask_test_cpu(cpu, buffer->cpumask))
 			return NOTIFY_OK;
 
+		nr_pages = 0;
+		nr_pages_same = 1;
+		/* check if all cpu sizes are same */
+		for_each_buffer_cpu(buffer, cpu_i) {
+			/* fill in the size from first enabled cpu */
+			if (nr_pages == 0)
+				nr_pages = buffer->buffers[cpu_i]->nr_pages;
+			if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+				nr_pages_same = 0;
+				break;
+			}
+		}
+		/* allocate minimum pages, user can later expand it */
+		if (!nr_pages_same)
+			nr_pages = 2;
 		buffer->buffers[cpu] =
-			rb_allocate_cpu_buffer(buffer, cpu);
+			rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
 		if (!buffer->buffers[cpu]) {
 			WARN(1, "failed to allocate ring buffer on CPU %ld\n",
 			     cpu);