author		Chris Mason <chris.mason@oracle.com>	2010-06-11 14:17:59 -0400
committer	Andy Grover <andy.grover@oracle.com>	2010-09-08 21:15:28 -0400
commit		6fa70da6081bbcf948801fd5ee0be4d222298a43 (patch)
tree		519fef6c63abe15d80173ad25c7cd5aae673df55 /net
parent		0f4b1c7e89e699f588807a914ec6e6396c851a72 (diff)
rds: recycle FMRs through lockless lists
FMR allocation and recycling is performance critical and fairly lock intensive. The current code has a per-connection lock that all processes bang on, and it becomes a major bottleneck on large systems. This changes things to use a number of cmpxchg-based lists instead, allowing us to go through the whole FMR lifecycle without locking inside RDS.

Zach Brown pointed out that our usage of cmpxchg for xlist removal is racy if someone manages to remove and add back an FMR struct into the list while another CPU can see the FMR's address at the head of the list. The second CPU might assume the list hasn't changed when in fact any number of operations might have happened in between the deletion and reinsertion.

This commit maintains a per-CPU count of CPUs that are currently in xlist removal, and establishes a grace period to make sure that nobody can see an entry we have just removed from the list.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
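The scheme described above — push and pop on a singly linked list using only compare-and-swap, plus a per-CPU busy flag that gives writers a grace period before a removed entry may be reinserted — can be sketched outside the kernel. The following is a minimal userspace illustration using C11 atomics, not the RDS code itself; the names (struct xstack, push, pop, grace_wait, NTHREADS) are invented for the example, and per-thread flags stand in for the kernel's per-CPU variables.

/* Userspace sketch (assumed names, not RDS code) of a cmpxchg-style
 * lockless stack with a grace period, modelling the scheme above. */
#include <stdatomic.h>
#include <stdio.h>

#define NTHREADS 4

struct xnode {
	struct xnode *next;
};

struct xstack {
	_Atomic(struct xnode *) head;
};

/* set while a thread is inside pop(); stands in for the kernel's
 * per-CPU clean_list_grace flag */
static atomic_int busy[NTHREADS];

static void push(struct xstack *s, struct xnode *n)
{
	struct xnode *cur = atomic_load(&s->head);

	do {
		n->next = cur;	/* link in front of the current head */
	} while (!atomic_compare_exchange_weak(&s->head, &cur, n));
}

static struct xnode *pop(struct xstack *s, int tid)
{
	struct xnode *cur, *next;

	atomic_store(&busy[tid], 1);
	cur = atomic_load(&s->head);
	while (cur) {
		next = cur->next;	/* may be stale if another pop wins the race */
		if (atomic_compare_exchange_weak(&s->head, &cur, next))
			break;
	}
	atomic_store(&busy[tid], 0);
	return cur;
}

/* Wait until no thread is inside pop().  Only after this is it safe to
 * reinsert a node another thread might still be spinning on, which is
 * the reuse race the commit message describes. */
static void grace_wait(void)
{
	for (int i = 0; i < NTHREADS; i++)
		while (atomic_load(&busy[i]))
			;	/* cpu_relax() equivalent */
}

int main(void)
{
	struct xstack stack = { NULL };
	struct xnode a, b;

	push(&stack, &a);
	push(&stack, &b);
	grace_wait();	/* trivially succeeds in this single-threaded demo */
	printf("popped %p then %p\n", (void *)pop(&stack, 0), (void *)pop(&stack, 0));
	return 0;
}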
Diffstat (limited to 'net')
-rw-r--r--  net/rds/ib_rdma.c  214
-rw-r--r--  net/rds/xlist.h    110
2 files changed, 282 insertions(+), 42 deletions(-)
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 64b5ede037c8..8c40391de5a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -36,7 +36,10 @@
 
 #include "rds.h"
 #include "ib.h"
+#include "xlist.h"
 
+static DEFINE_PER_CPU(unsigned long, clean_list_grace);
+#define CLEAN_LIST_BUSY_BIT 0
 
 /*
  * This is stored as mr->r_trans_private.
@@ -45,7 +48,11 @@ struct rds_ib_mr {
 	struct rds_ib_device	*device;
 	struct rds_ib_mr_pool	*pool;
 	struct ib_fmr		*fmr;
-	struct list_head	list;
+
+	struct xlist_head	xlist;
+
+	/* unmap_list is for freeing */
+	struct list_head	unmap_list;
 	unsigned int		remap_count;
 
 	struct scatterlist	*sg;
@@ -61,12 +68,14 @@ struct rds_ib_mr_pool {
 	struct mutex		flush_lock;		/* serialize fmr invalidate */
 	struct work_struct	flush_worker;		/* flush worker */
 
-	spinlock_t		list_lock;		/* protect variables below */
 	atomic_t		item_count;		/* total # of MRs */
 	atomic_t		dirty_count;		/* # dirty of MRs */
-	struct list_head	drop_list;		/* MRs that have reached their max_maps limit */
-	struct list_head	free_list;		/* unused MRs */
-	struct list_head	clean_list;		/* unused & unamapped MRs */
+
+	struct xlist_head	drop_list;		/* MRs that have reached their max_maps limit */
+	struct xlist_head	free_list;		/* unused MRs */
+	struct xlist_head	clean_list;		/* global unused & unamapped MRs */
+	wait_queue_head_t	flush_wait;
+
 	atomic_t		free_pinned;		/* memory pinned by free MRs */
 	unsigned long		max_items;
 	unsigned long		max_items_soft;
@@ -74,7 +83,7 @@ struct rds_ib_mr_pool {
 	struct ib_fmr_attr	fmr_attr;
 };
 
-static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all);
+static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
 static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
 
@@ -212,11 +221,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&pool->free_list);
-	INIT_LIST_HEAD(&pool->drop_list);
-	INIT_LIST_HEAD(&pool->clean_list);
+	INIT_XLIST_HEAD(&pool->free_list);
+	INIT_XLIST_HEAD(&pool->drop_list);
+	INIT_XLIST_HEAD(&pool->clean_list);
 	mutex_init(&pool->flush_lock);
-	spin_lock_init(&pool->list_lock);
+	init_waitqueue_head(&pool->flush_wait);
 	INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
 
 	pool->fmr_attr.max_pages = fmr_message_size;
@@ -246,27 +255,50 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
 {
 	cancel_work_sync(&pool->flush_worker);
-	rds_ib_flush_mr_pool(pool, 1);
+	rds_ib_flush_mr_pool(pool, 1, NULL);
 	WARN_ON(atomic_read(&pool->item_count));
 	WARN_ON(atomic_read(&pool->free_pinned));
 	kfree(pool);
 }
 
+static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
+			 struct rds_ib_mr **ibmr_ret)
+{
+	struct xlist_head *ibmr_xl;
+	ibmr_xl = xlist_del_head_fast(xl);
+	*ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
+}
+
 static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
 {
 	struct rds_ib_mr *ibmr = NULL;
-	unsigned long flags;
+	struct xlist_head *ret;
+	unsigned long *flag;
 
-	spin_lock_irqsave(&pool->list_lock, flags);
-	if (!list_empty(&pool->clean_list)) {
-		ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list);
-		list_del_init(&ibmr->list);
-	}
-	spin_unlock_irqrestore(&pool->list_lock, flags);
+	preempt_disable();
+	flag = &__get_cpu_var(clean_list_grace);
+	set_bit(CLEAN_LIST_BUSY_BIT, flag);
+	ret = xlist_del_head(&pool->clean_list);
+	if (ret)
+		ibmr = list_entry(ret, struct rds_ib_mr, xlist);
 
+	clear_bit(CLEAN_LIST_BUSY_BIT, flag);
+	preempt_enable();
 	return ibmr;
 }
 
+static inline void wait_clean_list_grace(void)
+{
+	int cpu;
+	unsigned long *flag;
+
+	for_each_online_cpu(cpu) {
+		flag = &per_cpu(clean_list_grace, cpu);
+		while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
+			cpu_relax();
+	}
+}
+
 static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 {
 	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
@@ -299,7 +331,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 
 		/* We do have some empty MRs. Flush them out. */
 		rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
-		rds_ib_flush_mr_pool(pool, 0);
+		rds_ib_flush_mr_pool(pool, 0, &ibmr);
+		if (ibmr)
+			return ibmr;
 	}
 
 	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
@@ -494,33 +528,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
 }
 
 /*
+ * given an xlist of mrs, put them all into the list_head for more processing
+ */
+static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
+{
+	struct rds_ib_mr *ibmr;
+	struct xlist_head splice;
+	struct xlist_head *cur;
+	struct xlist_head *next;
+
+	splice.next = NULL;
+	xlist_splice(xlist, &splice);
+	cur = splice.next;
+	while (cur) {
+		next = cur->next;
+		ibmr = list_entry(cur, struct rds_ib_mr, xlist);
+		list_add_tail(&ibmr->unmap_list, list);
+		cur = next;
+	}
+}
+
+/*
+ * this takes a list head of mrs and turns it into an xlist of clusters.
+ * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
+ * reuse.
+ */
+static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
+				 struct list_head *list, struct xlist_head *xlist,
+				 struct xlist_head **tail_ret)
+{
+	struct rds_ib_mr *ibmr;
+	struct xlist_head *cur_mr = xlist;
+	struct xlist_head *tail_mr = NULL;
+
+	list_for_each_entry(ibmr, list, unmap_list) {
+		tail_mr = &ibmr->xlist;
+		tail_mr->next = NULL;
+		cur_mr->next = tail_mr;
+		cur_mr = tail_mr;
+	}
+	*tail_ret = tail_mr;
+}
+
+/*
  * Flush our pool of MRs.
  * At a minimum, all currently unused MRs are unmapped.
  * If the number of MRs allocated exceeds the limit, we also try
  * to free as many MRs as needed to get back to this limit.
  */
-static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
+static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
+				int free_all, struct rds_ib_mr **ibmr_ret)
 {
 	struct rds_ib_mr *ibmr, *next;
+	struct xlist_head clean_xlist;
+	struct xlist_head *clean_tail;
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(fmr_list);
 	unsigned long unpinned = 0;
-	unsigned long flags;
 	unsigned int nfreed = 0, ncleaned = 0, free_goal;
 	int ret = 0;
 
 	rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
 
-	mutex_lock(&pool->flush_lock);
+	if (ibmr_ret) {
+		DEFINE_WAIT(wait);
+		while(!mutex_trylock(&pool->flush_lock)) {
+			ibmr = rds_ib_reuse_fmr(pool);
+			if (ibmr) {
+				*ibmr_ret = ibmr;
+				finish_wait(&pool->flush_wait, &wait);
+				goto out_nolock;
+			}
+
+			prepare_to_wait(&pool->flush_wait, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (xlist_empty(&pool->clean_list))
+				schedule();
+
+			ibmr = rds_ib_reuse_fmr(pool);
+			if (ibmr) {
+				*ibmr_ret = ibmr;
+				finish_wait(&pool->flush_wait, &wait);
+				goto out_nolock;
+			}
+		}
+		finish_wait(&pool->flush_wait, &wait);
+	} else
+		mutex_lock(&pool->flush_lock);
+
+	if (ibmr_ret) {
+		ibmr = rds_ib_reuse_fmr(pool);
+		if (ibmr) {
+			*ibmr_ret = ibmr;
+			goto out;
+		}
+	}
 
-	spin_lock_irqsave(&pool->list_lock, flags);
 	/* Get the list of all MRs to be dropped. Ordering matters -
-	 * we want to put drop_list ahead of free_list. */
-	list_splice_init(&pool->free_list, &unmap_list);
-	list_splice_init(&pool->drop_list, &unmap_list);
+	 * we want to put drop_list ahead of free_list.
+	 */
+	xlist_append_to_list(&pool->drop_list, &unmap_list);
+	xlist_append_to_list(&pool->free_list, &unmap_list);
 	if (free_all)
-		list_splice_init(&pool->clean_list, &unmap_list);
-	spin_unlock_irqrestore(&pool->list_lock, flags);
+		xlist_append_to_list(&pool->clean_list, &unmap_list);
 
 	free_goal = rds_ib_flush_goal(pool, free_all);
 
@@ -528,19 +638,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
 		goto out;
 
 	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
-	list_for_each_entry(ibmr, &unmap_list, list)
+	list_for_each_entry(ibmr, &unmap_list, unmap_list)
 		list_add(&ibmr->fmr->list, &fmr_list);
+
 	ret = ib_unmap_fmr(&fmr_list);
 	if (ret)
 		printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
 
 	/* Now we can destroy the DMA mapping and unpin any pages */
-	list_for_each_entry_safe(ibmr, next, &unmap_list, list) {
+	list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
 		unpinned += ibmr->sg_len;
 		__rds_ib_teardown_mr(ibmr);
 		if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
 			rds_ib_stats_inc(s_ib_rdma_mr_free);
-			list_del(&ibmr->list);
+			list_del(&ibmr->unmap_list);
 			ib_dealloc_fmr(ibmr->fmr);
 			kfree(ibmr);
 			nfreed++;
@@ -548,9 +659,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
 		ncleaned++;
 	}
 
-	spin_lock_irqsave(&pool->list_lock, flags);
-	list_splice(&unmap_list, &pool->clean_list);
-	spin_unlock_irqrestore(&pool->list_lock, flags);
+	if (!list_empty(&unmap_list)) {
+		/* we have to make sure that none of the things we're about
+		 * to put on the clean list would race with other cpus trying
+		 * to pull items off. The xlist would explode if we managed to
+		 * remove something from the clean list and then add it back again
+		 * while another CPU was spinning on that same item in xlist_del_head.
+		 *
+		 * This is pretty unlikely, but just in case wait for an xlist grace period
+		 * here before adding anything back into the clean list.
+		 */
+		wait_clean_list_grace();
+
+		list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
+		if (ibmr_ret)
+			refill_local(pool, &clean_xlist, ibmr_ret);
+
+		/* refill_local may have emptied our list */
+		if (!xlist_empty(&clean_xlist))
+			xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
+
+	}
 
 	atomic_sub(unpinned, &pool->free_pinned);
 	atomic_sub(ncleaned, &pool->dirty_count);
@@ -558,6 +687,9 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
 
 out:
 	mutex_unlock(&pool->flush_lock);
+	if (waitqueue_active(&pool->flush_wait))
+		wake_up(&pool->flush_wait);
+out_nolock:
 	return ret;
 }
 
@@ -565,7 +697,7 @@ static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
 {
 	struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker);
 
-	rds_ib_flush_mr_pool(pool, 0);
+	rds_ib_flush_mr_pool(pool, 0, NULL);
 }
 
 void rds_ib_free_mr(void *trans_private, int invalidate)
@@ -573,20 +705,17 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 	struct rds_ib_mr *ibmr = trans_private;
 	struct rds_ib_device *rds_ibdev = ibmr->device;
 	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
-	unsigned long flags;
 
 	rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
 
 	/* Return it to the pool's free list */
-	spin_lock_irqsave(&pool->list_lock, flags);
 	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
-		list_add(&ibmr->list, &pool->drop_list);
+		xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
 	else
-		list_add(&ibmr->list, &pool->free_list);
+		xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
 
 	atomic_add(ibmr->sg_len, &pool->free_pinned);
 	atomic_inc(&pool->dirty_count);
-	spin_unlock_irqrestore(&pool->list_lock, flags);
 
 	/* If we've pinned too many pages, request a flush */
 	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
@@ -595,7 +724,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 
 	if (invalidate) {
 		if (likely(!in_interrupt())) {
-			rds_ib_flush_mr_pool(pool, 0);
+			rds_ib_flush_mr_pool(pool, 0, NULL);
 		} else {
 			/* We get here if the user created a MR marked
 			 * as use_once and invalidate at the same time. */
@@ -614,7 +743,7 @@ void rds_ib_flush_mrs(void)
 		struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
 
 		if (pool)
-			rds_ib_flush_mr_pool(pool, 0);
+			rds_ib_flush_mr_pool(pool, 0, NULL);
 	}
 }
 
@@ -659,3 +788,4 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 	rds_ib_dev_put(rds_ibdev);
 	return ibmr;
 }
+
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 000000000000..8c21aca49d50
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,110 @@
+#ifndef _LINUX_XLIST_H
+#define _LINUX_XLIST_H
+
+#include <linux/stddef.h>
+#include <linux/poison.h>
+#include <linux/prefetch.h>
+#include <asm/system.h>
+
+struct xlist_head {
+	struct xlist_head *next;
+};
+
+/*
+ * XLIST_PTR_TAIL can be used to prevent double insertion. See
+ * xlist_protect()
+ */
+#define XLIST_PTR_TAIL ((struct xlist_head *)0x1)
+
+static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, struct xlist_head *head)
+{
+	struct xlist_head *cur;
+	struct xlist_head *check;
+
+	while (1) {
+		cur = head->next;
+		tail->next = cur;
+		check = cmpxchg(&head->next, cur, new);
+		if (check == cur)
+			break;
+	}
+}
+
+/*
+ * To avoid duplicate insertion by two CPUs of the same xlist item
+ * you can call xlist_protect. It will stuff XLIST_PTR_TAIL
+ * into the entry->next pointer with xchg, and only return 1
+ * if there was a NULL there before.
+ *
+ * if xlist_protect returns zero, someone else is busy working
+ * on this entry. Getting a NULL into the entry in a race
+ * free manner is the caller's job.
+ */
+static inline int xlist_protect(struct xlist_head *entry)
+{
+	struct xlist_head *val;
+
+	val = xchg(&entry->next, XLIST_PTR_TAIL);
+	if (val == NULL)
+		return 1;
+	return 0;
+}
+
+static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
+{
+	struct xlist_head *cur;
+	struct xlist_head *check;
+	struct xlist_head *next;
+
+	while (1) {
+		cur = head->next;
+		if (!cur)
+			goto out;
+
+		if (cur == XLIST_PTR_TAIL) {
+			cur = NULL;
+			goto out;
+		}
+
+		next = cur->next;
+		check = cmpxchg(&head->next, cur, next);
+		if (check == cur)
+			goto out;
+	}
+out:
+	return cur;
+}
+
+static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
+{
+	struct xlist_head *cur;
+
+	cur = head->next;
+	if (!cur || cur == XLIST_PTR_TAIL)
+		return NULL;
+
+	head->next = cur->next;
+	return cur;
+}
+
+static inline void xlist_splice(struct xlist_head *list,
+				struct xlist_head *head)
+{
+	struct xlist_head *cur;
+
+	WARN_ON(head->next);
+	cur = xchg(&list->next, NULL);
+	head->next = cur;
+}
+
+static inline void INIT_XLIST_HEAD(struct xlist_head *list)
+{
+	list->next = NULL;
+}
+
+static inline int xlist_empty(struct xlist_head *head)
+{
+	return head->next == NULL || head->next == XLIST_PTR_TAIL;
+}
+
+#endif
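A note on how ib_rdma.c consumes this header: struct rds_ib_mr embeds a struct xlist_head and recovers the containing MR with list_entry(), which is container_of() underneath. The following is a small, self-contained userspace sketch of that embed-and-recover pattern; struct my_mr and its fields are invented for the example, and a local container_of stands in for the kernel macro so the snippet builds outside the kernel.

/* Sketch of recovering the containing struct from an embedded xlist_head,
 * the way ib_rdma.c does with list_entry(ret, struct rds_ib_mr, xlist).
 * struct my_mr is a made-up stand-in; container_of is a local copy of the
 * usual kernel macro so this builds in userspace. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct xlist_head {
	struct xlist_head *next;
};

struct my_mr {
	int id;
	struct xlist_head xlist;	/* linkage used by the lockless lists */
};

int main(void)
{
	struct my_mr mr = { .id = 42, .xlist = { NULL } };
	struct xlist_head *node = &mr.xlist;	/* what xlist_del_head() would hand back */
	struct my_mr *back = container_of(node, struct my_mr, xlist);

	printf("recovered mr with id %d\n", back->id);
	return 0;
}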