litmus-rt-ext-res.git - LITMUS^RT with extended reservations for Forbidden Zones paper @ RTAS'20

diff options

author	Ilya Dryomov <idryomov@redhat.com>	2014-11-05 07:45:58 -0500
committer	Ilya Dryomov <idryomov@redhat.com>	2014-11-13 14:21:14 -0500
commit	ba9d114ec5578e6e99a4dfa37ff8ae688040fd64 (patch)
tree	afdc6c5bbf5c1f690498e22908f3492f50f950d8 /net/ceph/osd_client.c
parent	a390de0208e7f2f8fdb2fbf970240e4f7b308037 (diff)

libceph: clear r_req_lru_item in __unregister_linger_request()

kick_requests() can put linger requests on the notarget list. This means we need to clear the much-overloaded req->r_req_lru_item in __unregister_linger_request() as well, or we get an assertion failure in ceph_osdc_release_request() - !list_empty(&req->r_req_lru_item). AFAICT the assumption was that registered linger requests cannot be on any of req->r_req_lru_item lists, but that's clearly not the case. Signed-off-by: Ilya Dryomov <idryomov@redhat.com> Reviewed-by: Alex Elder <elder@linaro.org>

Diffstat (limited to 'net/ceph/osd_client.c')

-rw-r--r--

net/ceph/osd_client.c

1 files changed, 2 insertions, 0 deletions


diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 75abaa87abac..decc3b74e65f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c
@@ -1254,6 +1254,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
1254	if (list_empty(&req->r_osd_item))	1254	if (list_empty(&req->r_osd_item))
1255	req->r_osd = NULL;	1255	req->r_osd = NULL;
1256	}	1256	}
		1257
		1258	list_del_init(&req->r_req_lru_item); /* can be on notarget */
1257	ceph_osdc_put_request(req);	1259	ceph_osdc_put_request(req);
1258	}	1260	}
1259		1261

#ifndef _LINUX_PAGEMAP_H #define _LINUX_PAGEMAP_H /* * Copyright 1995 Linus Torvalds */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/list.h> #include <linux/highmem.h> #include <linux/compiler.h> #include <asm/uaccess.h> #include <linux/gfp.h> #include <linux/bitops.h> #include <linux/hardirq.h> /* for in_interrupt() */ #include <linux/hugetlb_inline.h> /* * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page * allocation mode flags. */ enum mapping_flags { AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */ AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ }; static inline void mapping_set_error(struct address_space *mapping, int error) { if (unlikely(error)) { if (error == -ENOSPC) set_bit(AS_ENOSPC, &mapping->flags); else set_bit(AS_EIO, &mapping->flags); } } static inline void mapping_set_unevictable(struct address_space *mapping) { set_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_clear_unevictable(struct address_space *mapping) { clear_bit(AS_UNEVICTABLE, &mapping->flags); } static inline int mapping_unevictable(struct address_space *mapping) { if (mapping) return test_bit(AS_UNEVICTABLE, &mapping->flags); return !!mapping; } static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... */ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) { m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) | (__force unsigned long)mask; } /* * The page cache can done in larger chunks than * one page, because it allows for more efficient * throughput (it can then be mapped into user * space in smaller chunks for same flexibility). * * Or rather, it _will_ be done in larger chunks. */ #define PAGE_CACHE_SHIFT PAGE_SHIFT #define PAGE_CACHE_SIZE PAGE_SIZE #define PAGE_CACHE_MASK PAGE_MASK #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, int cold); /* * speculatively take a reference to a page. * If the page is free (_count == 0), then _count is untouched, and 0 * is returned. Otherwise, _count is incremented by 1 and 1 is returned. * * This function must be called inside the same rcu_read_lock() section as has * been used to lookup the page in the pagecache radix-tree (or page table): * this allows allocators to use a synchronize_rcu() to stabilize _count. * * Unless an RCU grace period has passed, the count of all pages coming out * of the allocator must be considered unstable. page_count may return higher * than expected, and put_page must be able to do the right thing when the * page has been finished with, no matter what it is subsequently allocated * for (because put_page is what is used here to drop an invalid speculative * reference). * * This is the interesting part of the lockless pagecache (and lockless * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) * has the following pattern: * 1. find page in radix tree * 2. conditionally increment refcount * 3. check the page is still in pagecache (if no, goto 1) * * Remove-side that cares about stability of _count (eg. reclaim) has the * following (with tree_lock held for write): * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) * B. remove page from pagecache * C. free the page * * There are 2 critical interleavings that matter: * - 2 runs before A: in this case, A sees elevated refcount and bails out * - A runs before 2: in this case, 2 sees zero refcount and retries; * subsequently, B will complete and 1 will find no page, causing the * lookup to return NULL. * * It is possible that between 1 and 2, the page is removed then the exact same * page is inserted into the same position in pagecache. That's OK: the * old find_get_page using tree_lock could equally have run before or after * such a re-insertion, depending on order that locks are granted. * * Lookups racing against pagecache insertion isn't a big problem: either 1 * will find the page or it will not. Likewise, the old find_get_page could run * either before the insertion or afterwards, depending on timing. */ static inline int page_cache_get_speculative(struct page *page) { VM_BUG_ON(in_interrupt()); #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif /* * Preempt must be disabled here - we rely on rcu_read_lock doing * this for us. * * Pagecache won't be truncated from interrupt context, so if we have * found a page in the radix tree here, we have pinned its refcount by * disabling preempt, and hence no need for the "speculative get" that * SMP requires. */ VM_BUG_ON(page_count(page) == 0); atomic_inc(&page->_count); #else if (unlikely(!get_page_unless_zero(page))) { /* * Either the page has been freed, or will be freed. * In either case, retry here and the caller should * do the right thing (see comments above). */ return 0; } #endif VM_BUG_ON(PageTail(page)); return 1; } /* * Same as above, but add instead of inc (could just be merged) */ static inline int page_cache_add_speculative(struct page *page, int count) { VM_BUG_ON(in_interrupt()); #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif VM_BUG_ON(page_count(page) == 0);


context:
space:
mode: