aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2011-03-04 19:26:03 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-03-04 20:28:52 -0500
commite9e3d724e2145f5039b423c290ce2b2c3d8f94bc (patch)
tree9b0ff4de361fe358a5a2400b35a48206688b5f71 /fs
parent3256f80fbbc25bd2504bd564844c615227621e56 (diff)
nfs4: Ensure that ACL pages sent over NFS were not allocated from the slab (v3)
The "bad_page()" page allocator sanity check was reported recently (call chain as follows): bad_page+0x69/0x91 free_hot_cold_page+0x81/0x144 skb_release_data+0x5f/0x98 __kfree_skb+0x11/0x1a tcp_ack+0x6a3/0x1868 tcp_rcv_established+0x7a6/0x8b9 tcp_v4_do_rcv+0x2a/0x2fa tcp_v4_rcv+0x9a2/0x9f6 do_timer+0x2df/0x52c ip_local_deliver+0x19d/0x263 ip_rcv+0x539/0x57c netif_receive_skb+0x470/0x49f :virtio_net:virtnet_poll+0x46b/0x5c5 net_rx_action+0xac/0x1b3 __do_softirq+0x89/0x133 call_softirq+0x1c/0x28 do_softirq+0x2c/0x7d do_IRQ+0xec/0xf5 default_idle+0x0/0x50 ret_from_intr+0x0/0xa default_idle+0x29/0x50 cpu_idle+0x95/0xb8 start_kernel+0x220/0x225 _sinittext+0x22f/0x236 It occurs because an skb with a fraglist was freed from the tcp retransmit queue when it was acked, but a page on that fraglist had PG_Slab set (indicating it was allocated from the Slab allocator (which means the free path above can't safely free it via put_page. We tracked this back to an nfsv4 setacl operation, in which the nfs code attempted to fill convert the passed in buffer to an array of pages in __nfs4_proc_set_acl, which gets used by the skb->frags list in xs_sendpages. __nfs4_proc_set_acl just converts each page in the buffer to a page struct via virt_to_page, but the vfs allocates the buffer via kmalloc, meaning the PG_slab bit is set. We can't create a buffer with kmalloc and free it later in the tcp ack path with put_page, so we need to either: 1) ensure that when we create the list of pages, no page struct has PG_Slab set or 2) not use a page list to send this data Given that these buffers can be multiple pages and arbitrarily sized, I think (1) is the right way to go. I've written the below patch to allocate a page from the buddy allocator directly and copy the data over to it. This ensures that we have a put_page free-able page for every entry that winds up on an skb frag list, so it can be safely freed when the frame is acked. We do a put page on each entry after the rpc_call_sync call so as to drop our own reference count to the page, leaving only the ref count taken by tcp_sendpages. This way the data will be properly freed when the ack comes in Successfully tested by myself to solve the above oops. Note, as this is the result of a setacl operation that exceeded a page of data, I think this amounts to a local DOS triggerable by an uprivlidged user, so I'm CCing security on this as well. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: Trond Myklebust <Trond.Myklebust@netapp.com> CC: security@kernel.org CC: Jeff Layton <jlayton@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/nfs4proc.c44
1 files changed, 42 insertions, 2 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..1ff76acc7e98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h> 53#include <linux/utsname.h>
54#include <linux/mm.h>
54 55
55#include "nfs4_fs.h" 56#include "nfs4_fs.h"
56#include "delegation.h" 57#include "delegation.h"
@@ -3252,6 +3253,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3253 }
3253} 3254}
3254 3255
3256static int buf_to_pages_noslab(const void *buf, size_t buflen,
3257 struct page **pages, unsigned int *pgbase)
3258{
3259 struct page *newpage, **spages;
3260 int rc = 0;
3261 size_t len;
3262 spages = pages;
3263
3264 do {
3265 len = min(PAGE_CACHE_SIZE, buflen);
3266 newpage = alloc_page(GFP_KERNEL);
3267
3268 if (newpage == NULL)
3269 goto unwind;
3270 memcpy(page_address(newpage), buf, len);
3271 buf += len;
3272 buflen -= len;
3273 *pages++ = newpage;
3274 rc++;
3275 } while (buflen != 0);
3276
3277 return rc;
3278
3279unwind:
3280 for(; rc > 0; rc--)
3281 __free_page(spages[rc-1]);
3282 return -ENOMEM;
3283}
3284
3255struct nfs4_cached_acl { 3285struct nfs4_cached_acl {
3256 int cached; 3286 int cached;
3257 size_t len; 3287 size_t len;
@@ -3420,13 +3450,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3450 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3451 .rpc_resp = &res,
3422 }; 3452 };
3423 int ret; 3453 int ret, i;
3424 3454
3425 if (!nfs4_server_supports_acls(server)) 3455 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3456 return -EOPNOTSUPP;
3457 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3458 if (i < 0)
3459 return i;
3427 nfs_inode_return_delegation(inode); 3460 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3461 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3462
3463 /*
3464 * Free each page after tx, so the only ref left is
3465 * held by the network stack
3466 */
3467 for (; i > 0; i--)
3468 put_page(pages[i-1]);
3469
3430 /* 3470 /*
3431 * Acl update can result in inode attribute update. 3471 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3472 * so mark the attribute cache invalid.