aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/scripts/python
diff options
context:
space:
mode:
authorJaegeuk Kim <jaegeuk.kim@samsung.com>2013-01-31 01:36:04 -0500
committerJaegeuk Kim <jaegeuk.kim@samsung.com>2013-02-11 17:15:01 -0500
commitd4686d56ec912d55fd8a9d6d509de50de24e90ab (patch)
treef79a5df1353af58c671c58a4eaa7339d7ba80461 /tools/perf/scripts/python
parent369a708c2a6557fabd409cf17a03db271c54931a (diff)
f2fs: avoid balanc_fs during evict_inode
1. Background Previously, if f2fs tries to move data blocks of an *evicting* inode during the cleaning process, it stops the process incompletely and then restarts the whole process, since it needs a locked inode to grab victim data pages in its address space. In order to get a locked inode, iget_locked() by f2fs_iget() is normally used, but, it waits if the inode is on freeing. So, here is a deadlock scenario. 1. f2fs_evict_inode() <- inode "A" 2. f2fs_balance_fs() 3. f2fs_gc() 4. gc_data_segment() 5. f2fs_iget() <- inode "A" too! If step #1 and #5 treat a same inode "A", step #5 would fall into deadlock since the inode "A" is on freeing. In order to resolve this, f2fs_iget_nowait() which skips __wait_on_freeing_inode() was introduced in step #5, and stops f2fs_gc() to complete f2fs_evict_inode(). 1. f2fs_evict_inode() <- inode "A" 2. f2fs_balance_fs() 3. f2fs_gc() 4. gc_data_segment() 5. f2fs_iget_nowait() <- inode "A", then stop f2fs_gc() w/ -ENOENT 2. Problem and Solution In the above scenario, however, f2fs cannot finish f2fs_evict_inode() only if: o there are not enough free sections, and o f2fs_gc() tries to move data blocks of the *evicting* inode repeatedly. So, the final solution is to use f2fs_iget() and remove f2fs_balance_fs() in f2fs_evict_inode(). The f2fs_evict_inode() actually truncates all the data and node blocks, which means that it doesn't produce any dirty node pages accordingly. So, we don't need to do f2fs_balance_fs() in practical. Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Diffstat (limited to 'tools/perf/scripts/python')
0 files changed, 0 insertions, 0 deletions
n301'>301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
/*
 * Checksum updating actions
 *
 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <linux/types.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>

#include <linux/netlink.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>

#include <linux/skbuff.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/igmp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/ip6_checksum.h>

#include <net/act_api.h>

#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>

#define CSUM_TAB_MASK 15

static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
	[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};

static int csum_net_id;
static struct tc_action_ops act_csum_ops;

static int tcf_csum_init(struct net *net, struct nlattr *nla,
			 struct nlattr *est, struct tc_action **a, int ovr,
			 int bind)
{
	struct tc_action_net *tn = net_generic(net, csum_net_id);
	struct nlattr *tb[TCA_CSUM_MAX + 1];
	struct tc_csum *parm;
	struct tcf_csum *p;
	int ret = 0, err;

	if (nla == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CSUM_PARMS] == NULL)
		return -EINVAL;
	parm = nla_data(tb[TCA_CSUM_PARMS]);

	if (!tcf_hash_check(tn, parm->index, a, bind)) {
		ret = tcf_hash_create(tn, parm->index, est, a,
				      &act_csum_ops, bind, false);
		if (ret)
			return ret;
		ret = ACT_P_CREATED;
	} else {
		if (bind)/* dont override defaults */
			return 0;
		tcf_hash_release(*a, bind);
		if (!ovr)
			return -EEXIST;
	}

	p = to_tcf_csum(*a);
	spin_lock_bh(&p->tcf_lock);
	p->tcf_action = parm->action;
	p->update_flags = parm->update_flags;
	spin_unlock_bh(&p->tcf_lock);

	if (ret == ACT_P_CREATED)
		tcf_hash_insert(tn, *a);

	return ret;
}

/**
 * tcf_csum_skb_nextlayer - Get next layer pointer
 * @skb: sk_buff to use
 * @ihl: previous summed headers length
 * @ipl: complete packet length
 * @jhl: next header length
 *
 * Check the expected next layer availability in the specified sk_buff.
 * Return the next layer pointer if pass, NULL otherwise.
 */
static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
				    unsigned int ihl, unsigned int ipl,
				    unsigned int jhl)
{
	int ntkoff = skb_network_offset(skb);
	int hl = ihl + jhl;

	if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
	    skb_try_make_writable(skb, hl + ntkoff))
		return NULL;
	else
		return (void *)(skb_network_header(skb) + ihl);
}

static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
			      unsigned int ihl, unsigned int ipl)
{
	struct icmphdr *icmph;

	icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
	if (icmph == NULL)
		return 0;

	icmph->checksum = 0;
	skb->csum = csum_partial(icmph, ipl - ihl, 0);
	icmph->checksum = csum_fold(skb->csum);

	skb->ip_summed = CHECKSUM_NONE;

	return 1;
}

static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
			      unsigned int ihl, unsigned int ipl)
{
	struct igmphdr *igmph;

	igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
	if (igmph == NULL)
		return 0;

	igmph->csum = 0;
	skb->csum = csum_partial(igmph, ipl - ihl, 0);
	igmph->csum = csum_fold(skb->csum);

	skb->ip_summed = CHECKSUM_NONE;

	return 1;
}

static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
			      unsigned int ihl, unsigned int ipl)
{
	struct icmp6hdr *icmp6h;
	const struct ipv6hdr *ip6h;

	icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
	if (icmp6h == NULL)
		return 0;

	ip6h = ipv6_hdr(skb);
	icmp6h->icmp6_cksum = 0;
	skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
					      ipl - ihl, IPPROTO_ICMPV6,
					      skb->csum);

	skb->ip_summed = CHECKSUM_NONE;

	return 1;
}

static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
			     unsigned int ihl, unsigned int ipl)
{
	struct tcphdr *tcph;
	const struct iphdr *iph;

	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
	if (tcph == NULL)
		return 0;

	iph = ip_hdr(skb);
	tcph->check = 0;
	skb->csum = csum_partial(tcph, ipl - ihl, 0);
	tcph->check = tcp_v4_check(ipl - ihl,
				   iph->saddr, iph->daddr, skb->csum);

	skb->ip_summed = CHECKSUM_NONE;

	return 1;
}

static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
			     unsigned int ihl, unsigned int ipl)
{
	struct tcphdr *tcph;
	const struct ipv6hdr *ip6h;

	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
	if (tcph == NULL)
		return 0;

	ip6h = ipv6_hdr(skb);
	tcph->check = 0;
	skb->csum = csum_partial(tcph, ipl - ihl, 0);
	tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
				      ipl - ihl, IPPROTO_TCP,
				      skb->csum);

	skb->ip_summed = CHECKSUM_NONE;

	return 1;
}

static int tcf_csum_ipv4_udp(struct sk_buff *skb,
			     unsigned int ihl, unsigned int ipl, int udplite)
{
	struct udphdr *udph;
	const struct iphdr *iph;
	u16 ul;

	/*
	 * Support both UDP and UDPLITE checksum algorithms, Don't use
	 * udph->len to get the real length without any protocol check,
	 * UDPLITE uses udph->len for another thing,
	 * Use iph->tot_len, or just ipl.
	 */

	udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
	if (udph == NULL)
		return 0;

	iph = ip_hdr(skb);
	ul = ntohs(udph->len);

	if (udplite || udph->check) {

		udph->check = 0;

		if (udplite) {
			if (ul == 0)
				skb->csum = csum_partial(udph, ipl - ihl, 0);
			else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
				skb->csum = csum_partial(udph, ul, 0);
			else
				goto ignore_obscure_skb;
		} else {
			if (ul != ipl - ihl)
				goto ignore_obscure_skb;

			skb->csum = csum_partial(udph, ul, 0);
		}

		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						ul, iph->protocol,
						skb->csum);

		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	skb->ip_summed = CHECKSUM_NONE;

ignore_obscure_skb:
	return 1;
}

static int tcf_csum_ipv6_udp(struct sk_buff *skb,
			     unsigned int ihl, unsigned int ipl, int udplite)
{
	struct udphdr *udph;
	const struct ipv6hdr *ip6h;
	u16 ul;

	/*
	 * Support both UDP and UDPLITE checksum algorithms, Don't use
	 * udph->len to get the real length without any protocol check,
	 * UDPLITE uses udph->len for another thing,
	 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
	 */

	udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
	if (udph == NULL)
		return 0;

	ip6h = ipv6_hdr(skb);
	ul = ntohs(udph->len);

	udph->check = 0;

	if (udplite) {
		if (ul == 0)
			skb->csum = csum_partial(udph, ipl - ihl, 0);

		else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
			skb->csum = csum_partial(udph, ul, 0);

		else
			goto ignore_obscure_skb;
	} else {
		if (ul != ipl - ihl)
			goto ignore_obscure_skb;

		skb->csum = csum_partial(udph, ul, 0);
	}

	udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
				      udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
				      skb->csum);

	if (!udph->check)
		udph->check = CSUM_MANGLED_0;

	skb->ip_summed = CHECKSUM_NONE;

ignore_obscure_skb:
	return 1;
}

static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
{
	const struct iphdr *iph;
	int ntkoff;

	ntkoff = skb_network_offset(skb);

	if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
		goto fail;

	iph = ip_hdr(skb);

	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_ICMP:
		if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
			if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
						ntohs(iph->tot_len)))
				goto fail;
		break;
	case IPPROTO_IGMP:
		if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
			if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
						ntohs(iph->tot_len)))
				goto fail;
		break;
	case IPPROTO_TCP:
		if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
			if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
					       ntohs(iph->tot_len)))
				goto fail;
		break;
	case IPPROTO_UDP:
		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
			if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
					       ntohs(iph->tot_len), 0))
				goto fail;
		break;
	case IPPROTO_UDPLITE:
		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
			if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
					       ntohs(iph->tot_len), 1))
				goto fail;
		break;
	}

	if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
		if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
			goto fail;

		ip_send_check(ip_hdr(skb));
	}

	return 1;

fail:
	return 0;
}

static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
				 unsigned int ixhl, unsigned int *pl)
{
	int off, len, optlen;
	unsigned char *xh = (void *)ip6xh;

	off = sizeof(*ip6xh);
	len = ixhl - off;

	while (len > 1) {
		switch (xh[off]) {
		case IPV6_TLV_PAD1:
			optlen = 1;
			break;
		case IPV6_TLV_JUMBO:
			optlen = xh[off + 1] + 2;
			if (optlen != 6 || len < 6 || (off & 3) != 2)
				/* wrong jumbo option length/alignment */
				return 0;
			*pl = ntohl(*(__be32 *)(xh + off + 2));
			goto done;
		default:
			optlen = xh[off + 1] + 2;
			if (optlen > len)