-rw-r--r--   block/blk-core.c                    |  20
-rw-r--r--   block/cfq-iosched.c                 | 101
-rw-r--r--   block/elevator.c                    |   8
-rw-r--r--   drivers/block/brd.c                 |  53
-rw-r--r--   drivers/block/cciss_scsi.c          |   2
-rw-r--r--   drivers/block/drbd/drbd_int.h       |  14
-rw-r--r--   drivers/block/drbd/drbd_main.c      |  68
-rw-r--r--   drivers/block/drbd/drbd_receiver.c  |  45
-rw-r--r--   drivers/block/drbd/drbd_req.c       |  54
-rw-r--r--   drivers/block/drbd/drbd_req.h       |   1
-rw-r--r--   drivers/block/drbd/drbd_worker.c    |  24
-rw-r--r--   fs/fs-writeback.c                   |  64
-rw-r--r--   fs/pipe.c                           |  77
-rw-r--r--   fs/splice.c                         |   2
-rw-r--r--   fs/sync.c                           |   2
-rw-r--r--   include/linux/backing-dev.h         |   2
-rw-r--r--   include/linux/blkdev.h              |   9
-rw-r--r--   include/linux/drbd.h                |   2
-rw-r--r--   include/linux/iocontext.h           |   1
-rw-r--r--   include/linux/pipe_fs_i.h           |   4
-rw-r--r--   include/linux/writeback.h           |  10
-rw-r--r--   kernel/sysctl.c                     |   8
-rw-r--r--   mm/page-writeback.c                 |   4
23 files changed, 311 insertions(+), 264 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 3bc5579d6f54..f84cce42fc58 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -467,6 +467,9 @@ static int blk_init_free_list(struct request_queue *q)
 {
 	struct request_list *rl = &q->rq;
 
+	if (unlikely(rl->rq_pool))
+		return 0;
+
 	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
 	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
 	rl->elvpriv = 0;
@@ -570,9 +573,17 @@ EXPORT_SYMBOL(blk_init_queue);
 struct request_queue *
 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
-	struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+	struct request_queue *uninit_q, *q;
+
+	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+	if (!uninit_q)
+		return NULL;
+
+	q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+	if (!q)
+		blk_cleanup_queue(uninit_q);
 
-	return blk_init_allocated_queue_node(q, rfn, lock, node_id);
+	return q;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
@@ -592,10 +603,8 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 		return NULL;
 
 	q->node = node_id;
-	if (blk_init_free_list(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
+	if (blk_init_free_list(q))
 		return NULL;
-	}
 
 	q->request_fn = rfn;
 	q->prep_rq_fn = NULL;
@@ -618,7 +627,6 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 		return q;
 	}
 
-	blk_put_queue(q);
 	return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue_node);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ed897b5ef315..5ff4f4850e71 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -64,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
+static DEFINE_SPINLOCK(cic_index_lock);
+static DEFINE_IDA(cic_index_ida);
+
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -271,6 +274,7 @@ struct cfq_data {
 	unsigned int cfq_latency;
 	unsigned int cfq_group_isolation;
 
+	unsigned int cic_index;
 	struct list_head cic_list;
 
 	/*
@@ -430,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 	cic->cfqq[is_sync] = cfqq;
 }
 
+#define CIC_DEAD_KEY	1ul
+#define CIC_DEAD_INDEX_SHIFT	1
+
+static inline void *cfqd_dead_key(struct cfq_data *cfqd)
+{
+	return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
+}
+
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+{
+	struct cfq_data *cfqd = cic->key;
+
+	if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
+		return NULL;
+
+	return cfqd;
+}
+
 /*
  * We regard a request as SYNC, if it's either a read or has the SYNC bit
  * set (in which case it could also be direct WRITE).
@@ -2510,11 +2532,12 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 {
 	unsigned long flags;
+	unsigned long dead_key = (unsigned long) cic->key;
 
-	BUG_ON(!cic->dead_key);
+	BUG_ON(!(dead_key & CIC_DEAD_KEY));
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	radix_tree_delete(&ioc->radix_root, cic->dead_key);
+	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2537,15 +2560,10 @@ static void cfq_free_io_context(struct io_context *ioc)
 	__call_for_each_cic(ioc, cic_free_func);
 }
 
-static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
 
-	if (unlikely(cfqq == cfqd->active_queue)) {
-		__cfq_slice_expired(cfqd, cfqq, 0);
-		cfq_schedule_dispatch(cfqd);
-	}
-
 	/*
 	 * If this queue was scheduled to merge with another queue, be
 	 * sure to drop the reference taken on that queue (and others in
@@ -2561,6 +2579,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfq_put_queue(__cfqq);
 		__cfqq = next;
 	}
+}
+
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	if (unlikely(cfqq == cfqd->active_queue)) {
+		__cfq_slice_expired(cfqd, cfqq, 0);
+		cfq_schedule_dispatch(cfqd);
+	}
+
+	cfq_put_cooperator(cfqq);
 
 	cfq_put_queue(cfqq);
 }
@@ -2573,11 +2601,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 	list_del_init(&cic->queue_list);
 
 	/*
-	 * Make sure key == NULL is seen for dead queues
+	 * Make sure dead mark is seen for dead queues
 	 */
 	smp_wmb();
-	cic->dead_key = (unsigned long) cic->key;
-	cic->key = NULL;
+	cic->key = cfqd_dead_key(cfqd);
 
 	if (ioc->ioc_data == cic)
 		rcu_assign_pointer(ioc->ioc_data, NULL);
@@ -2596,7 +2623,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 static void cfq_exit_single_io_context(struct io_context *ioc,
 				       struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 
 	if (cfqd) {
 		struct request_queue *q = cfqd->queue;
@@ -2609,7 +2636,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
 		 * race between exiting task and queue
 		 */
 		smp_read_barrier_depends();
-		if (cic->key)
+		if (cic->key == cfqd)
 			__cfq_exit_single_io_context(cfqd, cic);
 
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2689,7 +2716,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 
 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
 	unsigned long flags;
 
@@ -2746,7 +2773,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
-	struct cfq_data *cfqd = cic->key;
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	unsigned long flags;
 	struct request_queue *q;
 
@@ -2883,12 +2910,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 	unsigned long flags;
 
 	WARN_ON(!list_empty(&cic->queue_list));
+	BUG_ON(cic->key != cfqd_dead_key(cfqd));
 
 	spin_lock_irqsave(&ioc->lock, flags);
 
 	BUG_ON(ioc->ioc_data == cic);
 
-	radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+	radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2900,7 +2928,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
 	unsigned long flags;
-	void *k;
 
 	if (unlikely(!ioc))
 		return NULL;
@@ -2917,13 +2944,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	}
 
 	do {
-		cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
+		cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
 		rcu_read_unlock();
 		if (!cic)
 			break;
-		/* ->key must be copied to avoid race with cfq_exit_queue() */
-		k = cic->key;
-		if (unlikely(!k)) {
+		if (unlikely(cic->key != cfqd)) {
 			cfq_drop_dead_cic(cfqd, ioc, cic);
 			rcu_read_lock();
 			continue;
@@ -2956,7 +2981,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 
 		spin_lock_irqsave(&ioc->lock, flags);
 		ret = radix_tree_insert(&ioc->radix_root,
-						(unsigned long) cfqd, cic);
+						cfqd->cic_index, cic);
 		if (!ret)
 			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3516,6 +3541,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
 	}
 
 	cic_set_cfqq(cic, NULL, 1);
+
+	cfq_put_cooperator(cfqq);
+
 	cfq_put_queue(cfqq);
 	return NULL;
 }
@@ -3708,10 +3736,32 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
+	spin_lock(&cic_index_lock);
+	ida_remove(&cic_index_ida, cfqd->cic_index);
+	spin_unlock(&cic_index_lock);
+
 	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
 	call_rcu(&cfqd->rcu, cfq_cfqd_free);
 }
 
+static int cfq_alloc_cic_index(void)
+{
+	int index, error;
+
+	do {
+		if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
+			return -ENOMEM;
+
+		spin_lock(&cic_index_lock);
+		error = ida_get_new(&cic_index_ida, &index);
+		spin_unlock(&cic_index_lock);
+		if (error && error != -EAGAIN)
+			return error;
+	} while (error);
+
+	return index;
+}
+
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
@@ -3719,10 +3769,16 @@ static void *cfq_init_queue(struct request_queue *q)
 	struct cfq_group *cfqg;
 	struct cfq_rb_root *st;
 
+	i = cfq_alloc_cic_index();
+	if (i < 0)
+		return NULL;
+
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
+	cfqd->cic_index = i;
+
 	/* Init root service tree */
 	cfqd->grp_service_tree = CFQ_RB_ROOT;
 
@@ -3984,6 +4040,7 @@ static void __exit cfq_exit(void)
 	 */
 	if (elv_ioc_count_read(cfq_ioc_count))
 		wait_for_completion(&all_gone);
+	ida_destroy(&cic_index_ida);
 	cfq_slab_kill();
 }
 
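The CIC_DEAD_KEY hunks above rely on pointer tagging: a live cic->key is an aligned cfq_data pointer whose low bit is always clear, while a dead key stores the small cic_index shifted up by one with bit 0 set, so a single word can distinguish the two cases without an extra field. A minimal standalone C sketch of that idea, for illustration only (the struct and names below are made up, not kernel code):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: one word holds either a live, aligned pointer
 * (bit 0 clear) or a small index tagged "dead" by setting bit 0. */
#define DEAD_KEY		1ul
#define DEAD_INDEX_SHIFT	1

struct ctx { int index; };

static void *dead_key(const struct ctx *c)
{
	return (void *)(((unsigned long)c->index << DEAD_INDEX_SHIFT) | DEAD_KEY);
}

static struct ctx *live_ctx(void *key)
{
	/* A heap-allocated struct is at least word aligned, so bit 0 of a
	 * live pointer is clear; a set bit 0 means "dead". */
	if ((unsigned long)key & DEAD_KEY)
		return NULL;
	return key;
}

int main(void)
{
	struct ctx *c = malloc(sizeof(*c));
	void *key;

	if (!c)
		return 1;
	c->index = 42;

	key = c;			/* live: decodes back to the pointer */
	assert(live_ctx(key) == c);

	key = dead_key(c);		/* dead: pointer-sized marker */
	assert(live_ctx(key) == NULL);
	printf("dead index = %lu\n", (unsigned long)key >> DEAD_INDEX_SHIFT);

	free(c);
	return 0;
}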
diff --git a/block/elevator.c b/block/elevator.c
index 6df2b5056b51..923a9139106c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -242,9 +242,11 @@ int elevator_init(struct request_queue *q, char *name)
 {
 	struct elevator_type *e = NULL;
 	struct elevator_queue *eq;
-	int ret = 0;
 	void *data;
 
+	if (unlikely(q->elevator))
+		return 0;
+
 	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
 	q->end_sector = 0;
@@ -284,7 +286,7 @@ int elevator_init(struct request_queue *q, char *name)
 	}
 
 	elevator_attach(q, eq, data);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(elevator_init);
 
@@ -1097,7 +1099,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 	struct elevator_type *__e;
 	int len = 0;
 
-	if (!q->elevator)
+	if (!q->elevator || !blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
 	elv = e->elevator_type;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 6081e81d5738..f1bf79d9bc0a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 	return page;
 }
 
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+	pgoff_t idx;
+
+	spin_lock(&brd->brd_lock);
+	idx = sector >> PAGE_SECTORS_SHIFT;
+	page = radix_tree_delete(&brd->brd_pages, idx);
+	spin_unlock(&brd->brd_lock);
+	if (page)
+		__free_page(page);
+}
+
+static void brd_zero_page(struct brd_device *brd, sector_t sector)
+{
+	struct page *page;
+
+	page = brd_lookup_page(brd, sector);
+	if (page)
+		clear_highpage(page);
+}
+
 /*
  * Free all backing store pages and radix tree. This must only be called when
  * there are no other users of the device.
@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	return 0;
 }
 
+static void discard_from_brd(struct brd_device *brd,
+			sector_t sector, size_t n)
+{
+	while (n >= PAGE_SIZE) {
+		/*
+		 * Don't want to actually discard pages here because
+		 * re-allocating the pages can result in writeback
+		 * deadlocks under heavy load.
+		 */
+		if (0)
+			brd_free_page(brd, sector);
+		else
+			brd_zero_page(brd, sector);
+		sector += PAGE_SIZE >> SECTOR_SHIFT;
+		n -= PAGE_SIZE;
+	}
+}
+
 /*
  * Copy n bytes from src to the brd starting at sector. Does not sleep.
  */
@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
 			get_capacity(bdev->bd_disk))
 		goto out;
 
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+		err = 0;
+		discard_from_brd(brd, sector, bio->bi_size);
+		goto out;
+	}
+
 	rw = bio_rw(bio);
 	if (rw == READA)
 		rw = READ;
@@ -320,7 +366,7 @@ out:
 }
 
 #ifdef CONFIG_BLK_DEV_XIP
-static int brd_direct_access (struct block_device *bdev, sector_t sector,
+static int brd_direct_access(struct block_device *bdev, sector_t sector,
 			void **kaddr, unsigned long *pfn)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
+	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
+	brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+	brd->brd_queue->limits.discard_zeroes_data = 1;
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
+
 	disk = brd->brd_disk = alloc_disk(1 << part_shift);
 	if (!disk)
 		goto out_free_queue;
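With the hunks above, brd advertises QUEUE_FLAG_DISCARD with discard_zeroes_data set, so a discard request against a ramdisk ends up zeroing the covered pages rather than freeing them. A hedged userspace sketch of how such a request could be exercised through the BLKDISCARD ioctl (the device path and length are only examples; this assumes root and a loaded brd module):

#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	/* offset and length in bytes; 1 MiB starting at 0 */
	uint64_t range[2] = { 0, 1024 * 1024 };
	int fd = open("/dev/ram0", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* ask the block layer to discard the range; on brd this zeroes it */
	if (ioctl(fd, BLKDISCARD, range) < 0)
		perror("BLKDISCARD");
	close(fd);
	return 0;
}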
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1d0e2cfec72..3381505c8a6c 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack;
+	stk->top++;
 	if (stk->top >= CMD_STACK_SIZE) {
 		printk("cciss: scsi_cmd_free called too many times.\n");
 		BUG();
 	}
-	stk->top++;
 	stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
 }
 
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e9654c8d5b62..485ed8c7d623 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -943,8 +943,7 @@ struct drbd_conf {
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  md_sync_work,
-			  delay_probe_work,
-			  uuid_work;
+			  delay_probe_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
 	struct timer_list delay_probe_timer;
@@ -1069,7 +1068,6 @@ struct drbd_conf {
 	struct timeval dps_time; /* delay-probes-start-time */
 	unsigned int dp_volume_last;  /* send_cnt of last delay probe */
 	int c_sync_rate; /* current resync rate after delay_probe magic */
-	atomic_t new_c_uuid;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
-extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
 extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
 
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
-	int __user val = 1;
+	int __user val = 2;
 	(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
 			(char __user *)&val, sizeof(val));
 }
@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
 	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
 		if (!forcedetach) {
-			if (printk_ratelimit())
+			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s."
 					"Passing error on...\n", where);
 			break;
@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 		return 0;
 	if (test_bit(BITMAP_IO, &mdev->flags))
 		return 0;
-	if (atomic_read(&mdev->new_c_uuid))
-		return 0;
 	return 1;
 }
 
@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
-		drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
-
 	spin_lock_irq(&mdev->req_lock);
 	while (!__inc_ap_bio_cond(mdev)) {
 		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index be2d2da9cdba..6b077f93acc6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	    ns.pdsk == D_OUTDATED)) {
 		if (get_ldev(mdev)) {
 			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE &&
-			    !atomic_read(&mdev->new_c_uuid))
-				atomic_set(&mdev->new_c_uuid, 2);
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				drbd_uuid_new_current(mdev);
+				drbd_send_uuids(mdev);
+			}
 			put_ldev(mdev);
 		}
 	}
 
 	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		/* Diskless peer becomes primary or got connected do diskless, primary peer. */
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 &&
-		    !atomic_read(&mdev->new_c_uuid))
-			atomic_set(&mdev->new_c_uuid, 2);
+		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
+			drbd_uuid_new_current(mdev);
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	drbd_md_sync(mdev);
 }
 
-static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (get_ldev(mdev)) {
-		if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
-			drbd_uuid_new_current(mdev);
-			if (get_net_conf(mdev)) {
-				drbd_send_uuids(mdev);
-				put_net_conf(mdev);
-			}
-			drbd_md_sync(mdev);
-		}
-		put_ldev(mdev);
-	}
-	atomic_dec(&mdev->new_c_uuid);
-	wake_up(&mdev->misc_wait);
-
-	return 1;
-}
 
 static int drbd_thread_setup(void *arg)
 {
@@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
  * with page_count == 0 or PageSlab.
  */
 static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
-		   int offset, size_t size)
+		   int offset, size_t size, unsigned msg_flags)
 {
-	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
 	kunmap(page);
 	if (sent == size)
 		mdev->send_cnt += size>>9;
@@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
 }
 
 static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
-		    int offset, size_t size)
+		    int offset, size_t size, unsigned msg_flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int sent, ok;
@@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 	 * __page_cache_release a page that would actually still be referenced
 	 * by someone, leading to some obscure delayed Oops somewhere else. */
 	if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
-		return _drbd_no_send_page(mdev, page, offset, size);
+		return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
 
+	msg_flags |= MSG_NOSIGNAL;
 	drbd_update_congested(mdev);
 	set_fs(KERNEL_DS);
 	do {
 		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
 							offset, len,
-							MSG_NOSIGNAL);
+							msg_flags);
 		if (sent == -EAGAIN) {
 			if (we_should_drop_the_connection(mdev,
 							  mdev->data.socket))
@@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_no_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len))
+				     bvec->bv_offset, bvec->bv_len,
+				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
 	return 1;
@@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	/* hint all but last page with MSG_MORE */
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		if (!_drbd_send_page(mdev, bvec->bv_page,
-				     bvec->bv_offset, bvec->bv_len))
+				     bvec->bv_offset, bvec->bv_len,
+				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
 {
 	struct page *page = e->pages;
 	unsigned len = e->size;
+	/* hint all but last page with MSG_MORE */
 	page_chain_for_each(page) {
 		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		if (!_drbd_send_page(mdev, page, 0, l))
+		if (!_drbd_send_page(mdev, page, 0, l,
+				page_chain_next(page) ? MSG_MORE : 0))
 			return 0;
 		len -= l;
 	}
@@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	p.dp_flags = cpu_to_be32(dp_flags);
 	set_bit(UNPLUG_REMOTE, &mdev->flags);
 	ok = (sizeof(p) ==
-		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
+		drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok) {
 		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
@@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 		return 0;
 
 	ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
-					sizeof(p), MSG_MORE);
+					sizeof(p), dgs ? MSG_MORE : 0);
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
+		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok)
 		ok = _drbd_send_zc_ee(mdev, e);
@@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->net_cnt, 0);
 	atomic_set(&mdev->packet_seq, 0);
 	atomic_set(&mdev->pp_in_use, 0);
-	atomic_set(&mdev->new_c_uuid, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 	INIT_LIST_HEAD(&mdev->delay_probes);
 	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
-	INIT_LIST_HEAD(&mdev->uuid_work.list);
 
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
 	mdev->delay_probe_work.cb = w_delay_probes;
-	mdev->uuid_work.cb = w_new_current_uuid;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
 	init_timer(&mdev->delay_probe_timer);
@@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
 	if (ret) {
 		fault_count++;
 
-		if (printk_ratelimit())
+		if (__ratelimit(&drbd_ratelimit_state))
 			dev_warn(DEV, "***Simulating %s failure\n",
 				_drbd_fault_str(type));
 	}
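The drbd_main.c hunks above thread an msg_flags argument through the send helpers so that every fragment except the last is sent with MSG_MORE, letting TCP coalesce header, digest and payload into fewer segments. An illustrative userspace sketch of the same pattern, not drbd code ('fd' is assumed to be an already connected TCP socket):

#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Send n chunks; flag all but the last with MSG_MORE so the kernel may
 * batch them into fewer TCP segments. Returns total bytes sent or -1. */
static ssize_t send_chunks(int fd, const char *const *chunk, size_t n)
{
	ssize_t total = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		int flags = (i == n - 1) ? 0 : MSG_MORE;
		ssize_t ret = send(fd, chunk[i], strlen(chunk[i]), flags);

		if (ret < 0)
			return -1;
		total += ret;
	}
	return total;
}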
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index bc9ab7fb2cc7..dff48701b84d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -42,7 +42,6 @@
 #include <linux/unistd.h>
 #include <linux/vmalloc.h>
 #include <linux/random.h>
-#include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
 #include "drbd_int.h"
@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
 	return rv;
 }
 
+/* quoting tcp(7):
+ *   On individual connections, the socket buffer size must be set prior to the
+ *   listen(2) or connect(2) calls in order to have it take effect.
+ * This is our wrapper to do so.
+ */
+static void drbd_setbufsize(struct socket *sock, unsigned int snd,
+			    unsigned int rcv)
+{
+	/* open coded SO_SNDBUF, SO_RCVBUF */
+	if (snd) {
+		sock->sk->sk_sndbuf = snd;
+		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	}
+	if (rcv) {
+		sock->sk->sk_rcvbuf = rcv;
+		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+}
+
 static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 {
 	const char *what;
@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
 
 	sock->sk->sk_rcvtimeo =
 	sock->sk->sk_sndtimeo =  mdev->net_conf->try_connect_int*HZ;
+	drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	/* explicitly bind to the configured IP as source IP
 	*  for the outgoing connections.
@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
 	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
 	s_listen->sk->sk_rcvtimeo = timeo;
 	s_listen->sk->sk_sndtimeo = timeo;
+	drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
+			mdev->net_conf->rcvbuf_size);
 
 	what = "bind before listen";
 	err = s_listen->ops->bind(s_listen,
@@ -856,16 +878,6 @@ retry:
 	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
 	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
-	if (mdev->net_conf->sndbuf_size) {
-		sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
-		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-	}
-
-	if (mdev->net_conf->rcvbuf_size) {
-		sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
-		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-	}
-
 	/* NOT YET ...
 	 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
 	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 	unsigned n_bios = 0;
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 
-	if (atomic_read(&mdev->new_c_uuid)) {
-		if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
-			drbd_uuid_new_current(mdev);
-			drbd_md_sync(mdev);
-
-			atomic_dec(&mdev->new_c_uuid);
-			wake_up(&mdev->misc_wait);
-		}
-		wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
-	}
-
 	/* In most cases, we will only need one bio. But in case the lower
 	 * level restrictions happen to be different at this offset on this
 	 * side than those of the sending peer, we may need to submit the
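drbd_setbufsize() above follows the tcp(7) rule it quotes: socket buffer sizes only take effect on a connection if they are set before connect() or listen(). A minimal userspace sketch of the same ordering, for illustration only (address, port and sizes are placeholders, not drbd configuration):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int snd = 128 * 1024, rcv = 128 * 1024;
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(7788);
	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	/* set the buffer sizes first ... */
	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd));
	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv));

	/* ... and only then connect, so they apply to this connection */
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("connect");
	close(fd);
	return 0;
}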
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3397f11d0ba9..654f1ef5cbb0 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
 		}
 	}
 
-	/* if it was a local io error, we want to notify our
-	 * peer about that, and see if we need to
-	 * detach the disk and stuff.
-	 * to avoid allocating some special work
-	 * struct, reuse the request. */
-
-	/* THINK
-	 * why do we do this not when we detect the error,
-	 * but delay it until it is "done", i.e. possibly
-	 * until the next barrier ack? */
-
-	if (rw == WRITE &&
-	    ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
-		if (!(req->w.list.next == LIST_POISON1 ||
-		      list_empty(&req->w.list))) {
-			/* DEBUG ASSERT only; if this triggers, we
-			 * probably corrupt the worker list here */
-			dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
-			dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
-		}
-		req->w.cb = w_io_error;
-		drbd_queue_work(&mdev->data.work, &req->w);
-		/* drbd_req_free() is done in w_io_error */
-	} else {
-		drbd_req_free(req);
-	}
+	drbd_req_free(req);
 }
 
 static void queue_barrier(struct drbd_conf *mdev)
@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* and now: check how to handle local io error. */
 		__drbd_chk_io_error(mdev, FALSE);
 		_req_may_be_done(req, m);
 		put_ldev(mdev);
@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
-			(unsigned long long)req->sector, req->size);
-		/* _req_mod(req,to_be_send); oops, recursion... */
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
-		req->rq_state |= RQ_NET_PENDING;
-		inc_ap_pending(mdev);
 
 		__drbd_chk_io_error(mdev, FALSE);
 		put_ldev(mdev);
-		/* NOTE: if we have no connection,
-		 * or know the peer has no good data either,
-		 * then we don't actually need to "queue_for_net_read",
-		 * but we do so anyways, since the drbd_io_error()
-		 * and the potential state change to "Diskless"
-		 * needs to be done from process context */
 
+		/* no point in retrying if there is no good remote data,
+		 * or we have no connection. */
+		if (mdev->state.pdsk != D_UP_TO_DATE) {
+			_req_may_be_done(req, m);
+			break;
+		}
+
+		/* _req_mod(req,to_be_send); oops, recursion... */
+		req->rq_state |= RQ_NET_PENDING;
+		inc_ap_pending(mdev);
 		/* fall through: _req_mod(req,queue_for_net_read); */
 
 	case queue_for_net_read:
@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		_req_may_be_done(req, m);
 		break;
 
+	case read_retry_remote_canceled:
+		req->rq_state &= ~RQ_NET_QUEUED;
+		/* fall through, in case we raced with drbd_disconnect */
 	case connection_lost_while_pending:
 		/* transfer log cleanup after connection loss */
 		/* assert something? */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 16119d7056cc..02d575d24518 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -91,6 +91,7 @@ enum drbd_req_event {
 	send_failed,
 	handed_over_to_network,
 	connection_lost_while_pending,
+	read_retry_remote_canceled,
 	recv_acked_by_peer,
 	write_acked_by_peer,
 	write_acked_by_peer_and_sis, /* and set_in_sync */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 727ff6339754..b623ceee2a4a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
-	if (error)
-		dev_warn(DEV, "p %s: error=%d\n",
-			 bio_data_dir(bio) == WRITE ? "write" : "read", error);
 	if (!error && !uptodate) {
 		dev_warn(DEV, "p %s: setting error to -EIO\n",
 			 bio_data_dir(bio) == WRITE ? "write" : "read");
@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error)
 	complete_master_bio(mdev, &m);
 }
 
-int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	struct drbd_request *req = container_of(w, struct drbd_request, w);
-
-	/* NOTE: mdev->ldev can be NULL by the time we get here! */
-	/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
-
-	/* the only way this callback is scheduled is from _req_may_be_done,
-	 * when it is done and had a local write error, see comments there */
-	drbd_req_free(req);
-
-	return TRUE;
-}
-
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	 * to give the disk the chance to relocate that block */
 
 	spin_lock_irq(&mdev->req_lock);
-	if (cancel ||
-	    mdev->state.conn < C_CONNECTED ||
-	    mdev->state.pdsk <= D_INCONSISTENT) {
-		_req_mod(req, send_canceled);
+	if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
+		_req_mod(req, read_retry_remote_canceled);
 		spin_unlock_irq(&mdev->req_lock);
-		dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
 		return 1;
 	}
 	spin_unlock_irq(&mdev->req_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ea8592b90696..1d1088f48bc2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -45,7 +45,6 @@ struct wb_writeback_args { | |||
| 45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
| 46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
| 47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
| 48 | unsigned int sb_pinned:1; | ||
| 49 | }; | 48 | }; |
| 50 | 49 | ||
| 51 | /* | 50 | /* |
| @@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
| 193 | } | 192 | } |
| 194 | 193 | ||
| 195 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | 194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
| 196 | struct wb_writeback_args *args, | 195 | struct wb_writeback_args *args) |
| 197 | int wait) | ||
| 198 | { | 196 | { |
| 199 | struct bdi_work *work; | 197 | struct bdi_work *work; |
| 200 | 198 | ||
| @@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | |||
| 206 | if (work) { | 204 | if (work) { |
| 207 | bdi_work_init(work, args); | 205 | bdi_work_init(work, args); |
| 208 | bdi_queue_work(bdi, work); | 206 | bdi_queue_work(bdi, work); |
| 209 | if (wait) | ||
| 210 | bdi_wait_on_work_clear(work); | ||
| 211 | } else { | 207 | } else { |
| 212 | struct bdi_writeback *wb = &bdi->wb; | 208 | struct bdi_writeback *wb = &bdi->wb; |
| 213 | 209 | ||
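The else-branch kept here is the allocate-or-fall-back pattern: queue the writeback work if the small allocation succeeds, otherwise push a bounded amount of pages directly. A toy user-space model of the idea, with all names invented for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    struct wb_work { long nr_pages; };

    static void queue_async(struct wb_work *w)
    {
        printf("queued %ld pages for the flusher thread\n", w->nr_pages);
        free(w); /* in the kernel the flusher thread consumes and frees it */
    }

    static void write_some_inline(long nr)
    {
        printf("no memory for a work item, wrote %ld pages inline\n", nr);
    }

    static void kick_writeback(long nr_pages)
    {
        struct wb_work *w = malloc(sizeof(*w));

        if (w) {
            w->nr_pages = nr_pages;
            queue_async(w);
        } else {
            write_some_inline(nr_pages > 1024 ? 1024 : nr_pages);
        }
    }

    int main(void)
    {
        kick_writeback(4096);
        return 0;
    }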
| @@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
| 234 | .sync_mode = WB_SYNC_ALL, | 230 | .sync_mode = WB_SYNC_ALL, |
| 235 | .nr_pages = LONG_MAX, | 231 | .nr_pages = LONG_MAX, |
| 236 | .range_cyclic = 0, | 232 | .range_cyclic = 0, |
| 237 | /* | ||
| 238 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
| 239 | * lets make it explicitly clear. | ||
| 240 | */ | ||
| 241 | .sb_pinned = 1, | ||
| 242 | }; | 233 | }; |
| 243 | struct bdi_work work; | 234 | struct bdi_work work; |
| 244 | 235 | ||
| @@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
| 254 | * @bdi: the backing device to write from | 245 | * @bdi: the backing device to write from |
| 255 | * @sb: write inodes from this super_block | 246 | * @sb: write inodes from this super_block |
| 256 | * @nr_pages: the number of pages to write | 247 | * @nr_pages: the number of pages to write |
| 257 | * @sb_locked: caller already holds sb umount sem. | ||
| 258 | * | 248 | * |
| 259 | * Description: | 249 | * Description: |
| 260 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
| 261 | started when this function returns, we make no guarantees on | 251 | started when this function returns, we make no guarantees on |
| 262 | * completion. Caller specifies whether sb umount sem is held already or not. | 252 | * completion. Caller need not hold sb s_umount semaphore. |
| 263 | * | 253 | * |
| 264 | */ | 254 | */ |
| 265 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
| 266 | long nr_pages, int sb_locked) | 256 | long nr_pages) |
| 267 | { | 257 | { |
| 268 | struct wb_writeback_args args = { | 258 | struct wb_writeback_args args = { |
| 269 | .sb = sb, | 259 | .sb = sb, |
| 270 | .sync_mode = WB_SYNC_NONE, | 260 | .sync_mode = WB_SYNC_NONE, |
| 271 | .nr_pages = nr_pages, | 261 | .nr_pages = nr_pages, |
| 272 | .range_cyclic = 1, | 262 | .range_cyclic = 1, |
| 273 | .sb_pinned = sb_locked, | ||
| 274 | }; | 263 | }; |
| 275 | 264 | ||
| 276 | /* | 265 | /* |
| @@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | |||
| 282 | args.for_background = 1; | 271 | args.for_background = 1; |
| 283 | } | 272 | } |
| 284 | 273 | ||
| 285 | bdi_alloc_queue_work(bdi, &args, sb_locked); | 274 | bdi_alloc_queue_work(bdi, &args); |
| 286 | } | 275 | } |
| 287 | 276 | ||
| 288 | /* | 277 | /* |
| @@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
| 595 | /* | 584 | /* |
| 596 | * Caller must already hold the ref for this | 585 | * Caller must already hold the ref for this |
| 597 | */ | 586 | */ |
| 598 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { | 587 | if (wbc->sync_mode == WB_SYNC_ALL) { |
| 599 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 588 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
| 600 | return SB_NOT_PINNED; | 589 | return SB_NOT_PINNED; |
| 601 | } | 590 | } |
| @@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
| 769 | .for_kupdate = args->for_kupdate, | 758 | .for_kupdate = args->for_kupdate, |
| 770 | .for_background = args->for_background, | 759 | .for_background = args->for_background, |
| 771 | .range_cyclic = args->range_cyclic, | 760 | .range_cyclic = args->range_cyclic, |
| 772 | .sb_pinned = args->sb_pinned, | ||
| 773 | }; | 761 | }; |
| 774 | unsigned long oldest_jif; | 762 | unsigned long oldest_jif; |
| 775 | long wrote = 0; | 763 | long wrote = 0; |
| @@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 912 | 900 | ||
| 913 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 901 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
| 914 | struct wb_writeback_args args = work->args; | 902 | struct wb_writeback_args args = work->args; |
| 915 | int post_clear; | ||
| 916 | 903 | ||
| 917 | /* | 904 | /* |
| 918 | * Override sync mode, in case we must wait for completion | 905 | * Override sync mode, in case we must wait for completion |
| @@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 920 | if (force_wait) | 907 | if (force_wait) |
| 921 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 908 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
| 922 | 909 | ||
| 923 | post_clear = WB_SYNC_ALL || args.sb_pinned; | ||
| 924 | |||
| 925 | /* | 910 | /* |
| 926 | * If this isn't a data integrity operation, just notify | 911 | * If this isn't a data integrity operation, just notify |
| 927 | * that we have seen this work and we are now starting it. | 912 | * that we have seen this work and we are now starting it. |
| 928 | */ | 913 | */ |
| 929 | if (!post_clear) | 914 | if (args.sync_mode == WB_SYNC_NONE) |
| 930 | wb_clear_pending(wb, work); | 915 | wb_clear_pending(wb, work); |
| 931 | 916 | ||
| 932 | wrote += wb_writeback(wb, &args); | 917 | wrote += wb_writeback(wb, &args); |
| @@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 935 | * This is a data integrity writeback, so only do the | 920 | * This is a data integrity writeback, so only do the |
| 936 | * notification when we have completed the work. | 921 | * notification when we have completed the work. |
| 937 | */ | 922 | */ |
| 938 | if (post_clear) | 923 | if (args.sync_mode == WB_SYNC_ALL) |
| 939 | wb_clear_pending(wb, work); | 924 | wb_clear_pending(wb, work); |
| 940 | } | 925 | } |
| 941 | 926 | ||
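With sb_pinned gone, the only thing deciding when pending work is acknowledged is the sync mode: WB_SYNC_NONE work is acknowledged up front, WB_SYNC_ALL work only after it finishes, so integrity waiters cannot be woken early. A small stand-alone sketch of that ordering; the types and helpers are stand-ins, not the kernel API:

    #include <stdio.h>

    enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

    static void wb_clear_pending(const char *when)
    {
        printf("pending bit cleared %s\n", when);
    }

    static void run_one_work(enum sync_mode mode)
    {
        /* Opportunistic work: tell waiters we have merely seen it. */
        if (mode == WB_SYNC_NONE)
            wb_clear_pending("before writeback (WB_SYNC_NONE)");

        printf("  ...writing back pages...\n");

        /* Integrity work: only signal once everything has been written. */
        if (mode == WB_SYNC_ALL)
            wb_clear_pending("after writeback (WB_SYNC_ALL)");
    }

    int main(void)
    {
        run_one_work(WB_SYNC_NONE);
        run_one_work(WB_SYNC_ALL);
        return 0;
    }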
| @@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) | |||
| 1011 | if (!bdi_has_dirty_io(bdi)) | 996 | if (!bdi_has_dirty_io(bdi)) |
| 1012 | continue; | 997 | continue; |
| 1013 | 998 | ||
| 1014 | bdi_alloc_queue_work(bdi, &args, 0); | 999 | bdi_alloc_queue_work(bdi, &args); |
| 1015 | } | 1000 | } |
| 1016 | 1001 | ||
| 1017 | rcu_read_unlock(); | 1002 | rcu_read_unlock(); |
| @@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb) | |||
| 1220 | iput(old_inode); | 1205 | iput(old_inode); |
| 1221 | } | 1206 | } |
| 1222 | 1207 | ||
| 1223 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
| 1224 | { | ||
| 1225 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
| 1226 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
| 1227 | long nr_to_write; | ||
| 1228 | |||
| 1229 | nr_to_write = nr_dirty + nr_unstable + | ||
| 1230 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
| 1231 | |||
| 1232 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | /** | 1208 | /** |
| 1236 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1209 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
| 1237 | * @sb: the superblock | 1210 | * @sb: the superblock |
| @@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | |||
| 1243 | */ | 1216 | */ |
| 1244 | void writeback_inodes_sb(struct super_block *sb) | 1217 | void writeback_inodes_sb(struct super_block *sb) |
| 1245 | { | 1218 | { |
| 1246 | __writeback_inodes_sb(sb, 0); | 1219 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
| 1247 | } | 1220 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
| 1248 | EXPORT_SYMBOL(writeback_inodes_sb); | 1221 | long nr_to_write; |
| 1249 | 1222 | ||
| 1250 | /** | 1223 | nr_to_write = nr_dirty + nr_unstable + |
| 1251 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | 1224 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
| 1252 | * @sb: the superblock | 1225 | |
| 1253 | * | 1226 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); |
| 1254 | * Like writeback_inodes_sb(), except the caller already holds the | ||
| 1255 | * sb umount sem. | ||
| 1256 | */ | ||
| 1257 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
| 1258 | { | ||
| 1259 | __writeback_inodes_sb(sb, 1); | ||
| 1260 | } | 1227 | } |
| 1228 | EXPORT_SYMBOL(writeback_inodes_sb); | ||
| 1261 | 1229 | ||
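writeback_inodes_sb() now sizes the request itself: dirty file pages plus unstable NFS pages plus in-use inodes. A trivial user-space sketch of that estimate, with made-up counters standing in for global_page_state() and inodes_stat:

    #include <stdio.h>

    /* Placeholder counters; the kernel reads live VM and inode statistics. */
    static long nr_file_dirty   = 1200;
    static long nr_unstable_nfs = 300;
    static long nr_inodes       = 5000;
    static long nr_unused       = 4000;

    static long estimate_nr_to_write(void)
    {
        return nr_file_dirty + nr_unstable_nfs + (nr_inodes - nr_unused);
    }

    int main(void)
    {
        /* 1200 + 300 + (5000 - 4000) = 2500 pages worth of work */
        printf("nr_to_write = %ld\n", estimate_nr_to_write());
        return 0;
    }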
| 1262 | /** | 1230 | /** |
| 1263 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1231 | * writeback_inodes_sb_if_idle - start writeback if none underway |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
| @@ -26,9 +26,14 @@ | |||
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * The max size that a non-root user is allowed to grow the pipe. Can | 28 | * The max size that a non-root user is allowed to grow the pipe. Can |
| 29 | * be set by root in /proc/sys/fs/pipe-max-pages | 29 | * be set by root in /proc/sys/fs/pipe-max-size |
| 30 | */ | 30 | */ |
| 31 | unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; | 31 | unsigned int pipe_max_size = 1048576; |
| 32 | |||
| 33 | /* | ||
| 34 | * Minimum pipe size, as required by POSIX | ||
| 35 | */ | ||
| 36 | unsigned int pipe_min_size = PAGE_SIZE; | ||
| 32 | 37 | ||
| 33 | /* | 38 | /* |
| 34 | * We use a start+len construction, which provides full use of the | 39 | * We use a start+len construction, which provides full use of the |
| @@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
| 1118 | * Allocate a new array of pipe buffers and copy the info over. Returns the | 1123 | * Allocate a new array of pipe buffers and copy the info over. Returns the |
| 1119 | * pipe size if successful, or return -ERROR on error. | 1124 | * pipe size if successful, or return -ERROR on error. |
| 1120 | */ | 1125 | */ |
| 1121 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | 1126 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) |
| 1122 | { | 1127 | { |
| 1123 | struct pipe_buffer *bufs; | 1128 | struct pipe_buffer *bufs; |
| 1124 | 1129 | ||
| 1125 | /* | 1130 | /* |
| 1126 | * Must be a power-of-2 currently | ||
| 1127 | */ | ||
| 1128 | if (!is_power_of_2(arg)) | ||
| 1129 | return -EINVAL; | ||
| 1130 | |||
| 1131 | /* | ||
| 1132 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't | 1131 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't |
| 1133 | * expect a lot of shrink+grow operations, just free and allocate | 1132 | * expect a lot of shrink+grow operations, just free and allocate |
| 1134 | * again like we would do for growing. If the pipe currently | 1133 | * again like we would do for growing. If the pipe currently |
| 1135 | * contains more buffers than arg, then return busy. | 1134 | * contains more buffers than arg, then return busy. |
| 1136 | */ | 1135 | */ |
| 1137 | if (arg < pipe->nrbufs) | 1136 | if (nr_pages < pipe->nrbufs) |
| 1138 | return -EBUSY; | 1137 | return -EBUSY; |
| 1139 | 1138 | ||
| 1140 | bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); | 1139 | bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); |
| 1141 | if (unlikely(!bufs)) | 1140 | if (unlikely(!bufs)) |
| 1142 | return -ENOMEM; | 1141 | return -ENOMEM; |
| 1143 | 1142 | ||
| @@ -1158,8 +1157,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | |||
| 1158 | pipe->curbuf = 0; | 1157 | pipe->curbuf = 0; |
| 1159 | kfree(pipe->bufs); | 1158 | kfree(pipe->bufs); |
| 1160 | pipe->bufs = bufs; | 1159 | pipe->bufs = bufs; |
| 1161 | pipe->buffers = arg; | 1160 | pipe->buffers = nr_pages; |
| 1162 | return arg; | 1161 | return nr_pages * PAGE_SIZE; |
| 1162 | } | ||
| 1163 | |||
| 1164 | /* | ||
| 1165 | * Currently we rely on the pipe array holding a power-of-2 number | ||
| 1166 | * of pages. | ||
| 1167 | */ | ||
| 1168 | static inline unsigned int round_pipe_size(unsigned int size) | ||
| 1169 | { | ||
| 1170 | unsigned long nr_pages; | ||
| 1171 | |||
| 1172 | nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 1173 | return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | /* | ||
| 1177 | * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax | ||
| 1178 | * will return an error. | ||
| 1179 | */ | ||
| 1180 | int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, | ||
| 1181 | size_t *lenp, loff_t *ppos) | ||
| 1182 | { | ||
| 1183 | int ret; | ||
| 1184 | |||
| 1185 | ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); | ||
| 1186 | if (ret < 0 || !write) | ||
| 1187 | return ret; | ||
| 1188 | |||
| 1189 | pipe_max_size = round_pipe_size(pipe_max_size); | ||
| 1190 | return ret; | ||
| 1163 | } | 1191 | } |
| 1164 | 1192 | ||
| 1165 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | 1193 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) |
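round_pipe_size() converts a byte request into a power-of-two number of pages and back to bytes. A user-space re-derivation of the same arithmetic, assuming a 4 KiB page:

    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* assume 4 KiB pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    static unsigned long roundup_pow_of_two(unsigned long n)
    {
        unsigned long p = 1;

        while (p < n)
            p <<= 1;
        return p;
    }

    static unsigned long round_pipe_size(unsigned long size)
    {
        unsigned long nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

        return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
    }

    int main(void)
    {
        printf("%lu\n", round_pipe_size(1));       /* 4096: one page minimum */
        printf("%lu\n", round_pipe_size(20000));   /* 32768: 5 pages -> 8 pages */
        printf("%lu\n", round_pipe_size(1048576)); /* 1048576: already 256 pages */
        return 0;
    }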
| @@ -1174,23 +1202,24 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 1174 | mutex_lock(&pipe->inode->i_mutex); | 1202 | mutex_lock(&pipe->inode->i_mutex); |
| 1175 | 1203 | ||
| 1176 | switch (cmd) { | 1204 | switch (cmd) { |
| 1177 | case F_SETPIPE_SZ: | 1205 | case F_SETPIPE_SZ: { |
| 1178 | if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { | 1206 | unsigned int size, nr_pages; |
| 1179 | ret = -EINVAL; | 1207 | |
| 1208 | size = round_pipe_size(arg); | ||
| 1209 | nr_pages = size >> PAGE_SHIFT; | ||
| 1210 | |||
| 1211 | if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { | ||
| 1212 | ret = -EPERM; | ||
| 1180 | goto out; | 1213 | goto out; |
| 1181 | } | 1214 | } else if (nr_pages < PAGE_SIZE) { |
| 1182 | /* | ||
| 1183 | * The pipe needs to be at least 2 pages large to | ||
| 1184 | * guarantee POSIX behaviour. | ||
| 1185 | */ | ||
| 1186 | if (arg < 2) { | ||
| 1187 | ret = -EINVAL; | 1215 | ret = -EINVAL; |
| 1188 | goto out; | 1216 | goto out; |
| 1189 | } | 1217 | } |
| 1190 | ret = pipe_set_size(pipe, arg); | 1218 | ret = pipe_set_size(pipe, nr_pages); |
| 1191 | break; | 1219 | break; |
| 1220 | } | ||
| 1192 | case F_GETPIPE_SZ: | 1221 | case F_GETPIPE_SZ: |
| 1193 | ret = pipe->buffers; | 1222 | ret = pipe->buffers * PAGE_SIZE; |
| 1194 | break; | 1223 | break; |
| 1195 | default: | 1224 | default: |
| 1196 | ret = -EINVAL; | 1225 | ret = -EINVAL; |
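From user space the fcntls now speak bytes on both sides: F_SETPIPE_SZ returns the granted size after rounding, and F_GETPIPE_SZ reports pipe->buffers * PAGE_SIZE. A minimal sketch of using them; F_SETPIPE_SZ/F_GETPIPE_SZ need a libc recent enough to define the constants:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fds[2];
        long sz;

        if (pipe(fds) < 0)
            return 1;

        /* Request 128 KiB; on success the granted size in bytes comes back. */
        sz = fcntl(fds[1], F_SETPIPE_SZ, 128 * 1024);
        if (sz < 0)
            perror("F_SETPIPE_SZ");
        else
            printf("granted: %ld bytes\n", sz);

        /* F_GETPIPE_SZ now reports bytes, not pipe_buffer slots. */
        printf("queried: %ld bytes\n", (long)fcntl(fds[1], F_GETPIPE_SZ));

        close(fds[0]);
        close(fds[1]);
        return 0;
    }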
diff --git a/fs/splice.c b/fs/splice.c index ac22b00d86c3..740e6b9faf7a 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
| 354 | break; | 354 | break; |
| 355 | 355 | ||
| 356 | error = add_to_page_cache_lru(page, mapping, index, | 356 | error = add_to_page_cache_lru(page, mapping, index, |
| 357 | mapping_gfp_mask(mapping)); | 357 | GFP_KERNEL); |
| 358 | if (unlikely(error)) { | 358 | if (unlikely(error)) { |
| 359 | page_cache_release(page); | 359 | page_cache_release(page); |
| 360 | if (error == -EEXIST) | 360 | if (error == -EEXIST) |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
| @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
| 42 | if (wait) | 42 | if (wait) |
| 43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
| 44 | else | 44 | else |
| 45 | writeback_inodes_sb_locked(sb); | 45 | writeback_inodes_sb(sb); |
| 46 | 46 | ||
| 47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
| 48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e6e0cb5437e6..aee5f6ce166e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
| @@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | |||
| 106 | void bdi_unregister(struct backing_dev_info *bdi); | 106 | void bdi_unregister(struct backing_dev_info *bdi); |
| 107 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); | 107 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); |
| 108 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 108 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
| 109 | long nr_pages, int sb_locked); | 109 | long nr_pages); |
| 110 | int bdi_writeback_task(struct bdi_writeback *wb); | 110 | int bdi_writeback_task(struct bdi_writeback *wb); |
| 111 | int bdi_has_dirty_io(struct backing_dev_info *bdi); | 111 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
| 112 | void bdi_arm_supers_timer(void); | 112 | void bdi_arm_supers_timer(void); |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8b7f5e0914ad..09a840264d6f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -1211,14 +1211,23 @@ struct work_struct; | |||
| 1211 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1211 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
| 1212 | 1212 | ||
| 1213 | #ifdef CONFIG_BLK_CGROUP | 1213 | #ifdef CONFIG_BLK_CGROUP |
| 1214 | /* | ||
| 1215 | * This should not be using sched_clock(). A real patch is in progress | ||
| 1216 | * to fix this up, until that is in place we need to disable preemption | ||
| 1217 | * around sched_clock() in this function and set_io_start_time_ns(). | ||
| 1218 | */ | ||
| 1214 | static inline void set_start_time_ns(struct request *req) | 1219 | static inline void set_start_time_ns(struct request *req) |
| 1215 | { | 1220 | { |
| 1221 | preempt_disable(); | ||
| 1216 | req->start_time_ns = sched_clock(); | 1222 | req->start_time_ns = sched_clock(); |
| 1223 | preempt_enable(); | ||
| 1217 | } | 1224 | } |
| 1218 | 1225 | ||
| 1219 | static inline void set_io_start_time_ns(struct request *req) | 1226 | static inline void set_io_start_time_ns(struct request *req) |
| 1220 | { | 1227 | { |
| 1228 | preempt_disable(); | ||
| 1221 | req->io_start_time_ns = sched_clock(); | 1229 | req->io_start_time_ns = sched_clock(); |
| 1230 | preempt_enable(); | ||
| 1222 | } | 1231 | } |
| 1223 | 1232 | ||
| 1224 | static inline uint64_t rq_start_time_ns(struct request *req) | 1233 | static inline uint64_t rq_start_time_ns(struct request *req) |
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 68530521ad00..30da4ae48972 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h | |||
| @@ -53,7 +53,7 @@ | |||
| 53 | 53 | ||
| 54 | 54 | ||
| 55 | extern const char *drbd_buildtag(void); | 55 | extern const char *drbd_buildtag(void); |
| 56 | #define REL_VERSION "8.3.8rc1" | 56 | #define REL_VERSION "8.3.8rc2" |
| 57 | #define API_VERSION 88 | 57 | #define API_VERSION 88 |
| 58 | #define PRO_VERSION_MIN 86 | 58 | #define PRO_VERSION_MIN 86 |
| 59 | #define PRO_VERSION_MAX 94 | 59 | #define PRO_VERSION_MAX 94 |
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a0bb301afac0..64d529133031 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | struct cfq_queue; | 7 | struct cfq_queue; |
| 8 | struct cfq_io_context { | 8 | struct cfq_io_context { |
| 9 | void *key; | 9 | void *key; |
| 10 | unsigned long dead_key; | ||
| 11 | 10 | ||
| 12 | struct cfq_queue *cfqq[2]; | 11 | struct cfq_queue *cfqq[2]; |
| 13 | 12 | ||
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 16de3933c45e..445796945ac9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h | |||
| @@ -139,7 +139,9 @@ void pipe_lock(struct pipe_inode_info *); | |||
| 139 | void pipe_unlock(struct pipe_inode_info *); | 139 | void pipe_unlock(struct pipe_inode_info *); |
| 140 | void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); | 140 | void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); |
| 141 | 141 | ||
| 142 | extern unsigned int pipe_max_pages; | 142 | extern unsigned int pipe_max_size, pipe_min_size; |
| 143 | int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); | ||
| 144 | |||
| 143 | 145 | ||
| 144 | /* Drop the inode semaphore and wait for a pipe event, atomically */ | 146 | /* Drop the inode semaphore and wait for a pipe event, atomically */ |
| 145 | void pipe_wait(struct pipe_inode_info *pipe); | 147 | void pipe_wait(struct pipe_inode_info *pipe); |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index cc97d6caf2b3..f64134653a8c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
| @@ -65,15 +65,6 @@ struct writeback_control { | |||
| 65 | * so we use a single control to update them | 65 | * so we use a single control to update them |
| 66 | */ | 66 | */ |
| 67 | unsigned no_nrwrite_index_update:1; | 67 | unsigned no_nrwrite_index_update:1; |
| 68 | |||
| 69 | /* | ||
| 70 | * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, | ||
| 71 | * the writeback code will pin the sb for the caller. However, | ||
| 72 | * for eg umount, the caller does WB_SYNC_NONE but already has | ||
| 73 | * the sb pinned. If the below is set, caller already has the | ||
| 74 | * sb pinned. | ||
| 75 | */ | ||
| 76 | unsigned sb_pinned:1; | ||
| 77 | }; | 68 | }; |
| 78 | 69 | ||
| 79 | /* | 70 | /* |
| @@ -82,7 +73,6 @@ struct writeback_control { | |||
| 82 | struct bdi_writeback; | 73 | struct bdi_writeback; |
| 83 | int inode_wait(void *); | 74 | int inode_wait(void *); |
| 84 | void writeback_inodes_sb(struct super_block *); | 75 | void writeback_inodes_sb(struct super_block *); |
| 85 | void writeback_inodes_sb_locked(struct super_block *); | ||
| 86 | int writeback_inodes_sb_if_idle(struct super_block *); | 76 | int writeback_inodes_sb_if_idle(struct super_block *); |
| 87 | void sync_inodes_sb(struct super_block *); | 77 | void sync_inodes_sb(struct super_block *); |
| 88 | void writeback_inodes_wbc(struct writeback_control *wbc); | 78 | void writeback_inodes_wbc(struct writeback_control *wbc); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 997080f00e0b..d24f761f4876 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = { | |||
| 1471 | }, | 1471 | }, |
| 1472 | #endif | 1472 | #endif |
| 1473 | { | 1473 | { |
| 1474 | .procname = "pipe-max-pages", | 1474 | .procname = "pipe-max-size", |
| 1475 | .data = &pipe_max_pages, | 1475 | .data = &pipe_max_size, |
| 1476 | .maxlen = sizeof(int), | 1476 | .maxlen = sizeof(int), |
| 1477 | .mode = 0644, | 1477 | .mode = 0644, |
| 1478 | .proc_handler = &proc_dointvec_minmax, | 1478 | .proc_handler = &pipe_proc_fn, |
| 1479 | .extra1 = &two, | 1479 | .extra1 = &pipe_min_size, |
| 1480 | }, | 1480 | }, |
| 1481 | /* | 1481 | /* |
| 1482 | * NOTE: do not add new entries to this table unless you have read | 1482 | * NOTE: do not add new entries to this table unless you have read |
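The renamed knob lives at /proc/sys/fs/pipe-max-size, is clamped from below by pipe_min_size, and pipe_proc_fn() rounds whatever is written through round_pipe_size(). A small sketch of inspecting it from user space (writing it back requires root):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/fs/pipe-max-size", "r");
        unsigned int cur;

        if (!f) {
            perror("pipe-max-size");
            return 1;
        }
        if (fscanf(f, "%u", &cur) == 1)
            printf("current unprivileged limit: %u bytes\n", cur);

        /*
         * To raise the limit, root can open the file for writing and store a
         * byte count; the handler rounds it up to a power-of-two number of
         * pages before it takes effect.
         */
        fclose(f);
        return 0;
    }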
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b289310e2c89..5fa63bdf52e4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
| 597 | (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) | 597 | (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) |
| 598 | + global_page_state(NR_UNSTABLE_NFS)) | 598 | + global_page_state(NR_UNSTABLE_NFS)) |
| 599 | > background_thresh))) | 599 | > background_thresh))) |
| 600 | bdi_start_writeback(bdi, NULL, 0, 0); | 600 | bdi_start_writeback(bdi, NULL, 0); |
| 601 | } | 601 | } |
| 602 | 602 | ||
| 603 | void set_page_dirty_balance(struct page *page, int page_mkwrite) | 603 | void set_page_dirty_balance(struct page *page, int page_mkwrite) |
| @@ -707,7 +707,7 @@ void laptop_mode_timer_fn(unsigned long data) | |||
| 707 | */ | 707 | */ |
| 708 | 708 | ||
| 709 | if (bdi_has_dirty_io(&q->backing_dev_info)) | 709 | if (bdi_has_dirty_io(&q->backing_dev_info)) |
| 710 | bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); | 710 | bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); |
| 711 | } | 711 | } |
| 712 | 712 | ||
| 713 | /* | 713 | /* |
