Diffstat (limited to 'ipc/util.c')
-rw-r--r--    ipc/util.c | 232
1 file changed, 114 insertions(+), 118 deletions(-)
diff --git a/ipc/util.c b/ipc/util.c
index 464a8abd779f..809ec5ec8122 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -23,6 +23,7 @@
 #include <linux/msg.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/notifier.h>
 #include <linux/capability.h>
 #include <linux/highuid.h>
 #include <linux/security.h>
@@ -47,19 +48,16 @@ struct ipc_proc_iface {
        int (*show)(struct seq_file *, void *);
 };
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-
 static void ipc_memory_notifier(struct work_struct *work)
 {
        ipcns_notify(IPCNS_MEMCHANGED);
 }
 
-static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
-
-
 static int ipc_memory_callback(struct notifier_block *self,
                                unsigned long action, void *arg)
 {
+       static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
+
        switch (action) {
        case MEM_ONLINE:    /* memory successfully brought online */
        case MEM_OFFLINE:   /* or offline: it's time to recompute msgmni */
@@ -85,7 +83,10 @@ static int ipc_memory_callback(struct notifier_block *self,
        return NOTIFY_OK;
 }
 
-#endif /* CONFIG_MEMORY_HOTPLUG */
+static struct notifier_block ipc_memory_nb = {
+       .notifier_call = ipc_memory_callback,
+       .priority = IPC_CALLBACK_PRI,
+};
 
 /**
  * ipc_init - initialise IPC subsystem
@@ -102,7 +103,7 @@ static int __init ipc_init(void)
        sem_init();
        msg_init();
        shm_init();
-       hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI);
+       register_hotmemory_notifier(&ipc_memory_nb);
        register_ipcns_notifier(&init_ipc_ns);
        return 0;
 }
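The three hunks above convert the open-coded hotplug_memory_notifier(fn, pri) call into an explicit notifier_block registered through register_hotmemory_notifier(). Because that helper compiles to a no-op when CONFIG_MEMORY_HOTPLUG is disabled, the #ifdef CONFIG_MEMORY_HOTPLUG guards around the callback can be dropped entirely. A minimal sketch of the same registration pattern, with hypothetical names (my_mem_callback, my_mem_nb, my_module_init):

#include <linux/init.h>
#include <linux/memory.h>
#include <linux/notifier.h>

static int my_mem_callback(struct notifier_block *self,
                           unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                /* a memory block came or went; recompute derived limits */
                break;
        }
        return NOTIFY_OK;
}

/* priority 0 is arbitrary here; the patch uses IPC_CALLBACK_PRI */
static struct notifier_block my_mem_nb = {
        .notifier_call = my_mem_callback,
        .priority = 0,
};

static int __init my_module_init(void)
{
        register_hotmemory_notifier(&my_mem_nb);
        return 0;
}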
@@ -438,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
  * NULL is returned if the allocation fails
  */
 
-void* ipc_alloc(int size)
+void *ipc_alloc(int size)
 {
-       void* out;
+       void *out;
        if(size > PAGE_SIZE)
                out = vmalloc(size);
        else
@@ -465,126 +466,57 @@ void ipc_free(void* ptr, int size)
                kfree(ptr);
 }
 
-/*
- * rcu allocations:
- * There are three headers that are prepended to the actual allocation:
- * - during use: ipc_rcu_hdr.
- * - during the rcu grace period: ipc_rcu_grace.
- * - [only if vmalloc]: ipc_rcu_sched.
- * Their lifetime doesn't overlap, thus the headers share the same memory.
- * Unlike a normal union, they are right-aligned, thus some container_of
- * forward/backward casting is necessary:
- */
-struct ipc_rcu_hdr
-{
-       int refcount;
-       int is_vmalloc;
-       void *data[0];
-};
-
-
-struct ipc_rcu_grace
-{
+struct ipc_rcu {
        struct rcu_head rcu;
+       atomic_t refcount;
        /* "void *" makes sure alignment of following data is sane. */
        void *data[0];
 };
 
-struct ipc_rcu_sched
-{
-       struct work_struct work;
-       /* "void *" makes sure alignment of following data is sane. */
-       void *data[0];
-};
-
-#define HDRLEN_KMALLOC         (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
-                                       sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
-#define HDRLEN_VMALLOC         (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
-                                       sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
-
-static inline int rcu_use_vmalloc(int size)
-{
-       /* Too big for a single page? */
-       if (HDRLEN_KMALLOC + size > PAGE_SIZE)
-               return 1;
-       return 0;
-}
-
 /**
  * ipc_rcu_alloc - allocate ipc and rcu space
  * @size: size desired
  *
  * Allocate memory for the rcu header structure + the object.
- * Returns the pointer to the object.
- * NULL is returned if the allocation fails.
+ * Returns the pointer to the object or NULL upon failure.
  */
-
-void* ipc_rcu_alloc(int size)
+void *ipc_rcu_alloc(int size)
 {
-       void* out;
-       /*
-        * We prepend the allocation with the rcu struct, and
-        * workqueue if necessary (for vmalloc).
+       /*
+        * We prepend the allocation with the rcu struct
        */
-       if (rcu_use_vmalloc(size)) {
-               out = vmalloc(HDRLEN_VMALLOC + size);
-               if (out) {
-                       out += HDRLEN_VMALLOC;
-                       container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
-                       container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
-               }
-       } else {
-               out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
-               if (out) {
-                       out += HDRLEN_KMALLOC;
-                       container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
-                       container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
-               }
-       }
-
-       return out;
-}
-
-void ipc_rcu_getref(void *ptr)
-{
-       container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+       struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
+       if (unlikely(!out))
+               return NULL;
+       atomic_set(&out->refcount, 1);
+       return out->data;
 }
 
-static void ipc_do_vfree(struct work_struct *work)
+int ipc_rcu_getref(void *ptr)
 {
-       vfree(container_of(work, struct ipc_rcu_sched, work));
+       return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount);
 }
 
 /**
  * ipc_schedule_free - free ipc + rcu space
  * @head: RCU callback structure for queued work
- *
- * Since RCU callback function is called in bh,
- * we need to defer the vfree to schedule_work().
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-       struct ipc_rcu_grace *grace;
-       struct ipc_rcu_sched *sched;
-
-       grace = container_of(head, struct ipc_rcu_grace, rcu);
-       sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
-                               data[0]);
-
-       INIT_WORK(&sched->work, ipc_do_vfree);
-       schedule_work(&sched->work);
+       vfree(container_of(head, struct ipc_rcu, rcu));
 }
 
 void ipc_rcu_putref(void *ptr)
 {
-       if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+       struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data);
+
+       if (!atomic_dec_and_test(&p->refcount))
                return;
 
-       if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
-               call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
-                               ipc_schedule_free);
+       if (is_vmalloc_addr(ptr)) {
+               call_rcu(&p->rcu, ipc_schedule_free);
        } else {
-               kfree_rcu(container_of(ptr, struct ipc_rcu_grace, data), rcu);
+               kfree_rcu(p, rcu);
        }
 }
 
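The hunk above collapses the three overlapping, right-aligned headers (ipc_rcu_hdr, ipc_rcu_grace, ipc_rcu_sched) into a single struct ipc_rcu carrying both the rcu_head and an atomic reference count. The stored is_vmalloc flag becomes an is_vmalloc_addr() test on the pointer, ipc_rcu_getref() now reports failure once the count has already dropped to zero (which lockless lookups need), and the schedule_work() detour can go because vfree() is callable directly from the RCU callback here. A toy sketch of the prepended-header idiom the new code relies on, with hypothetical names (struct hdr, alloc_obj, put_obj):

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct hdr {
        struct rcu_head rcu;
        atomic_t refcount;
        void *data[0];          /* payload starts here, suitably aligned */
};

static void *alloc_obj(size_t size)
{
        struct hdr *h = kmalloc(sizeof(*h) + size, GFP_KERNEL);

        if (!h)
                return NULL;
        atomic_set(&h->refcount, 1);
        return h->data;         /* callers only ever see the payload */
}

static void put_obj(void *ptr)
{
        /* step back from the payload to the header */
        struct hdr *h = container_of(ptr, struct hdr, data);

        if (atomic_dec_and_test(&h->refcount))
                kfree_rcu(h, rcu);      /* header + payload freed after a grace period */
}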
@@ -668,38 +600,81 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 }
 
 /**
+ * ipc_obtain_object
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and return associated ipc object.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
+{
+       struct kern_ipc_perm *out;
+       int lid = ipcid_to_idx(id);
+
+       out = idr_find(&ids->ipcs_idr, lid);
+       if (!out)
+               return ERR_PTR(-EINVAL);
+
+       return out;
+}
+
+/**
  * ipc_lock - Lock an ipc structure without rw_mutex held
  * @ids: IPC identifier set
  * @id: ipc id to look for
  *
  * Look for an id in the ipc ids idr and lock the associated ipc object.
  *
- * The ipc object is locked on exit.
+ * The ipc object is locked on successful exit.
  */
-
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
        struct kern_ipc_perm *out;
-       int lid = ipcid_to_idx(id);
 
        rcu_read_lock();
-       out = idr_find(&ids->ipcs_idr, lid);
-       if (out == NULL) {
-               rcu_read_unlock();
-               return ERR_PTR(-EINVAL);
-       }
+       out = ipc_obtain_object(ids, id);
+       if (IS_ERR(out))
+               goto err1;
 
        spin_lock(&out->lock);
 
        /* ipc_rmid() may have already freed the ID while ipc_lock
         * was spinning: here verify that the structure is still valid
         */
-       if (out->deleted) {
-               spin_unlock(&out->lock);
-               rcu_read_unlock();
-               return ERR_PTR(-EINVAL);
-       }
+       if (!out->deleted)
+               return out;
 
+       spin_unlock(&out->lock);
+       out = ERR_PTR(-EINVAL);
+err1:
+       rcu_read_unlock();
+       return out;
+}
+
+/**
+ * ipc_obtain_object_check
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Similar to ipc_obtain_object() but also checks
+ * the ipc object reference counter.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
+{
+       struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
+
+       if (IS_ERR(out))
+               goto out;
+
+       if (ipc_checkid(out, id))
+               return ERR_PTR(-EIDRM);
+out:
        return out;
 }
 
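ipc_obtain_object() and ipc_obtain_object_check() split the lookup out of ipc_lock(), so callers can find an object under RCU alone and decide for themselves when, or whether, to take its spinlock. A hedged sketch of the expected calling pattern for a writer, assuming the declarations from ipc/util.h and a hypothetical update_object() helper:

static int update_object(struct ipc_ids *ids, int id)
{
        struct kern_ipc_perm *perm;
        int err = 0;

        rcu_read_lock();
        perm = ipc_obtain_object_check(ids, id);
        if (IS_ERR(perm)) {
                err = PTR_ERR(perm);
                goto out_rcu;
        }

        /* lockless reads are fine here; writers lock and recheck ->deleted */
        spin_lock(&perm->lock);
        if (perm->deleted) {            /* lost a race with ipc_rmid() */
                err = -EIDRM;
                goto out_unlock;
        }
        /* ... modify *perm ... */
out_unlock:
        spin_unlock(&perm->lock);
out_rcu:
        rcu_read_unlock();
        return err;
}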
@@ -780,11 +755,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
                              struct ipc64_perm *perm, int extra_perm)
 {
        struct kern_ipc_perm *ipcp;
+
+       ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
+       if (IS_ERR(ipcp))
+               goto out;
+
+       spin_lock(&ipcp->lock);
+out:
+       return ipcp;
+}
+
+struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+                                     struct ipc_ids *ids, int id, int cmd,
+                                     struct ipc64_perm *perm, int extra_perm)
+{
        kuid_t euid;
-       int err;
+       int err = -EPERM;
+       struct kern_ipc_perm *ipcp;
 
        down_write(&ids->rw_mutex);
-       ipcp = ipc_lock_check(ids, id);
+       rcu_read_lock();
+
+       ipcp = ipc_obtain_object_check(ids, id);
        if (IS_ERR(ipcp)) {
                err = PTR_ERR(ipcp);
                goto out_up;
@@ -793,17 +785,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
        audit_ipc_obj(ipcp);
        if (cmd == IPC_SET)
                audit_ipc_set_perm(extra_perm, perm->uid,
                                   perm->gid, perm->mode);
 
        euid = current_euid();
        if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) ||
            ns_capable(ns->user_ns, CAP_SYS_ADMIN))
                return ipcp;
 
-       err = -EPERM;
-       ipc_unlock(ipcp);
 out_up:
+       /*
+        * Unsuccessful lookup, unlock and return
+        * the corresponding error.
+        */
+       rcu_read_unlock();
        up_write(&ids->rw_mutex);
+
        return ERR_PTR(err);
 }
 
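These two hunks carve a _nolock variant out of ipcctl_pre_down(): the audit and permission checks now run under RCU without the object spinlock, and the old entry point simply takes ipcp->lock afterwards. On success the callee deliberately returns with rw_mutex write-held and the RCU read section open for the caller to release; on failure it drops both before returning the ERR_PTR. A speculative sketch of a ctl-style caller (the function name and the IPC_SET body are illustrative, not taken from this patch):

static int my_ctl_down(struct ipc_namespace *ns, struct ipc_ids *ids,
                       int id, int cmd, struct ipc64_perm *perm64)
{
        struct kern_ipc_perm *ipcp;

        ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm64, 0);
        if (IS_ERR(ipcp))
                return PTR_ERR(ipcp);   /* rw_mutex and RCU already released */

        /* here: rw_mutex write-held, RCU read-side open, object unlocked */
        if (cmd == IPC_SET) {
                spin_lock(&ipcp->lock);
                /* ... apply *perm64 to *ipcp ... */
                spin_unlock(&ipcp->lock);
        }

        rcu_read_unlock();
        up_write(&ids->rw_mutex);
        return 0;
}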
@@ -964,7 +960,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
        seq = file->private_data;
        seq->private = iter;
 
-       iter->iface = PDE(inode)->data;
+       iter->iface = PDE_DATA(inode);
        iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
 out:
        return ret;
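PDE(inode)->data reached into procfs internals directly; PDE_DATA(inode) is the accessor that replaced it when struct proc_dir_entry became private to fs/proc (later kernels spell it pde_data()). The pointer it returns is whatever was handed in at entry creation, e.g. (the path and mode here are assumptions, after the style of ipc_init_proc_interface()):

/* registration: stash the iface pointer in the proc entry */
proc_create_data(path, S_IRUGO, NULL, &sysvipc_proc_fops, iface);

/* open: recover the same pointer from the inode */
iter->iface = PDE_DATA(inode);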