/*
 *  fs/signalfd.c
 *
 *  Copyright (C) 2003  Linus Torvalds
 *
 *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org>
 *      Changed ->read() to return a siginfo structure instead of signal number.
 *      Fixed locking in ->poll().
 *      Added sighand-detach notification.
 *      Added fd re-use in sys_signalfd() syscall.
 *      Now using anonymous inode source.
 *      Thanks to Oleg Nesterov for useful code review and suggestions.
 *      More comments and suggestions from Arnd Bergmann.
 * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
 *      Retrieve multiple signals with one read() call
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/signalfd.h>

/*
 * Per-descriptor state of a signalfd; reached through file->private_data.
 */
struct signalfd_ctx {
	/* Entry in the sighand's signalfd_list; changed under the sighand lock. */
	struct list_head lnk;
	/* Readers and pollers wait here; woken by signalfd_deliver() and sys_signalfd(). */
	wait_queue_head_t wqh;
	/* Stored inverted: a clear bit marks a signal this descriptor is interested in. */
	sigset_t sigmask;
	/* Task whose signals are read; set to NULL by signalfd_deliver() on detach. */
	struct task_struct *tsk;
};

/*
 * State carried from a successful signalfd_lock() to the matching
 * signalfd_unlock().
 */
struct signalfd_lockctx {
	/* Snapshot of ctx->tsk taken by signalfd_lock(); owner of the locked sighand. */
	struct task_struct *tsk;
	/* Flags word filled by lock_task_sighand() and handed back to unlock_task_sighand(). */
	unsigned long flags;
};

/*
 * Tries to acquire the sighand lock of the task this context is attached
 * to. We do not increment the sighand use count, and we do not even pin
 * the task struct, so the lookup has to be done inside an RCU read-side
 * section, and we must be prepared for ctx->tsk going to NULL (in
 * signalfd_deliver()), and for the sighand being detached.
 *
 * On success, lk->tsk and lk->flags describe the lock that was taken and
 * must be handed to signalfd_unlock().
 *
 * Returns 0 if the sighand has been detached, or 1 if we were able to
 * take the sighand lock.
 */
static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
{
	struct sighand_struct *sighand = NULL;

	/* Snapshot the task pointer and try to lock its sighand under RCU. */
	rcu_read_lock();
	lk->tsk = rcu_dereference(ctx->tsk);
	if (likely(lk->tsk != NULL))
		sighand = lock_task_sighand(lk->tsk, &lk->flags);
	rcu_read_unlock();

	/*
	 * ctx->tsk may have been cleared between the snapshot above and the
	 * moment the lock was obtained. signalfd_deliver() clears it with the
	 * sighand lock held, so re-reading it now tells us whether the
	 * context is still attached; if it is not, back out.
	 */
	if (sighand && !ctx->tsk) {
		unlock_task_sighand(lk->tsk, &lk->flags);
		sighand = NULL;
	}

	return sighand != NULL;
}

/*
 * Releases the sighand lock obtained by a successful signalfd_lock(),
 * using the task and flags recorded in the lock context.
 */
static void signalfd_unlock(struct signalfd_lockctx *lk)
{
	struct task_struct *owner = lk->tsk;

	unlock_task_sighand(owner, &lk->flags);
}

/*
 * Walks the signalfd contexts registered on tsk's sighand and wakes up
 * the interested ones.
 *
 * A negative sig is used as a broadcast that the sighand has been
 * orphaned: every context attached to tsk is detached (its task pointer
 * is cleared and it is unlinked from the list) and its waiters are woken
 * so they can notice. Otherwise only the contexts interested in sig are
 * woken. Remember that ctx->sigmask is inverted, so a signal the user is
 * interested in has its bit cleared.
 *
 * This must be called with the sighand lock held.
 */
void signalfd_deliver(struct task_struct *tsk, int sig)
{
	struct list_head *listeners = &tsk->sighand->signalfd_list;
	struct signalfd_ctx *cur, *next;

	BUG_ON(!sig);
	list_for_each_entry_safe(cur, next, listeners, lnk) {
		if (sig < 0) {
			/* Detach notification: only contexts bound to tsk. */
			if (cur->tsk != tsk)
				continue;
			cur->tsk = NULL;
			list_del_init(&cur->lnk);
			wake_up(&cur->wqh);
			continue;
		}

		/* Regular signal: wake only if the context wants it. */
		if (sigismember(&cur->sigmask, sig))
			continue;
		wake_up(&cur->wqh);
	}
}

/*
 * Unlinks the context from its sighand's listener list, when it is still
 * attached, and frees it.
 */
static void signalfd_cleanup(struct signalfd_ctx *ctx)
{
	struct signalfd_lockctx lk;

	/*
	 * If the lock cannot be taken the sighand is gone, and so is the
	 * list the context used to sit on: there is nothing to unlink.
	 */
	if (!signalfd_lock(ctx, &lk))
		goto out_free;

	list_del(&ctx->lnk);
	signalfd_unlock(&lk);

out_free:
	kfree(ctx);
}

/*
 * ->release() handler: tears down the context stored in the file's
 * private data. Always returns 0.
 */
static int signalfd_release(struct inode *inode, struct file *file)
{
	struct signalfd_ctx *ctx = file->private_data;

	signalfd_cleanup(ctx);

	return 0;
}

/*
 * ->poll() handler. Reports POLLIN when a signal the context is
 * interested in is pending, or when the sighand has been detached.
 */
static unsigned int signalfd_poll(struct file *file, poll_table *wait)
{
	struct signalfd_ctx *ctx = file->private_data;
	struct signalfd_lockctx lk;
	int ready;

	poll_wait(file, &ctx->wqh, wait);

	/*
	 * Detached sighand: let the caller get a POLLIN, ala socket recv()
	 * when the peer disconnects.
	 */
	if (!signalfd_lock(ctx, &lk))
		return POLLIN;

	/*
	 * The task-private queue is only looked at when the poller is the
	 * attached task itself; the shared queue is checked otherwise, or
	 * when the private one has nothing for us.
	 */
	ready = lk.tsk == current &&
		next_signal(&lk.tsk->pending, &ctx->sigmask) > 0;
	if (!ready)
		ready = next_signal(&lk.tsk->signal->shared_pending,
				    &ctx->sigmask) > 0;
	signalfd_unlock(&lk);

	return ready ? POLLIN : 0;
}

/*
 * Translates a kernel siginfo_t into the fixed-size signalfd_siginfo
 * record at uinfo. Modelled on copy_siginfo_to_user() in kernel/signal.c.
 *
 * The record is cleared first, so members that do not apply to the
 * signal's si_code class read back as zero.
 *
 * The unchecked __clear_user()/__put_user() accessors are used.
 * NOTE(review): this relies on the destination range having been
 * validated before we get here (presumably by the generic read() path) --
 * confirm against the callers.
 *
 * Returns sizeof(*uinfo) on success, or -EFAULT if any user access failed.
 */
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
			     siginfo_t const *kinfo)
{
	long err;

	/* The record layout is user-visible; its size must not drift. */
	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);

	/*
	 * Unused members should be zero ...
	 */
	err = __clear_user(uinfo, sizeof(*uinfo));

	/*
	 * If you change siginfo_t structure, please be sure
	 * this code is fixed accordingly.
	 */
	err |= __put_user(kinfo->si_signo, &uinfo->signo);
	err |= __put_user(kinfo->si_errno, &uinfo->err);
	err |= __put_user((short)kinfo->si_code, &uinfo->code);
	switch (kinfo->si_code & __SI_MASK) {
	case __SI_KILL:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		break;
	case __SI_TIMER:
		err |= __put_user(kinfo->si_tid, &uinfo->tid);
		err |= __put_user(kinfo->si_overrun, &uinfo->overrun);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	case __SI_POLL:
		err |= __put_user(kinfo->si_band, &uinfo->band);
		err |= __put_user(kinfo->si_fd, &uinfo->fd);
		break;
	case __SI_FAULT:
		err |= __put_user((long)kinfo->si_addr, &uinfo->addr);
#ifdef __ARCH_SI_TRAPNO
		err |= __put_user(kinfo->si_trapno, &uinfo->trapno);
#endif
		break;
	case __SI_CHLD:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user(kinfo->si_status, &uinfo->status);
		err |= __put_user(kinfo->si_utime, &uinfo->utime);
		err |= __put_user(kinfo->si_stime, &uinfo->stime);
		break;
	case __SI_RT: /* This is not generated by the kernel as of now. */
	case __SI_MESGQ: /* But this is */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	default:
		/*
		 * Codes that match none of the __SI_* classes end up here.
		 * That includes signals queued from user space with
		 * sigqueue(), whose si_code is negative. Report the sigval
		 * payload together with the sender, otherwise the data
		 * attached by sigqueue() would never reach the reader.
		 *
		 * NOTE(review): si_int shares storage with si_ptr; if
		 * struct signalfd_siginfo has a dedicated integer sigval
		 * member it should be filled from si_int here (and in the
		 * __SI_RT/__SI_MESGQ case) as well -- the structure is not
		 * visible from this file.
		 */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	}

	return err ? -EFAULT : sizeof(*uinfo);
}

/*
 * Dequeues one signal matching ctx->sigmask from the attached task into
 * *info, sleeping interruptibly until one shows up unless nonblock is set.
 *
 * Returns the non-zero result of dequeue_signal() when a signal was taken
 * (presumably the signal number), 0 if the sighand has been detached,
 * -EAGAIN if nonblock is set and nothing is pending, or -ERESTARTSYS if
 * the caller itself has a signal pending while waiting.
 */
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
				int nonblock)
{
	ssize_t ret;
	struct signalfd_lockctx lk;
	DECLARE_WAITQUEUE(wait, current);

	/* Detached sighand: report "end of file" to the caller. */
	if (!signalfd_lock(ctx, &lk))
		return 0;

	/* Fast path: try once with the sighand lock held. */
	ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
	switch (ret) {
	case 0:
		/*
		 * Nothing pending. A blocking caller leaves the switch with
		 * the lock still held and goes on to the wait loop below.
		 */
		if (!nonblock)
			break;
		ret = -EAGAIN;
		/* fall through */
	default:
		signalfd_unlock(&lk);
		return ret;
	}

	/* Still holding the sighand lock taken above. */
	add_wait_queue(&ctx->wqh, &wait);
	for (;;) {
		/*
		 * The task state is changed before looking for a signal, and
		 * the check itself is done under the sighand lock, so that a
		 * wake_up() arriving after the check simply makes the
		 * schedule() below return instead of being lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
		signalfd_unlock(&lk);
		if (ret != 0)
			break;
		/* A signal for the reader itself interrupts the wait. */
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
		/* Re-take the lock for the next pass; ret is 0 if that fails. */
		ret = signalfd_lock(ctx, &lk);
		if (unlikely(!ret)) {
			/*
			 * Let the caller read zero byte, ala socket
			 * recv() when the peer disconnect. This test
			 * must be done before doing a dequeue_signal(),
			 * because if the sighand has been orphaned,
			 * the dequeue_signal() call is going to crash
			 * because ->sighand will be long gone.
			 */
			 break;
		}
	}

	/* Every exit from the loop leaves the sighand lock released. */
	remove_wait_queue(&ctx->wqh, &wait);
	__set_current_state(TASK_RUNNING);

	return ret;
}

/*
 * ->read() handler. Fills the user buffer with as many
 * "struct signalfd_siginfo" records as are available and fit in "count"
 * bytes. Only the first record may block (and only if the file is not
 * O_NONBLOCK); once something has been read, the remaining records are
 * fetched without sleeping.
 *
 * Returns the number of bytes stored when at least one record was read.
 * Otherwise returns zero if the sighand we are attached to has been
 * orphaned, or a negative error: -EINVAL when "count" is smaller than one
 * record, or whatever signalfd_dequeue()/signalfd_copyinfo() reported.
 */
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct signalfd_ctx *ctx = file->private_data;
	struct signalfd_siginfo __user *dst =
		(struct signalfd_siginfo __user *) buf;
	size_t slots = count / sizeof(struct signalfd_siginfo);
	int nonblock = file->f_flags & O_NONBLOCK;
	ssize_t copied = 0;
	ssize_t ret;
	siginfo_t info;

	if (slots == 0)
		return -EINVAL;

	for (;;) {
		ret = signalfd_dequeue(ctx, &info, nonblock);
		if (unlikely(ret <= 0))
			break;

		ret = signalfd_copyinfo(dst, &info);
		if (ret < 0)
			break;

		dst++;
		copied += ret;
		/* Never sleep again once a record has been delivered. */
		nonblock = 1;

		if (--slots == 0)
			break;
	}

	if (copied)
		return copied;
	return ret;
}

/*
 * File operations of a signalfd. The address of this table is also what
 * sys_signalfd() compares file->f_op against to recognise a signalfd.
 */
static const struct file_operations signalfd_fops = {
	.release	= signalfd_release,
	.poll		= signalfd_poll,
	.read		= signalfd_read,
};

/*
 * Create a file descriptor that is associated with our signal
 * state. We can pass it around to others if we want to, but
 * it will always be _our_ signal state.
 *
 * With ufd == -1 a new descriptor is created for the set of signals in
 * user_mask. Otherwise ufd must be an existing signalfd, whose set of
 * signals is replaced. SIGKILL and SIGSTOP are always dropped from the
 * requested set.
 *
 * Returns the descriptor on success, or a negative error: -EINVAL if
 * sizemask is not sizeof(sigset_t), the mask cannot be read from user
 * space or ufd is not a signalfd; -EBADF if ufd is not an open file;
 * -ENOMEM if the context cannot be allocated; or the error reported by
 * anon_inode_getfd().
 */
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
	sigset_t sigmask;
	struct signalfd_ctx *ctx;
	struct sighand_struct *sighand;
	struct file *file;
	struct inode *inode;
	int error;

	if (sizemask != sizeof(sigset_t))
		return -EINVAL;
	if (copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
		return -EINVAL;

	/* The context keeps the mask inverted, see signalfd_deliver(). */
	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
	signotset(&sigmask);

	if (ufd != -1) {
		struct signalfd_lockctx lk;

		/* Existing descriptor: only the mask is updated. */
		file = fget(ufd);
		if (!file)
			return -EBADF;
		if (file->f_op != &signalfd_fops) {
			fput(file);
			return -EINVAL;
		}
		ctx = file->private_data;

		/*
		 * The sighand this fd is attached to may have been detached.
		 * In that case signalfd_lock() returns 0 and the new mask is
		 * simply not installed.
		 */
		if (signalfd_lock(ctx, &lk)) {
			ctx->sigmask = sigmask;
			signalfd_unlock(&lk);
		}
		wake_up(&ctx->wqh);
		fput(file);

		return ufd;
	}

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->sigmask = sigmask;
	ctx->tsk = current;

	/* Register the context with our sighand's list of listeners. */
	sighand = current->sighand;
	spin_lock_irq(&sighand->siglock);
	list_add_tail(&ctx->lnk, &sighand->signalfd_list);
	spin_unlock_irq(&sighand->siglock);

	/*
	 * The context has to be fully set up by now, because
	 * anon_inode_getfd() installs the fd.
	 */
	error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]",
				 &signalfd_fops, ctx);
	if (error) {
		signalfd_cleanup(ctx);
		return error;
	}

	return ufd;
}