/* audit.c -- Auditing support * Gateway between the kernel (e.g., selinux) and the user-space audit daemon. * System-call specific features have moved to auditsc.c * * Copyright 2003-2007 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Written by Rickard E. (Rik) Faith * * Goals: 1) Integrate fully with Security Modules. * 2) Minimal run-time overhead: * a) Minimal when syscall auditing is disabled (audit_enable=0). * b) Small when syscall auditing is enabled and no audit record * is generated (defer as much work as possible to record * generation time): * i) context is allocated, * ii) names from getname are stored without a copy, and * iii) inode information stored from path_lookup. * 3) Ability to disable syscall auditing at boot time (audit=0). * 4) Usable by other parts of the kernel (if audit_log* is called, * then a syscall record will be generated automatically for the * current syscall). * 5) Netlink interface to user-space. * 6) Support low-overhead kernel-based filtering to minimize the * information that must be passed to user-space. 
* * Example user-space utilities: http://people.redhat.com/sgrubb/audit/ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "audit.h" /* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ #define AUDIT_DISABLED -1 #define AUDIT_UNINITIALIZED 0 #define AUDIT_INITIALIZED 1 static int audit_initialized; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 int audit_enabled; int audit_ever_enabled; /* Default state when kernel boots without any parameters. */ static int audit_default; /* If auditing cannot proceed, audit_failure selects what happens. */ static int audit_failure = AUDIT_FAIL_PRINTK; /* * If audit records are to be written to the netlink socket, audit_pid * contains the pid of the auditd process and audit_nlk_pid contains * the pid to use to send netlink messages to that process. */ int audit_pid; static int audit_nlk_pid; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in * audit records being dropped. */ static int audit_rate_limit; /* Number of outstanding audit_buffers allowed. */ static int audit_backlog_limit = 64; static int audit_backlog_wait_time = 60 * HZ; static int audit_backlog_wait_overflow = 0; /* The identity of the user shutting down the audit system. */ uid_t audit_sig_uid = -1; pid_t audit_sig_pid = -1; u32 audit_sig_sid = 0; /* Records can be lost in several ways: 0) [suppressed in audit_alloc] 1) out of memory in audit_log_start [kmalloc of struct audit_buffer] 2) out of memory in audit_log_move [alloc_skb] 3) suppressed due to audit_rate_limit 4) suppressed due to audit_backlog_limit */ static atomic_t audit_lost = ATOMIC_INIT(0); /* The netlink socket. */ static struct sock *audit_sock; /* Inotify handle. 
*/ struct inotify_handle *audit_ih; /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; /* The audit_freelist is a list of pre-allocated audit buffers (if more * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of * being placed on the freelist). */ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); static struct sk_buff_head audit_skb_queue; /* queue of skbs to send to auditd when/if it comes back */ static struct sk_buff_head audit_skb_hold_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); /* Serialize requests from userspace. */ static DEFINE_MUTEX(audit_cmd_mutex); /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting * audit records. Since printk uses a 1024 byte buffer, this buffer * should be at least that large. */ #define AUDIT_BUFSIZ 1024 /* AUDIT_MAXFREE is the number of empty audit_buffers we keep on the * audit_freelist. Doing so eliminates many kmalloc/kfree calls. */ #define AUDIT_MAXFREE (2*NR_CPUS) /* The audit_buffer is used when formatting an audit record. The caller * locks briefly to get the record off the freelist or to allocate the * buffer, and locks briefly to send the buffer to the netlink layer or * to place it on a transmit queue. Multiple audit_buffers can be in * use simultaneously. 
 */
struct audit_buffer {
	struct list_head     list;
	struct sk_buff       *skb;	/* formatted skb ready to send */
	struct audit_context *ctx;	/* NULL or associated context */
	gfp_t		     gfp_mask;	/* allocation flags used for this record */
};

/* Carries one reply skb plus its destination pid to the sender kthread. */
struct audit_reply {
	int pid;
	struct sk_buff *skb;
};

/* Stamp the netlink header of @ab's skb with destination @pid.
 * Safe to call with a NULL buffer (no-op). */
static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
{
	if (ab) {
		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
		nlh->nlmsg_pid = pid;
	}
}

/* React to an audit subsystem failure according to the audit_failure
 * policy: stay silent, printk (ratelimited), or panic the machine. */
void audit_panic(const char *message)
{
	switch (audit_failure) {
	case AUDIT_FAIL_SILENT:
		break;
	case AUDIT_FAIL_PRINTK:
		if (printk_ratelimit())
			printk(KERN_ERR "audit: %s\n", message);
		break;
	case AUDIT_FAIL_PANIC:
		/* test audit_pid since printk is always lossy, why bother? */
		if (audit_pid)
			panic("audit: %s\n", message);
		break;
	}
}

/* Return 1 when another record may be emitted under audit_rate_limit
 * (records per second); return 0 to suppress it.  The message counter
 * resets once more than HZ jiffies have passed since the last reset. */
static inline int audit_rate_check(void)
{
	static unsigned long	last_check = 0;
	static int		messages   = 0;
	static DEFINE_SPINLOCK(lock);
	unsigned long		flags;
	unsigned long		now;
	unsigned long		elapsed;
	int			retval	   = 0;

	if (!audit_rate_limit)		/* 0 == unlimited */
		return 1;

	spin_lock_irqsave(&lock, flags);
	if (++messages < audit_rate_limit) {
		retval = 1;
	} else {
		now	= jiffies;
		elapsed = now - last_check;
		if (elapsed > HZ) {
			last_check = now;
			messages   = 0;
			retval	   = 1;
		}
	}
	spin_unlock_irqrestore(&lock, flags);

	return retval;
}

/**
 * audit_log_lost - conditionally log lost audit message event
 * @message: the message stating reason for lost audit message
 *
 * Emit at least 1 message per second, even if audit_rate_check is
 * throttling.
 * Always increment the lost messages counter.
 */
void audit_log_lost(const char *message)
{
	static unsigned long	last_msg = 0;
	static DEFINE_SPINLOCK(lock);
	unsigned long		flags;
	unsigned long		now;
	int			print;

	atomic_inc(&audit_lost);

	/* Always print when panicking or when no rate limit is configured. */
	print = (audit_failure == AUDIT_FAIL_PANIC || !audit_rate_limit);

	if (!print) {
		spin_lock_irqsave(&lock, flags);
		now = jiffies;
		if (now - last_msg > HZ) {
			print = 1;
			last_msg = now;
		}
		spin_unlock_irqrestore(&lock, flags);
	}

	if (print) {
		if (printk_ratelimit())
			printk(KERN_WARNING
				"audit: audit_lost=%d audit_rate_limit=%d "
				"audit_backlog_limit=%d\n",
				atomic_read(&audit_lost),
				audit_rate_limit,
				audit_backlog_limit);
		audit_panic(message);
	}
}

/* Emit an AUDIT_CONFIG_CHANGE record describing a change of
 * @function_name from @old to @new.  If a security id is supplied but
 * cannot be translated to a context string, the request is denied
 * (allow_changes forced to 0) and the raw sid is logged instead. */
static int audit_log_config_change(char *function_name, int new, int old,
				   uid_t loginuid, u32 sessionid, u32 sid,
				   int allow_changes)
{
	struct audit_buffer *ab;
	int rc = 0;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
			 old, loginuid, sessionid);
	if (sid) {
		char *ctx = NULL;
		u32 len;

		rc = security_secid_to_secctx(sid, &ctx, &len);
		if (rc) {
			audit_log_format(ab, " sid=%u", sid);
			allow_changes = 0; /* Something weird, deny request */
		} else {
			audit_log_format(ab, " subj=%s", ctx);
			security_release_secctx(ctx, len);
		}
	}
	audit_log_format(ab, " res=%d", allow_changes);
	audit_log_end(ab);
	return rc;
}

/* Apply a configuration change to *@to_change, logging it first when
 * auditing is enabled.  Refused with -EPERM when the configuration is
 * locked (audit_enabled == AUDIT_LOCKED) or when logging fails. */
static int audit_do_config_change(char *function_name, int *to_change,
				  int new, uid_t loginuid, u32 sessionid,
				  u32 sid)
{
	int allow_changes, rc = 0, old = *to_change;

	/* check if we are locked */
	if (audit_enabled == AUDIT_LOCKED)
		allow_changes = 0;
	else
		allow_changes = 1;

	if (audit_enabled != AUDIT_OFF) {
		rc = audit_log_config_change(function_name, new, old, loginuid,
					     sessionid, sid, allow_changes);
		if (rc)
			allow_changes = 0;
	}

	/* If we are allowed, make the change */
	if (allow_changes == 1)
		*to_change = new;
	/* Not allowed, update reason */
	else if (rc == 0)
		rc = -EPERM;
	return rc;
}

/* Set the records-per-second ceiling used by audit_rate_check(). */
static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid,
				u32 sid)
{
	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
				      limit, loginuid, sessionid, sid);
}

/* Set the maximum number of outstanding audit buffers. */
static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid,
				   u32 sid)
{
	return audit_do_config_change("audit_backlog_limit",
				      &audit_backlog_limit, limit, loginuid,
				      sessionid, sid);
}

/* Switch auditing between AUDIT_OFF, AUDIT_ON and AUDIT_LOCKED.
 * audit_ever_enabled remembers whether auditing was ever switched on. */
static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid)
{
	int rc;
	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
		return -EINVAL;

	rc = audit_do_config_change("audit_enabled", &audit_enabled, state,
				    loginuid, sessionid, sid);

	if (!rc)
		audit_ever_enabled |= !!state;

	return rc;
}

/* Select the failure policy consulted by audit_panic(). */
static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid)
{
	if (state != AUDIT_FAIL_SILENT
	    && state != AUDIT_FAIL_PRINTK
	    && state != AUDIT_FAIL_PANIC)
		return -EINVAL;

	return audit_do_config_change("audit_failure", &audit_failure, state,
				      loginuid, sessionid, sid);
}

/*
 * Queue skbs to be sent to auditd when/if it comes back.  These skbs should
 * already have been sent via printk/syslog and so if these messages are dropped
 * it is not a huge concern since we already passed the audit_log_lost()
 * notification and stuff.  This is just nice to get audit messages during
 * boot before auditd is running or messages generated while auditd is stopped.
 * This only holds messages if audit_default is set, aka booting with audit=1
 * or building your kernel that way.
*/ static void audit_hold_skb(struct sk_buff *skb) { if (audit_default && skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit) skb_queue_tail(&audit_skb_hold_queue, skb); else kfree_skb(skb); } static void kauditd_send_skb(struct sk_buff *skb) { int err; /* take a reference in case we can't send it and we want to hold it */ skb_get(skb); err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); audit_log_lost("auditd dissapeared\n"); audit_pid = 0; /* we might get lucky and get this in the next auditd */ audit_hold_skb(skb); } else /* drop the extra reference if sent ok */ kfree_skb(skb); } static int kauditd_thread(void *dummy) { struct sk_buff *skb; set_freezable(); while (!kthread_should_stop()) { /* * if auditd just started drain the queue of messages already * sent to syslog/printk. remember loss here is ok. we already * called audit_log_lost() if it didn't go out normally. so the * race between the skb_dequeue and the next check for audit_pid * doesn't matter. * * if you ever find kauditd to be too slow we can get a perf win * by doing our own locking and keeping better track if there * are messages in this queue. I don't see the need now, but * in 5 years when I want to play with this again I'll see this * note and still have no friggin idea what i'm thinking today. 
*/ if (audit_default && audit_pid) { skb = skb_dequeue(&audit_skb_hold_queue); if (unlikely(skb)) { while (skb && audit_pid) { kauditd_send_skb(skb); skb = skb_dequeue(&audit_skb_hold_queue); } } } skb = skb_dequeue(&audit_skb_queue); wake_up(&audit_backlog_wait); if (skb) { if (audit_pid) kauditd_send_skb(skb); else { if (printk_ratelimit()) printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0)); else audit_log_lost("printk limit exceeded\n"); audit_hold_skb(skb); } } else { DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&kauditd_wait, &wait); if (!skb_queue_len(&audit_skb_queue)) { try_to_freeze(); schedule(); } __set_current_state(TASK_RUNNING); remove_wait_queue(&kauditd_wait, &wait); } } return 0; } static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) { struct task_struct *tsk; int err; read_lock(&tasklist_lock); tsk = find_task_by_vpid(pid); err = -ESRCH; if (!tsk) goto out; err = 0; spin_lock_irq(&tsk->sighand->siglock); if (!tsk->signal->audit_tty) err = -EPERM; spin_unlock_irq(&tsk->sighand->siglock); if (err) goto out; tty_audit_push_task(tsk, loginuid, sessionid); out: read_unlock(&tasklist_lock); return err; } int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; int pid = dest->pid; struct sk_buff *skb; /* wait for parent to finish and send an ACK */ mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); while ((skb = __skb_dequeue(&dest->q)) != NULL) netlink_unicast(audit_sock, skb, pid, 0); kfree(dest); return 0; } #ifdef CONFIG_AUDIT_TREE static int prune_tree_thread(void *unused) { mutex_lock(&audit_cmd_mutex); audit_prune_trees(); mutex_unlock(&audit_cmd_mutex); return 0; } void audit_schedule_prune(void) { kthread_run(prune_tree_thread, NULL, "audit_prune_tree"); } #endif struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, int multi, void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; int len = NLMSG_SPACE(size); void 
*data; int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return NULL; nlh = NLMSG_PUT(skb, pid, seq, t, size); nlh->nlmsg_flags = flags; data = NLMSG_DATA(nlh); memcpy(data, payload, size); return skb; nlmsg_failure: /* Used by NLMSG_PUT */ if (skb) kfree_skb(skb); return NULL; } static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ netlink_unicast(audit_sock, reply->skb, reply->pid, 0); kfree(reply); return 0; } /** * audit_send_reply - send an audit reply message via netlink * @pid: process id to send reply to * @seq: sequence number * @type: audit message type * @done: done (last) flag * @multi: multi-part message flag * @payload: payload data * @size: payload size * * Allocates an skb, builds the netlink message, and sends it to the pid. * No failure notifications. */ void audit_send_reply(int pid, int seq, int type, int done, int multi, void *payload, int size) { struct sk_buff *skb; struct task_struct *tsk; struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), GFP_KERNEL); if (!reply) return; skb = audit_make_reply(pid, seq, type, done, multi, payload, size); if (!skb) goto out; reply->pid = pid; reply->skb = skb; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); if (!IS_ERR(tsk)) return; kfree_skb(skb); out: kfree(reply); } /* * Check for appropriate CAP_AUDIT_ capabilities on incoming audit * control messages. 
*/ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) { int err = 0; switch (msg_type) { case AUDIT_GET: case AUDIT_LIST: case AUDIT_LIST_RULES: case AUDIT_SET: case AUDIT_ADD: case AUDIT_ADD_RULE: case AUDIT_DEL: case AUDIT_DEL_RULE: case AUDIT_SIGNAL_INFO: case AUDIT_TTY_GET: case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: if (security_netlink_recv(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ err = -EINVAL; } return err; } static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, u32 pid, u32 uid, uid_t auid, u32 ses, u32 sid) { int rc = 0; char *ctx = NULL; u32 len; if (!audit_enabled) { *ab = NULL; return rc; } *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); audit_log_format(*ab, "user pid=%d uid=%u auid=%u ses=%u", pid, uid, auid, ses); if (sid) { rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) audit_log_format(*ab, " ssid=%u", sid); else { audit_log_format(*ab, " subj=%s", ctx); security_release_secctx(ctx, len); } } ret#include <linux/init.h> #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/thread_info.h> #include <asm/v7m.h> @ Bad Abort numbers @ ----------------- @ #define BAD_PREFETCH 0 #define BAD_DATA 1 #define BAD_ADDREXCPTN 2 #define BAD_IRQ 3 #define BAD_UNDEFINSTR 4 @ @ Most of the stack format comes from struct pt_regs, but with @ the addition of 8 bytes for storing syscall args 5 and 6. @ This _must_ remain a multiple of 8 for EABI. @ #define S_OFF 8 /* * The SWI code relies on the fact that R0 is at the bottom of the stack * (due to slow/fast restore user regs). 
 */
#if S_R0 != 0
#error "Please fix"
#endif

	@ Zero the frame pointer so backtraces terminate cleanly.
	.macro	zero_fp
#ifdef CONFIG_FRAME_POINTER
	mov	fp, #0
#endif
	.endm

#ifdef CONFIG_ALIGNMENT_TRAP
#define ATRAP(x...) x
#else
#define ATRAP(x...)
#endif

	@ Restore the SCTLR alignment-trap configuration (\label holds the
	@ address of the saved control-register value) if it differs from
	@ the current one.
	.macro	alignment_trap, rtmp1, rtmp2, label
#ifdef CONFIG_ALIGNMENT_TRAP
	mrc	p15, 0, \rtmp2, c1, c0, 0
	ldr	\rtmp1, \label
	ldr	\rtmp1, [\rtmp1]
	teq	\rtmp1, \rtmp2
	mcrne	p15, 0, \rtmp1, c1, c0, 0
#endif
	.endm

#ifdef CONFIG_CPU_V7M
/*
 * ARMv7-M exception entry/exit macros.
 *
 * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are
 * automatically saved on the current stack (32 words) before
 * switching to the exception stack (SP_main).
 *
 * If exception is taken while in user mode, SP_main is
 * empty. Otherwise, SP_main is aligned to 64 bit automatically
 * (CCR.STKALIGN set).
 *
 * Linux assumes that the interrupts are disabled when entering an
 * exception handler and it may BUG if this is not the case. Interrupts
 * are disabled during entry and reenabled in the exit macro.
 *
 * v7m_exception_slow_exit is used when returning from SVC or PendSV.
 * When returning to kernel mode, we don't return from exception.
 */
	.macro	v7m_exception_entry
	@ determine the location of the registers saved by the core during
	@ exception entry. Depending on the mode the cpu was in when the
	@ exception happend that is either on the main or the process stack.
	@ Bit 2 of EXC_RETURN stored in the lr register specifies which stack
	@ was used.
	tst	lr, #EXC_RET_STACK_MASK
	mrsne	r12, psp
	moveq	r12, sp

	@ we cannot rely on r0-r3 and r12 matching the value saved in the
	@ exception frame because of tail-chaining. So these have to be
	@ reloaded.
	ldmia	r12!, {r0-r3}

	@ Linux expects to have irqs off. Do it here before taking stack space
	cpsid	i

	sub	sp, #S_FRAME_SIZE-S_IP
	stmdb	sp!, {r0-r11}

	@ load saved r12, lr, return address and xPSR.
	@ r0-r7 are used for signals and never touched from now on. Clobbering
	@ r8-r12 is OK.
	mov	r9, r12
	ldmia	r9!, {r8, r10-r12}

	@ calculate the original stack pointer value.
	@ r9 currently points to the memory location just above the auto saved
	@ xPSR.
	@ The cpu might automatically 8-byte align the stack. Bit 9
	@ of the saved xPSR specifies if stack aligning took place. In this case
	@ another 32-bit value is included in the stack.

	tst	r12, V7M_xPSR_FRAMEPTRALIGN
	addne	r9, r9, #4

	@ store saved r12 using str to have a register to hold the base for stm
	str	r8, [sp, #S_IP]
	add	r8, sp, #S_SP
	@ store r13-r15, xPSR
	stmia	r8!, {r9-r12}
	@ store old_r0
	str	r0, [r8]
	.endm

/*
 * PENDSV and SVCALL are configured to have the same exception
 * priorities. As a kernel thread runs at SVCALL execution priority it
 * can never be preempted and so we will never have to return to a
 * kernel thread here.
 */
	.macro	v7m_exception_slow_exit ret_r0
	cpsid	i
	ldr	lr, =EXC_RET_THREADMODE_PROCESSSTACK

	@ read original r12, sp, lr, pc and xPSR
	add	r12, sp, #S_IP
	ldmia	r12, {r1-r5}

	@ an exception frame is always 8-byte aligned. To tell the hardware if
	@ the sp to be restored is aligned or not set bit 9 of the saved xPSR
	@ accordingly.
	tst	r2, #4
	subne	r2, r2, #4
	orrne	r5, V7M_xPSR_FRAMEPTRALIGN
	biceq	r5, V7M_xPSR_FRAMEPTRALIGN

	@ ensure bit 0 is cleared in the PC, otherwise behaviour is
	@ unpredictable
	bic	r4, #1

	@ write basic exception frame
	stmdb	r2!, {r1, r3-r5}
	ldmia	sp, {r1, r3-r5}
	.if	\ret_r0
	stmdb	r2!, {r0, r3-r5}
	.else
	stmdb	r2!, {r1, r3-r5}
	.endif

	@ restore process sp
	msr	psp, r2

	@ restore original r4-r11
	ldmia	sp!, {r0-r11}

	@ restore main sp
	add	sp, sp, #S_FRAME_SIZE-S_IP

	cpsie	i
	bx	lr
	.endm
#endif	/* CONFIG_CPU_V7M */

	@
	@ Store/load the USER SP and LR registers by switching to the SYS
	@ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not
	@ available.
	@ Should only be called from SVC mode
	@
	.macro	store_user_sp_lr, rd, rtemp, offset = 0
	mrs	\rtemp, cpsr
	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
	msr	cpsr_c, \rtemp			@ switch to the SYS mode

	str	sp, [\rd, #\offset]		@ save sp_usr
	str	lr, [\rd, #\offset + 4]		@ save lr_usr

	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
	.endm

	.macro	load_user_sp_lr, rd, rtemp, offset = 0
	mrs	\rtemp, cpsr
	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
	msr	cpsr_c, \rtemp			@ switch to the SYS mode

	ldr	sp, [\rd, #\offset]		@ load sp_usr
	ldr	lr, [\rd, #\offset + 4]		@ load lr_usr

	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
	.endm

#ifndef CONFIG_THUMB2_KERNEL
	@ Return from an exception taken in SVC mode, restoring the full
	@ register state saved on the SVC stack.
	.macro	svc_exit, rpsr, irq = 0
	.if	\irq != 0
	@ IRQs already off
#ifdef CONFIG_TRACE_IRQFLAGS
	@ The parent context IRQs must have been enabled to get here in
	@ the first place, so there's no point checking the PSR I bit.
	bl	trace_hardirqs_on
#endif
	.else
	@ IRQs off again before pulling preserved data off the stack
	disable_irq_notrace
#ifdef CONFIG_TRACE_IRQFLAGS
	tst	\rpsr, #PSR_I_BIT
	bleq	trace_hardirqs_on
	tst	\rpsr, #PSR_I_BIT
	blne	trace_hardirqs_off
#endif
	.endif
	msr	spsr_cxsf, \rpsr
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
	@ We must avoid clrex due to Cortex-A15 erratum #830321
	sub	r0, sp, #4			@ uninhabited address
	strex	r1, r2, [r0]			@ clear the exclusive monitor
#endif
	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
	.endm

	@
	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
	@
	@ This macro acts in a similar manner to svc_exit but switches to FIQ
	@ mode to restore the final part of the register state.
	@
	@ We cannot use the normal svc_exit procedure because that would
	@ clobber spsr_svc (FIQ could be delivered during the first few
	@ instructions of vector_swi meaning its contents have not been
	@ saved anywhere).
	@
	@ Note that, unlike svc_exit, this macro also does not allow a caller
	@ supplied rpsr. This is because the FIQ exceptions are not re-entrant
	@ and the handlers cannot call into the scheduler (meaning the value
	@ on the stack remains correct).
	@
	.macro	svc_exit_via_fiq
	mov	r0, sp
	ldmib	r0, {r1 - r14}	@ abort is deadly from here onward (it will
				@ clobber state restored below)
	msr	cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
	add	r8, r0, #S_PC
	ldr	r9, [r0, #S_PSR]
	msr	spsr_cxsf, r9
	ldr	r0, [r0, #S_R0]
	ldmia	r8, {pc}^
	.endm

	@ Return to user space, restoring the user register file.  With
	@ \fast set, r0 is preserved as the syscall return value.
	.macro	restore_user_regs, fast = 0, offset = 0
	mov	r2, sp
	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
	msr	spsr_cxsf, r1			@ save in spsr_svc
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
	@ We must avoid clrex due to Cortex-A15 erratum #830321
	strex	r1, r2, [r2]			@ clear the exclusive monitor
#endif
	.if	\fast
	ldmdb	r2, {r1 - lr}^			@ get calling r1 - lr
	.else
	ldmdb	r2, {r0 - lr}^			@ get calling r0 - lr
	.endif
	mov	r0, r0				@ ARMv5T and earlier require a nop
						@ after ldm {}^
	add	sp, sp, #\offset + S_FRAME_SIZE
	movs	pc, lr				@ return & move spsr_svc into cpsr
	.endm

#else	/* CONFIG_THUMB2_KERNEL */
	@ Thumb-2 variant: build an RFE frame below the saved SP because
	@ "ldmia sp, {r0 - pc}^" is not available in Thumb-2.
	.macro	svc_exit, rpsr, irq = 0
	.if	\irq != 0
	@ IRQs already off
#ifdef CONFIG_TRACE_IRQFLAGS
	@ The parent context IRQs must have been enabled to get here in
	@ the first place, so there's no point checking the PSR I bit.
	bl	trace_hardirqs_on
#endif
	.else
	@ IRQs off again before pulling preserved data off the stack
	disable_irq_notrace
#ifdef CONFIG_TRACE_IRQFLAGS
	tst	\rpsr, #PSR_I_BIT
	bleq	trace_hardirqs_on
	tst	\rpsr, #PSR_I_BIT
	blne	trace_hardirqs_off
#endif
	.endif
	ldr	lr, [sp, #S_SP]			@ top of the stack
	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc

	@ We must avoid clrex due to Cortex-A15 erratum #830321
	strex	r2, r1, [sp, #S_LR]		@ clear the exclusive monitor

	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
	ldmia	sp, {r0 - r12}
	mov	sp, lr
	ldr	lr, [sp], #4
	rfeia	sp!
	.endm

	@
	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
	@
	@ For full details see non-Thumb implementation above.
@ .macro svc_exit_via_fiq add r0, sp, #S_R2 ldr lr, [sp, #S_LR] ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will @ clobber state restored below) ldmia r0, {r2 - r12} mov r1