aboutsummaryrefslogblamecommitdiffstats
path: root/net/sunrpc/clnt.c
blob: aa8965e9d30770cec269d1dfe6cd0bccb490dfa1 (plain) (tree)
1
2
  
                           


























                                                                            
                          
                            

                              
                                     
                                 

















                                                            
                                                        

                                                     
                                                            











                                                           
                               

                  

                                           

                             




                                                







                                                                        
                                                 


                                                                                            
                                        

                                     










                                                                        
                                                     




                                                        
                                      







                                                             
                                 



                                                                           
                                                  



























                                                           
                                                    











                                                                            

                                            

                                                                                  
                                    










                                                                             




                                             




                                                    
                           
            


                            





























                                                                                   









                                                                           
                                                












                                                                           



                                      


                                                                              
                                                        
                                                          























                                                                     
                                                
                                                             

































                                                                              

                                             






                                                    

                                           
                                       
                                      

                                



















                                                                    





































                                                                          



                                       
                                                       


 



                                                    
  
                                                                 

                           
                                                                                                  
 

                                                        
                                                    


















                                                                            

                                                              
                                                



                                                                
                                    
















                                                                            
                         
                                                                 


                         


                                                              


                                                              


                                            
                                           

                                                 
         
                                     
                               
    







                                                                         
                                                             





                                                           
                      
                           
                                 


                                
                                              
                         
                                                              
                                 
 


                                                        








                                                              
                                                



                                          

















                                                                         
                                                





                                                                                 

                                                                   















                                                                      











                                                                            









































































































                                                                                
                                                       



                                    

                                              




                                                               
                           





                                                                   
                                                            


                                                                               
                                                 








                                               











                                                   





                                       




                                                  





                                                               

                                                          




                                          
                                                                             












                                                                        









                                                                








                                                 
 

                                                               
 
                                       
                             
                                                   
                                                               




                                        


















                                                                                     

                                      

































                                                                          



                                   
                                              
 


                                                                  
 





                                                      
         


  
                               






                                                


                                                               






                                                
                                                                    
                               
 



                         
                                            


                                     
                      


















                                                                             

                                                  
                                  



                                                                  
                                               



                                               
                                                

                                       



                                                                         
                                      






























                                                               
                                       









                                                
                                                          






                                                           











                                              













                                                                         

                            
                                
                                                                               




                                                                    
                                                     



                                                                                  
                               




















                                                                 
                                                  
















                                                                                




                                                                     






                                                               




                                          

         
                                        

























































                                                                             



                                                      

                                                         





                                                                       


                                                             


















                                                                              
                                 







                                                    


                                                                                          
                                
                                                                                                       















                                                                            
                                       








                                                                              
                                       
                                      

                                                                                      










                                                                                        
                                                                         







                                           
                                                                                     

                                                                       

                                      
                               
                                                                                              


                                                                       

                                         
                              
                                                                                                         



                                                            

                                    







                                                                                         
            


                                                
                                                                              
                                            

                                        





                                                                              
                              

                                                                                   
                         
 


























                                                                 
/*
 *  linux/net/sunrpc/clnt.c
 *
 *  This file contains the high-level RPC interface.
 *  It is modeled as a finite state machine to support both synchronous
 *  and asynchronous requests.
 *
 *  -	RPC header generation and argument serialization.
 *  -	Credential refresh.
 *  -	TCP connect handling.
 *  -	Retry of operation when it is suspected the operation failed because
 *	of uid squashing on the server, or when the credentials were stale
 *	and need to be refreshed, or when a packet was damaged in transit.
 *	This may have to be moved to the VFS layer.
 *
 *  NB: BSD uses a more intelligent approach to guessing when a request
 *  or reply has been lost by keeping the RTO estimate for each procedure.
 *  We currently make do with a constant timeout value.
 *
 *  Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
 *  Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
 */

#include <asm/system.h>

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>

#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/metrics.h>


/*
 * Extra bytes added to a procedure's p_bufsiz when call_allocate()
 * sizes the request buffer.
 */
#define RPC_SLACK_SPACE		(1024)	/* total overkill */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_CALL
#endif

/*
 * rpc_shutdown_client() sleeps on this queue until cl_users reaches
 * zero; rpc_release_client() wakes it when the last user goes away.
 */
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);


/*
 * Forward declarations of the file-local call_*() routines.  Most of
 * them are installed as task->tk_action handlers by the code below;
 * call_header() is used by call_encode() to build the RPC header.
 */
static void	call_start(struct rpc_task *task);
static void	call_reserve(struct rpc_task *task);
static void	call_reserveresult(struct rpc_task *task);
static void	call_allocate(struct rpc_task *task);
static void	call_encode(struct rpc_task *task);
static void	call_decode(struct rpc_task *task);
static void	call_bind(struct rpc_task *task);
static void	call_bind_status(struct rpc_task *task);
static void	call_transmit(struct rpc_task *task);
static void	call_status(struct rpc_task *task);
static void	call_transmit_status(struct rpc_task *task);
static void	call_refresh(struct rpc_task *task);
static void	call_refreshresult(struct rpc_task *task);
static void	call_timeout(struct rpc_task *task);
static void	call_connect(struct rpc_task *task);
static void	call_connect_status(struct rpc_task *task);
static u32 *	call_header(struct rpc_task *task);
static u32 *	call_verify(struct rpc_task *task);


/*
 * Create the rpc_pipefs directory "<dir_name>/clnt<id>" for a client.
 *
 * With a NULL dir_name nothing is created: cl_vfsmnt and cl_dentry are
 * left as ERR_PTR(-ENOENT) and 0 is returned.  Otherwise a mount
 * reference is taken and directory names are tried with successive ids
 * until rpc_mkdir() stops reporting -EEXIST.  On any other error the
 * mount reference is dropped again and the error is returned.
 */
static int
rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
{
	static uint32_t clntid;
	int err;

	clnt->cl_vfsmnt = ERR_PTR(-ENOENT);
	clnt->cl_dentry = ERR_PTR(-ENOENT);
	if (!dir_name)
		return 0;

	clnt->cl_vfsmnt = rpc_get_mount();
	if (IS_ERR(clnt->cl_vfsmnt))
		return PTR_ERR(clnt->cl_vfsmnt);

	/* Keep picking a fresh id for as long as the name is taken */
	do {
		snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
				"%s/clnt%x", dir_name,
				(unsigned int)clntid++);
		clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0';

		clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
		if (!IS_ERR(clnt->cl_dentry))
			return 0;
		err = PTR_ERR(clnt->cl_dentry);
	} while (err == -EEXIST);

	printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
			clnt->cl_pathname, err);
	rpc_put_mount();
	return err;
}

/*
 * Create an RPC client
 *
 * Allocates and initialises an rpc_clnt for version index @vers of
 * @program on top of @xprt, copies @servname into it, creates the
 * rpc_pipefs directory (when the program names one) and the auth
 * handle for @flavor.
 *
 * Returns the new client, or an ERR_PTR():
 *   -EINVAL  @xprt is NULL, or @vers does not name a known version
 *   -ENOMEM  the client structure could not be allocated
 *   other    whatever rpc_setup_pipedir() or rpcauth_create() reported
 * On every failure except a NULL @xprt the transport is destroyed.
 *
 * FIXME: This should also take a flags argument (as in task->tk_flags).
 * It's called (among others) from pmap_create_client, which may in
 * turn be called by an async task. In this case, rpciod should not be
 * made to sleep too long.
 */
struct rpc_clnt *
rpc_new_client(struct rpc_xprt *xprt, char *servname,
		  struct rpc_program *program, u32 vers,
		  rpc_authflavor_t flavor)
{
	struct rpc_version	*version;
	struct rpc_clnt		*clnt = NULL;
	struct rpc_auth		*auth;
	int err;
	int len;

	dprintk("RPC: creating %s client for %s (xprt %p)\n",
		program->name, servname, xprt);

	err = -EINVAL;
	if (!xprt)
		goto out_no_xprt;
	if (vers >= program->nrvers || !(version = program->version[vers]))
		goto out_err;

	err = -ENOMEM;
	clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
	if (!clnt)
		goto out_err;
	memset(clnt, 0, sizeof(*clnt));
	atomic_set(&clnt->cl_users, 0);
	atomic_set(&clnt->cl_count, 1);
	clnt->cl_parent = clnt;

	/*
	 * Use the inline buffer for the server name when it fits.  For a
	 * longer name try a heap buffer, and fall back to a truncated
	 * copy in the inline buffer if that allocation fails.
	 */
	clnt->cl_server = clnt->cl_inline_name;
	len = strlen(servname) + 1;
	if (len > sizeof(clnt->cl_inline_name)) {
		char *buf = kmalloc(len, GFP_KERNEL);
		if (buf != NULL)
			clnt->cl_server = buf;
		else
			len = sizeof(clnt->cl_inline_name);
	}
	strlcpy(clnt->cl_server, servname, len);

	clnt->cl_xprt     = xprt;
	clnt->cl_procinfo = version->procs;
	clnt->cl_maxproc  = version->nrprocs;
	clnt->cl_protname = program->name;
	clnt->cl_pmap	  = &clnt->cl_pmap_default;
	clnt->cl_port     = xprt->addr.sin_port;
	clnt->cl_prog     = program->number;
	clnt->cl_vers     = version->number;
	clnt->cl_prot     = xprt->prot;
	clnt->cl_stats    = program->stats;
	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");

	/* No port known yet: mark the client for automatic binding */
	if (!clnt->cl_port)
		clnt->cl_autobind = 1;

	clnt->cl_rtt = &clnt->cl_rtt_default;
	rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);

	err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
	if (err < 0)
		goto out_no_path;

	auth = rpcauth_create(flavor, clnt);
	if (IS_ERR(auth)) {
		printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
				flavor);
		err = PTR_ERR(auth);
		goto out_no_auth;
	}

	/* save the nodename */
	clnt->cl_nodelen = strlen(system_utsname.nodename);
	if (clnt->cl_nodelen > UNX_MAXNODENAME)
		clnt->cl_nodelen = UNX_MAXNODENAME;
	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
	return clnt;

out_no_auth:
	if (!IS_ERR(clnt->cl_dentry)) {
		rpc_rmdir(clnt->cl_pathname);
		dput(clnt->cl_dentry);
		rpc_put_mount();
	}
out_no_path:
	/* Release the iostats allocated above so they are not leaked */
	rpc_free_iostats(clnt->cl_metrics);
	if (clnt->cl_server != clnt->cl_inline_name)
		kfree(clnt->cl_server);
	kfree(clnt);
out_err:
	xprt_destroy(xprt);
out_no_xprt:
	return ERR_PTR(err);
}

/**
 * rpc_create_client - create an RPC client and ping the server
 * @xprt: pointer to xprt struct
 * @servname: name of server
 * @info: rpc_program
 * @version: rpc_program version
 * @authflavor: rpc_auth flavour to use
 *
 * Creates an RPC client structure with rpc_new_client(), then pings the
 * server in order to determine if it is up, and if it supports this
 * program and version.  If the ping fails the client is shut down and
 * the ping error is returned as an ERR_PTR().
 *
 * This function should never be called by asynchronous tasks such as
 * the portmapper.
 */
struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
		struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
{
	struct rpc_clnt *clnt;
	int err;

	clnt = rpc_new_client(xprt, servname, info, version, authflavor);
	if (!IS_ERR(clnt)) {
		err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
		if (err != 0) {
			rpc_shutdown_client(clnt);
			clnt = ERR_PTR(err);
		}
	}
	return clnt;
}

/*
 * This function clones the RPC client structure. It allows us to share the
 * same transport while varying parameters such as the authentication
 * flavour.
 */
struct rpc_clnt *
rpc_clone_client(struct rpc_clnt *clnt)
{
	struct rpc_clnt *new;

	new = kmalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		goto out_no_clnt;
	memcpy(new, clnt, sizeof(*new));
	atomic_set(&new->cl_count, 1);
	atomic_set(&new->cl_users, 0);
	new->cl_parent = clnt;
	atomic_inc(&clnt->cl_count);
	/* Duplicate portmapper */
	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
	/* Turn off autobind on clones */
	new->cl_autobind = 0;
	new->cl_oneshot = 0;
	new->cl_dead = 0;
	if (!IS_ERR(new->cl_dentry)) {
		dget(new->cl_dentry);
		rpc_get_mount();
	}
	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
	if (new->cl_auth)
		atomic_inc(&new->cl_auth->au_count);
	new->cl_pmap		= &new->cl_pmap_default;
	new->cl_metrics         = rpc_alloc_iostats(clnt);
	return new;
out_no_clnt:
	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
	return ERR_PTR(-ENOMEM);
}

/*
 * Shut down an RPC client for good: kill its outstanding tasks, wait
 * for them to let go of the client, then drop our reference with
 * rpc_destroy_client() and return its result.
 *
 * cl_oneshot and cl_dead are cleared on every pass, since otherwise
 * the client would be destroyed when the last task releases it.
 */
int
rpc_shutdown_client(struct rpc_clnt *clnt)
{
	dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
			clnt->cl_protname, clnt->cl_server,
			atomic_read(&clnt->cl_users));

	for (;;) {
		if (atomic_read(&clnt->cl_users) <= 0)
			break;

		/* Don't let rpc_release_client destroy us */
		clnt->cl_dead = 0;
		clnt->cl_oneshot = 0;

		rpc_killall_tasks(clnt);
		wait_event_timeout(destroy_wait,
			atomic_read(&clnt->cl_users) == 0, 1*HZ);
	}

	/* A negative user count means the accounting is broken */
	if (atomic_read(&clnt->cl_users) < 0) {
		printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
				clnt, atomic_read(&clnt->cl_users));
#ifdef RPC_DEBUG
		rpc_show_tasks();
#endif
		BUG();
	}

	return rpc_destroy_client(clnt);
}

/*
 * Delete an RPC client
 *
 * Drops one reference on the client.  Returns 1 if other references
 * remain, and 0 once the client has actually been torn down and freed.
 * A clone only drops its reference on the parent; the pipefs directory,
 * the transport and a heap-allocated server name are released only for
 * a client that is its own parent.
 */
int
rpc_destroy_client(struct rpc_clnt *clnt)
{
	if (!atomic_dec_and_test(&clnt->cl_count))
		return 1;
	BUG_ON(atomic_read(&clnt->cl_users) != 0);

	dprintk("RPC: destroying %s client for %s\n",
			clnt->cl_protname, clnt->cl_server);

	if (clnt->cl_auth) {
		rpcauth_destroy(clnt->cl_auth);
		clnt->cl_auth = NULL;
	}

	if (clnt->cl_parent == clnt) {
		if (clnt->cl_pathname[0])
			rpc_rmdir(clnt->cl_pathname);
		if (clnt->cl_xprt) {
			xprt_destroy(clnt->cl_xprt);
			clnt->cl_xprt = NULL;
		}
		if (clnt->cl_server != clnt->cl_inline_name)
			kfree(clnt->cl_server);
	} else {
		rpc_destroy_client(clnt->cl_parent);
	}

	/* Per-client state that parents and clones both own */
	rpc_free_iostats(clnt->cl_metrics);
	clnt->cl_metrics = NULL;
	if (!IS_ERR(clnt->cl_dentry)) {
		dput(clnt->cl_dentry);
		rpc_put_mount();
	}
	kfree(clnt);
	return 0;
}

/*
 * Release an RPC client
 *
 * Drops one user of the client.  When the last user goes away, wakes
 * anyone sleeping on destroy_wait and, if the client is marked oneshot
 * or dead, drops a reference with rpc_destroy_client().
 */
void
rpc_release_client(struct rpc_clnt *clnt)
{
	dprintk("RPC:      rpc_release_client(%p, %d)\n",
				clnt, atomic_read(&clnt->cl_users));

	if (atomic_dec_and_test(&clnt->cl_users)) {
		wake_up(&destroy_wait);
		if (clnt->cl_oneshot || clnt->cl_dead)
			rpc_destroy_client(clnt);
	}
}

/**
 * rpc_bind_new_program - bind a new RPC program to an existing client
 * @old: old rpc_client
 * @program: rpc program to set
 * @vers: rpc program version
 *
 * Clones the rpc client and sets up a new RPC program. This is mainly
 * of use for enabling different RPC programs to share the same transport.
 * The Sun NFSv2/v3 ACL protocol can do this.
 *
 * The clone is pinged before it is returned; if the ping fails the
 * clone is shut down and the ping error is returned as an ERR_PTR().
 */
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
				      struct rpc_program *program,
				      int vers)
{
	struct rpc_version *version;
	struct rpc_clnt *clnt;
	int err;

	BUG_ON(vers >= program->nrvers || !program->version[vers]);
	version = program->version[vers];

	clnt = rpc_clone_client(old);
	if (IS_ERR(clnt))
		return clnt;

	/* Re-point the clone at the new program and version */
	clnt->cl_protname = program->name;
	clnt->cl_prog     = program->number;
	clnt->cl_stats    = program->stats;
	clnt->cl_vers     = version->number;
	clnt->cl_procinfo = version->procs;
	clnt->cl_maxproc  = version->nrprocs;

	err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
	if (err == 0)
		return clnt;

	rpc_shutdown_client(clnt);
	return ERR_PTR(err);
}

/*
 * Default callback for async RPC calls
 *
 * Deliberately empty.  rpc_call_sync() hands rpc_default_ops to
 * rpc_new_task() as the call-ops table for its task.
 */
static void
rpc_default_callback(struct rpc_task *task, void *data)
{
}

/* Call-ops table whose only hook is the no-op rpc_call_done above */
static const struct rpc_call_ops rpc_default_ops = {
	.rpc_call_done = rpc_default_callback,
};

/*
 *	Signal mask handling for synchronous code that sleeps on RPC
 *	calls.  RPC_INTR_SIGNALS are the signals that stay deliverable,
 *	in addition to SIGKILL, when the call is interruptible.
 */
#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
 
/*
 * Block every signal except SIGKILL (and, when @intr is non-zero,
 * RPC_INTR_SIGNALS), storing the previous mask in @oldset.
 */
static void rpc_save_sigmask(sigset_t *oldset, int intr)
{
	unsigned long	allowed;
	sigset_t	blocked;

	allowed = sigmask(SIGKILL);
	if (intr)
		allowed |= RPC_INTR_SIGNALS;

	/* The blocked set is the complement of the allowed signals */
	siginitsetinv(&blocked, allowed);
	sigprocmask(SIG_BLOCK, &blocked, oldset);
}

/*
 * Block signals for the duration of @task, saving the old mask in
 * @oldset.  The interruptible signals are left deliverable unless the
 * task is RPC_TASK_UNINTERRUPTIBLE.
 */
static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
{
	rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
}

/* Reinstate the signal mask previously saved in @oldset. */
static inline void rpc_restore_sigmask(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

/*
 * Block signals on behalf of @clnt, saving the old mask in @oldset.
 * The client's cl_intr setting decides whether the interruptible
 * signals stay deliverable.
 */
void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
	rpc_save_sigmask(oldset, clnt->cl_intr);
}

/*
 * Undo rpc_clnt_sigmask(): restore the mask saved in @oldset.
 * @clnt is not used.
 */
void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
	rpc_restore_sigmask(oldset);
}

/*
 * Perform a synchronous RPC call.
 *
 * Returns -EIO if the client is marked dead, -ENOMEM if no task can be
 * allocated, and otherwise the task's status: the one left by
 * rpc_call_setup() if setup failed, or the result of running the task.
 * @flags must not contain RPC_TASK_ASYNC.
 */
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
	struct rpc_task	*task;
	sigset_t	oldset;
	int		status;

	/* If this client is slain all further I/O fails */
	if (clnt->cl_dead)
		return -EIO;

	BUG_ON(flags & RPC_TASK_ASYNC);

	task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
	if (task == NULL)
		return -ENOMEM;

	/* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
	rpc_task_sigmask(task, &oldset);

	rpc_call_setup(task, msg, 0);

	/* Only run the task if setup left it in a good state */
	status = task->tk_status;
	if (status == 0) {
		/* Take an extra reference on the task before running it */
		atomic_inc(&task->tk_count);
		status = rpc_execute(task);
		if (status == 0)
			status = task->tk_status;
	}

	rpc_restore_sigmask(&oldset);
	rpc_release_task(task);
	return status;
}

/*
 * Start an asynchronous RPC call.
 *
 * Returns -EIO if the client is marked dead and -ENOMEM if no task can
 * be allocated; on both of these paths the rpc_release hook of @tk_ops,
 * if there is one, is called on @data.  Otherwise returns the task
 * status left by rpc_call_setup(): the task is executed when that is
 * zero and released when it is not.
 */
int
rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
	       const struct rpc_call_ops *tk_ops, void *data)
{
	struct rpc_task	*task;
	sigset_t	oldset;
	int		status;

	/* If this client is slain all further I/O fails */
	if (clnt->cl_dead) {
		status = -EIO;
		goto out_release;
	}

	/* Create/initialize a new RPC task */
	task = rpc_new_task(clnt, flags | RPC_TASK_ASYNC, tk_ops, data);
	if (task == NULL) {
		status = -ENOMEM;
		goto out_release;
	}

	/* Mask signals on GSS_AUTH upcalls */
	rpc_task_sigmask(task, &oldset);

	rpc_call_setup(task, msg, 0);

	/* Set up the call info struct and execute the task */
	status = task->tk_status;
	if (status != 0)
		rpc_release_task(task);
	else
		rpc_execute(task);

	rpc_restore_sigmask(&oldset);
	return status;

out_release:
	if (tk_ops->rpc_release != NULL)
		tk_ops->rpc_release(data);
	return status;
}


/*
 * Prepare @task to run the call described by @msg: copy the message
 * into the task, OR @flags into its flags, attach a credential, and
 * choose the first action.  The task starts at call_start if its
 * status is still zero afterwards and goes to rpc_exit_task if not.
 */
void
rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
{
	task->tk_flags |= flags;
	task->tk_msg = *msg;

	/* Bind the user cred */
	if (task->tk_msg.rpc_cred == NULL)
		rpcauth_bindcred(task);
	else
		rpcauth_holdcred(task);

	task->tk_action = (task->tk_status == 0) ? call_start : rpc_exit_task;
}

void
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
{
	struct rpc_xprt *xprt = clnt->cl_xprt;
	if (xprt->ops->set_buffer_size)
		xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
}

/*
 * Return size of largest payload RPC client can support, in bytes
 *
 * For stream transports, this is one RPC record fragment (see RFC
 * 1831), as we don't support multi-record requests yet.  For datagram
 * transports, this is the size of an IP packet minus the IP, UDP, and
 * RPC header sizes.
 *
 * The value is read from the max_payload field of the client's
 * transport.
 */
size_t rpc_max_payload(struct rpc_clnt *clnt)
{
	return clnt->cl_xprt->max_payload;
}
EXPORT_SYMBOL(rpc_max_payload);

/**
 * rpc_force_rebind - force transport to check that remote port is unchanged
 * @clnt: client to rebind
 *
 * Clears the client's cached port, but only for autobind clients.
 * call_bind() looks the port up again when it finds it cleared.
 */
void rpc_force_rebind(struct rpc_clnt *clnt)
{
	if (!clnt->cl_autobind)
		return;
	clnt->cl_port = 0;
}
EXPORT_SYMBOL(rpc_force_rebind);

/*
 * Restart an (async) RPC call by pointing the task back at call_start.
 * Usually called from within the exit handler.  A task for which
 * RPC_ASSASSINATED() is true is left alone.
 */
void
rpc_restart_call(struct rpc_task *task)
{
	if (!RPC_ASSASSINATED(task))
		task->tk_action = call_start;
}

/*
 * 0.  Initial state
 *
 *     Other FSM states can be visited zero or more times, but
 *     this state is visited exactly once for each RPC.
 *
 *     NOTE(review): rpc_restart_call() also sets tk_action to
 *     call_start, so a restarted call passes through here again and
 *     is counted again.
 */
static void
call_start(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;

	dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid,
		clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc->p_proc,
		(RPC_IS_ASYNC(task) ? "async" : "sync"));

	/* Increment call count */
	task->tk_msg.rpc_proc->p_count++;	/* per-procedure counter */
	clnt->cl_stats->rpccnt++;		/* counter in the client's cl_stats */
	task->tk_action = call_reserve;
}

/*
 * 1.	Reserve an RPC call slot
 *
 *	If the credential is not up to date, go to call_refresh first.
 *	Otherwise clear the status and ask the transport for a slot;
 *	call_reserveresult looks at the outcome.
 */
static void
call_reserve(struct rpc_task *task)
{
	dprintk("RPC: %4d call_reserve\n", task->tk_pid);

	if (rpcauth_uptodatecred(task)) {
		task->tk_status = 0;
		task->tk_action = call_reserveresult;
		xprt_reserve(task);
	} else {
		task->tk_action = call_refresh;
	}
}

/*
 * 1b.	Grok the result of xprt_reserve()
 *
 *	Success with a request slot moves on to call_allocate.  Success
 *	without a slot exits with -EIO.  On error any slot that was
 *	acquired anyway is released; -EAGAIN goes back to call_reserve
 *	and every other error exits the task with that error.
 */
static void
call_reserveresult(struct rpc_task *task)
{
	int status = task->tk_status;

	dprintk("RPC: %4d call_reserveresult (status %d)\n",
				task->tk_pid, task->tk_status);

	/*
	 * After a call to xprt_reserve(), we must have either
	 * a request slot or else an error status.
	 */
	task->tk_status = 0;
	if (status >= 0) {
		if (!task->tk_rqstp) {
			printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
					__FUNCTION__, status);
			rpc_exit(task, -EIO);
			return;
		}
		task->tk_action = call_allocate;
		return;
	}

	/*
	 * Even though there was an error, we may have acquired
	 * a request slot somehow.  Make sure not to leak it.
	 */
	if (task->tk_rqstp) {
		printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
				__FUNCTION__, status);
		xprt_release(task);
	}

	/* woken up; retry */
	if (status == -EAGAIN) {
		task->tk_action = call_reserve;
		return;
	}

	/* -EIO is probably a shutdown; anything else is unexpected */
	if (status != -EIO)
		printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
				__FUNCTION__, status);
	rpc_exit(task, status);
}

/*
 * 2.	Allocate the buffer. For details, see sched.c:rpc_malloc.
 *	(Note: buffer memory is freed in xprt_release).
 *
 *	The next state is call_bind.  If the allocation fails, an async
 *	task or an unsignalled sync task gives its slot back and retries
 *	from call_reserve after a short delay; a signalled sync task
 *	exits with -ERESTARTSYS.
 */
static void
call_allocate(struct rpc_task *task)
{
	struct rpc_xprt *xprt = task->tk_xprt;
	struct rpc_rqst *req = task->tk_rqstp;
	unsigned int	bufsiz;

	dprintk("RPC: %4d call_allocate (status %d)\n", 
				task->tk_pid, task->tk_status);

	task->tk_action = call_bind;

	/* Nothing to do if the request already has a buffer */
	if (req->rq_buffer)
		return;

	/* FIXME: compute buffer requirements more exactly using
	 * auth->au_wslack */
	bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;

	/* Ask for twice bufsiz: call_encode splits the buffer in two */
	if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
		return;
	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 

	if (!RPC_IS_ASYNC(task) && signalled()) {
		rpc_exit(task, -ERESTARTSYS);
		return;
	}

	xprt_release(task);
	task->tk_action = call_reserve;
	rpc_delay(task, HZ>>4);
}

/*
 * Return non-zero when the length of the task's send buffer is zero.
 * call_encode() and rpc_task_force_reencode() both reset it to zero.
 */
static inline int
rpc_task_need_encode(struct rpc_task *task)
{
	return task->tk_rqstp->rq_snd_buf.len == 0;
}

/*
 * Reset the length of the task's send buffer to zero, so that
 * rpc_task_need_encode() reports true for the task again.
 */
static inline void
rpc_task_force_reencode(struct rpc_task *task)
{
	task->tk_rqstp->rq_snd_buf.len = 0;
}

/*
 * 3.	Encode arguments of an RPC call
 *
 *	Splits the request buffer into a send half and a receive half,
 *	writes the RPC header and then, if the procedure has an encode
 *	routine, encodes the arguments through rpcauth_wrap_req().
 *	An -ENOMEM from the encoder is turned into a delayed -EAGAIN.
 */
static void
call_encode(struct rpc_task *task)
{
	struct rpc_rqst	*req = task->tk_rqstp;
	struct xdr_buf	*snd = &req->rq_snd_buf;
	struct xdr_buf	*rcv = &req->rq_rcv_buf;
	unsigned int	half;
	kxdrproc_t	encode;
	u32		*p;

	dprintk("RPC: %4d call_encode (status %d)\n", 
				task->tk_pid, task->tk_status);

	/* Default buffer setup */
	half = req->rq_bufsize >> 1;

	/* Send buffer: first half of rq_buffer, initially empty */
	snd->head[0].iov_base = (void *)req->rq_buffer;
	snd->head[0].iov_len  = half;
	snd->tail[0].iov_len  = 0;
	snd->page_len	      = 0;
	snd->len	      = 0;
	snd->buflen	      = half;

	/* Receive buffer: second half of rq_buffer */
	rcv->head[0].iov_base = (void *)((char *)req->rq_buffer + half);
	rcv->head[0].iov_len  = half;
	rcv->tail[0].iov_len  = 0;
	rcv->page_len	      = 0;
	rcv->len	      = 0;
	rcv->buflen	      = half;

	/* Encode header and provided arguments */
	encode = task->tk_msg.rpc_proc->p_encode;
	p = call_header(task);
	if (p == NULL) {
		printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
		rpc_exit(task, -EIO);
		return;
	}
	if (encode == NULL)
		return;

	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
			task->tk_msg.rpc_argp);
	if (task->tk_status != -ENOMEM)
		return;

	/* XXX: Is this sane? */
	rpc_delay(task, 3*HZ);
	task->tk_status = -EAGAIN;
}

/*
 * 4.	Get the server port number if not yet set
 *
 *	With a port already known the task moves straight on to
 *	call_connect.  Otherwise a port lookup is started via rpc_getport(),
 *	bounded by the transport's bind_timeout, and its outcome is handled
 *	in call_bind_status.
 */
static void
call_bind(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;

	dprintk("RPC: %4d call_bind (status %d)\n",
				task->tk_pid, task->tk_status);

	if (clnt->cl_port) {
		task->tk_action = call_connect;
		return;
	}

	task->tk_action = call_bind_status;
	task->tk_timeout = task->tk_xprt->bind_timeout;
	rpc_getport(task, clnt);
}

/*
 * 4a.	Sort out bind result
 *
 *	A non-negative status clears tk_status and continues with
 *	call_connect.  For errors:
 *	  -EACCES          delay 3 seconds, then retry the bind
 *	  -ETIMEDOUT       soft tasks exit with -EIO, others retry the bind
 *	  -EPFNOSUPPORT,
 *	  -EPROTONOSUPPORT exit with -EACCES
 *	  anything else    exit with -EIO
 */
static void
call_bind_status(struct rpc_task *task)
{
	int rc = task->tk_status;
	int exit_code = -EACCES;

	if (rc >= 0) {
		dprintk("RPC: %4d call_bind_status (status %d)\n",
					task->tk_pid, task->tk_status);
		task->tk_status = 0;
		task->tk_action = call_connect;
		return;
	}

	/* Cases that leave the switch with "break" retry the bind. */
	switch (rc) {
	case -EACCES:
		dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
				task->tk_pid);
		rpc_delay(task, 3*HZ);
		break;
	case -ETIMEDOUT:
		dprintk("RPC: %4d rpcbind request timed out\n",
				task->tk_pid);
		if (!RPC_IS_SOFT(task))
			break;
		exit_code = -EIO;
		goto out_exit;
	case -EPFNOSUPPORT:
		dprintk("RPC: %4d remote rpcbind service unavailable\n",
				task->tk_pid);
		goto out_exit;
	case -EPROTONOSUPPORT:
		dprintk("RPC: %4d remote rpcbind version 2 unavailable\n",
				task->tk_pid);
		goto out_exit;
	default:
		dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
				task->tk_pid, -rc);
		exit_code = -EIO;
		goto out_exit;
	}

	task->tk_status = 0;
	task->tk_action = call_bind;
	return;

out_exit:
	rpc_exit(task, exit_code);
}

/*
 * 4b.	Connect to the RPC server
 *
 *	An already connected transport goes directly to call_transmit.
 *	Otherwise the next state is call_connect_status, and a connect is
 *	started unless the task already carries an error status.
 */
static void
call_connect(struct rpc_task *task)
{
	struct rpc_xprt *xprt = task->tk_xprt;

	dprintk("RPC: %4d call_connect xprt %p %s connected\n",
			task->tk_pid, xprt,
			(xprt_connected(xprt) ? "is" : "is not"));

	if (xprt_connected(xprt)) {
		task->tk_action = call_transmit;
		return;
	}

	task->tk_action = call_connect_status;
	if (task->tk_status >= 0)
		xprt_connect(task);
}

/*
 * 4c.	Sort out connect result
 *
 *	tk_status is always cleared.  On success the netreconn statistic is
 *	bumped and the task proceeds to call_transmit.  On failure a rebind
 *	is forced; -ENOTCONN, -ETIMEDOUT and -EAGAIN go back to call_bind,
 *	any other error ends the task with -EIO.
 */
static void
call_connect_status(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;
	int result = task->tk_status;

	dprintk("RPC: %5u call_connect_status (status %d)\n",
				task->tk_pid, task->tk_status);

	task->tk_status = 0;
	if (result >= 0) {
		clnt->cl_stats->netreconn++;
		task->tk_action = call_transmit;
		return;
	}

	/* Something failed: remote service port may have changed */
	rpc_force_rebind(clnt);

	if (result == -ENOTCONN || result == -ETIMEDOUT || result == -EAGAIN)
		task->tk_action = call_bind;
	else
		rpc_exit(task, -EIO);
}

/*
 * 5.	Transmit the RPC request, and wait for reply
 *
 *	The default next state is call_status; it is what runs if the task
 *	already has an error or xprt_prepare_transmit() returns non-zero.
 *	A request that still needs encoding is encoded here; if encoding
 *	leaves a non-zero status the transmit is aborted and the request is
 *	marked for re-encoding.  Otherwise the request is handed to
 *	xprt_transmit() with call_transmit_status as the next state.
 *	Procedures without a decode routine finish as soon as the transmit
 *	succeeded.
 */
static void
call_transmit(struct rpc_task *task)
{
	dprintk("RPC: %4d call_transmit (status %d)\n",
				task->tk_pid, task->tk_status);

	task->tk_action = call_status;
	if (task->tk_status < 0)
		return;

	task->tk_status = xprt_prepare_transmit(task);
	if (task->tk_status != 0)
		return;

	/* Encode here so that rpcsec_gss can use correct sequence number. */
	if (rpc_task_need_encode(task)) {
		task->tk_rqstp->rq_bytes_sent = 0;
		call_encode(task);
		/* Did the encode result in an error condition? */
		if (task->tk_status != 0) {
			/*
			 * release socket write lock before attempting
			 * to handle error
			 */
			xprt_abort_transmit(task);
			rpc_task_force_reencode(task);
			return;
		}
	}

	task->tk_action = call_transmit_status;
	xprt_transmit(task);

	/* No reply to decode: a successful send completes the call. */
	if (task->tk_status >= 0 && !task->tk_msg.rpc_proc->p_decode) {
		task->tk_action = rpc_exit_task;
		rpc_wake_up_task(task);
	}
}

/*
 * 6.	Sort out the RPC call status
 *
 *	If a reply has been received (rq_received > 0) and no partially
 *	sent data is outstanding (rq_bytes_sent == 0), rq_received replaces
 *	tk_status.  A non-negative status then moves on to call_decode.
 *
 *	For errors tk_status is cleared and the next state is chosen:
 *	  -ETIMEDOUT                 call_timeout
 *	  -ECONNREFUSED, -ENOTCONN   force a rebind, then call_bind
 *	  -EAGAIN                    call_transmit
 *	  -EIO                       exit with -EIO
 *	  anything else              log a warning and exit with that error
 */
static void
call_status(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	int		status;

	if (req->rq_received > 0 && !req->rq_bytes_sent)
		task->tk_status = req->rq_received;

	dprintk("RPC: %4d call_status (status %d)\n",
				task->tk_pid, task->tk_status);

	status = task->tk_status;
	if (status >= 0) {
		task->tk_action = call_decode;
		return;
	}

	task->tk_status = 0;
	switch (status) {
	case -ETIMEDOUT:
		task->tk_action = call_timeout;
		break;
	case -ECONNREFUSED:
	case -ENOTCONN:
		rpc_force_rebind(clnt);
		task->tk_action = call_bind;
		break;
	case -EAGAIN:
		task->tk_action = call_transmit;
		break;
	case -EIO:
		/* shutdown or soft timeout */
		rpc_exit(task, status);
		break;
	default:
		/*
		 * Give the message an explicit log level, like every other
		 * printk in this state machine.  The error is printed as a
		 * positive number.
		 */
		printk(KERN_WARNING "%s: RPC call returned error %d\n",
			       clnt->cl_protname, -status);
		rpc_exit(task, status);
		break;
	}
}

/*
 * 6a.	Handle transmission errors.
 *
 *	Any status other than -EAGAIN marks the request for re-encoding
 *	before the common status handling in call_status() runs.
 */
static void
call_transmit_status(struct rpc_task *task)
{
	int keep_encoding = (task->tk_status == -EAGAIN);

	if (!keep_encoding)
		rpc_task_force_reencode(task);
	call_status(task);
}

/*
 * 6b.	Handle RPC timeout
 * 	We do not release the request slot, so we keep using the
 *	same XID for all retransmits.
 *
 *	xprt_adjust_timeout() returning 0 is treated as a minor timeout and
 *	simply retried.  Otherwise it is a major timeout: soft tasks exit
 *	with -EIO, other tasks log "still trying" once (tracked with
 *	RPC_CALL_MAJORSEEN), force a rebind and retry.  Every retry counts
 *	as a retransmission and restarts at call_bind.
 */
static void
call_timeout(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;

	if (xprt_adjust_timeout(task->tk_rqstp) != 0) {
		dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
		task->tk_timeouts++;

		if (RPC_IS_SOFT(task)) {
			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
					clnt->cl_protname, clnt->cl_server);
			rpc_exit(task, -EIO);
			return;
		}

		if ((task->tk_flags & RPC_CALL_MAJORSEEN) == 0) {
			task->tk_flags |= RPC_CALL_MAJORSEEN;
			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
				clnt->cl_protname, clnt->cl_server);
		}
		rpc_force_rebind(clnt);
	} else {
		dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
	}

	/* Retry path shared by minor and major timeouts. */
	clnt->cl_stats->rpcretrans++;
	task->tk_action = call_bind;
	task->tk_status = 0;
}

/*
 * 7.	Decode the RPC reply
 */
static void
call_decode(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
	u32		*p;

	dprintk("RPC: %4d call_decode (status %d)\n", 
				task->tk_pid, task->tk_status);

	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
		printk(KERN_NOTICE "%s: server %s OK\n",
			clnt->cl_protname, clnt->cl_server);
		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
	}

	if (task->tk_status < 12) {
		if (!RPC_IS_SOFT(task)) {
			task->tk_action = call_bind;
			clnt->cl_stats->rpcretrans++;
			goto out_retry;
		}
		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
			clnt->cl_protname, task->tk_status);
		rpc_exit(task, -EIO);
		return;
	}

	/*
	 * Ensure that we see all writes made by xprt_complete_rqst()
	 * before it changed req->rq_received.
	 */
	smp_rmb();
	req->rq_rcv_buf.len = req->rq_private_buf.len;

	/* Check that the softirq receive buffer is valid */
	WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
				sizeof(req->rq_rcv_buf)) != 0);

	/* Verify the RPC header */
	p = call_verify(task);
	if (IS_ERR(p)) {
		if (p == ERR_PTR(-EAGAIN))
			goto out_retry;
		return;
	}

	task->tk_action = rpc_exit_task;

	if (decode)
		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
						      task->tk_msg.rpc_resp);
	dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
					task->tk_status);
	return;
out_retry:
	req->rq_received = req->rq_private_buf.len = 0;
	task->tk_status = 0;
}

/*
 * 8.	Refresh the credentials if rejected by the server
 *
 *	Releases the request slot first, then clears tk_status, counts the
 *	refresh in the client statistics and starts the credential refresh.
 *	The outcome is handled in call_refreshresult.
 */
static void
call_refresh(struct rpc_task *task)
{
	dprintk("RPC: %4d call_refresh\n", task->tk_pid);

	/* Must do to obtain new XID */
	xprt_release(task);

	task->tk_status = 0;
	task->tk_action = call_refreshresult;
	task->tk_client->cl_stats->rpcauthrefresh++;

	rpcauth_refreshcred(task);
}

/*
 * 8a.	Process the results of a credential refresh
 *
 *	tk_status is cleared.  A non-negative result with up-to-date
 *	credentials continues at call_reserve.  -EACCES ends the task with
 *	-EACCES.  Anything else goes back to call_refresh, after a 3 second
 *	delay unless the refresh timed out.
 */
static void
call_refreshresult(struct rpc_task *task)
{
	int result = task->tk_status;

	dprintk("RPC: %4d call_refreshresult (status %d)\n",
				task->tk_pid, task->tk_status);

	task->tk_status = 0;
	task->tk_action = call_reserve;
	if (result >= 0 && rpcauth_uptodatecred(task))
		return;

	if (result == -EACCES) {
		rpc_exit(task, -EACCES);
		return;
	}

	/* Try the refresh again; a timeout has already cost us time. */
	task->tk_action = call_refresh;
	if (result == -ETIMEDOUT)
		return;
	rpc_delay(task, 3*HZ);
}

/*
 * Call header serialization
 *
 * Writes the RPC call header into the first send kvec, after whatever
 * space xprt_skip_transport_header() reserves: six 32-bit words (XID,
 * message type, RPC version, program, version, procedure) followed by
 * the credentials marshalled by rpcauth_marshcred().  rq_slen is updated
 * from xdr_adjust_iovec().
 *
 * Returns the value handed back by rpcauth_marshcred(); call_encode()
 * treats NULL as failure.
 */
static u32 *
call_header(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	u32		*hdr;
	u32		*end;

	/* FIXME: check buffer size? */

	hdr = xprt_skip_transport_header(task->tk_xprt,
					 req->rq_svec[0].iov_base);

	hdr[0] = req->rq_xid;				/* XID */
	hdr[1] = htonl(RPC_CALL);			/* CALL */
	hdr[2] = htonl(RPC_VERSION);			/* RPC version */
	hdr[3] = htonl(clnt->cl_prog);			/* program number */
	hdr[4] = htonl(clnt->cl_vers);			/* program version */
	hdr[5] = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */

	end = rpcauth_marshcred(task, hdr + 6);
	req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], end);
	return end;
}

/*
 * Reply header verification
 *
 * Walks the reply header found in the head kvec of the request's receive
 * buffer: XID (skipped), message type, reply status and then either the
 * rejection details or the verifier plus accept status.
 *
 * Returns:
 *   - a pointer to the word following the accept status when the server
 *     reported RPC_SUCCESS;
 *   - ERR_PTR(-EAGAIN) when the call should be retried; tk_action has
 *     then been set to call_bind or call_refresh;
 *   - ERR_PTR(error) after rpc_exit(task, error) has been called.
 */
static u32 *
call_verify(struct rpc_task *task)
{
	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
	/* Reply length in 32-bit words. */
	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
	u32	*p = iov->iov_base, n;
	/* Error used by the out_err path unless a case overrides it. */
	int error = -EACCES;

	/* Need at least three words: XID, message type, reply status. */
	if ((len -= 3) < 0)
		goto out_overflow;
	p += 1;	/* skip XID */

	if ((n = ntohl(*p++)) != RPC_REPLY) {
		printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
		goto out_garbage;
	}
	/* Anything but "accepted" is handled as a rejected call. */
	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
		/* One more word: the reject status. */
		if (--len < 0)
			goto out_overflow;
		switch ((n = ntohl(*p++))) {
			case RPC_AUTH_ERROR:
				break;
			case RPC_MISMATCH:
				dprintk("%s: RPC call version mismatch!\n", __FUNCTION__);
				error = -EPROTONOSUPPORT;
				goto out_err;
			default:
				dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
				goto out_eio;
		}
		/* One more word: the authentication status. */
		if (--len < 0)
			goto out_overflow;
		switch ((n = ntohl(*p++))) {
		case RPC_AUTH_REJECTEDCRED:
		case RPC_AUTH_REJECTEDVERF:
		case RPCSEC_GSS_CREDPROBLEM:
		case RPCSEC_GSS_CTXPROBLEM:
			/* Out of credential retries: fail with -EACCES. */
			if (!task->tk_cred_retry)
				break;
			task->tk_cred_retry--;
			dprintk("RPC: %4d call_verify: retry stale creds\n",
							task->tk_pid);
			rpcauth_invalcred(task);
			task->tk_action = call_refresh;
			goto out_retry;
		case RPC_AUTH_BADCRED:
		case RPC_AUTH_BADVERF:
			/* possibly garbled cred/verf? */
			if (!task->tk_garb_retry)
				break;
			task->tk_garb_retry--;
			dprintk("RPC: %4d call_verify: retry garbled creds\n",
							task->tk_pid);
			task->tk_action = call_bind;
			goto out_retry;
		case RPC_AUTH_TOOWEAK:
			printk(KERN_NOTICE "call_verify: server %s requires stronger "
			       "authentication.\n", task->tk_client->cl_server);
			break;
		default:
			printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
			error = -EIO;
		}
		dprintk("RPC: %4d call_verify: call rejected %d\n",
						task->tk_pid, n);
		goto out_err;
	}
	/* Accepted reply: check the verifier, which also advances p. */
	if (!(p = rpcauth_checkverf(task, p))) {
		printk(KERN_WARNING "call_verify: auth check failed\n");
		goto out_garbage;		/* bad verifier, retry */
	}
	/*
	 * NOTE(review): len is recomputed here from the number of words
	 * consumed so far rather than compared against the reply length,
	 * so it looks like this check cannot trigger while p points at or
	 * past the start of the buffer -- confirm whether a comparison with
	 * the received length was intended.
	 */
	len = p - (u32 *)iov->iov_base - 1;
	if (len < 0)
		goto out_overflow;
	/* Dispatch on the accept status. */
	switch ((n = ntohl(*p++))) {
	case RPC_SUCCESS:
		return p;
	case RPC_PROG_UNAVAIL:
		dprintk("RPC: call_verify: program %u is unsupported by server %s\n",
				(unsigned int)task->tk_client->cl_prog,
				task->tk_client->cl_server);
		error = -EPFNOSUPPORT;
		goto out_err;
	case RPC_PROG_MISMATCH:
		dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n",
				(unsigned int)task->tk_client->cl_prog,
				(unsigned int)task->tk_client->cl_vers,
				task->tk_client->cl_server);
		error = -EPROTONOSUPPORT;
		goto out_err;
	case RPC_PROC_UNAVAIL:
		dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
				task->tk_msg.rpc_proc,
				task->tk_client->cl_prog,
				task->tk_client->cl_vers,
				task->tk_client->cl_server);
		error = -EOPNOTSUPP;
		goto out_err;
	case RPC_GARBAGE_ARGS:
		dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
		break;			/* retry */
	default:
		printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
		/* Also retry */
	}

	/*
	 * Garbage path: count it, and retry from call_bind while the task
	 * still has garbage retries left; otherwise fall through to -EIO.
	 */
out_garbage:
	task->tk_client->cl_stats->rpcgarbage++;
	if (task->tk_garb_retry) {
		task->tk_garb_retry--;
		dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
		task->tk_action = call_bind;
		/*
		 * The label sits inside the if body: the credential retry
		 * paths above jump straight here with tk_action already set.
		 */
out_retry:
		return ERR_PTR(-EAGAIN);
	}
	printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
out_eio:
	error = -EIO;
out_err:
	rpc_exit(task, error);
	return ERR_PTR(error);
	/* A truncated reply is reported and then handled as garbage. */
out_overflow:
	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
	goto out_garbage;
}

/*
 * Argument encoder for the NULL procedure used by rpc_ping(): there is
 * nothing to encode, so it ignores its arguments and reports success.
 */
static int
rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
{
	return 0;
}

/*
 * Result decoder for the NULL procedure used by rpc_ping(): there is
 * nothing to decode, so it ignores its arguments and reports success.
 */
static int
rpcproc_decode_null(void *rqstp, u32 *data, void *obj)
{
	return 0;
}

/*
 * Procedure descriptor used by rpc_ping().  Only the encode and decode
 * hooks are set; every other field is left zero-initialized.
 */
static struct rpc_procinfo rpcproc_null = {
	.p_decode	= rpcproc_decode_null,
	.p_encode	= rpcproc_encode_null,
};

int rpc_ping(struct rpc_clnt *clnt, int flags)
{
	struct rpc_message msg = {
		.rpc_proc = &rpcproc_null,
	};
	int err;
	msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
	err = rpc_call_sync(clnt, &msg, flags);
	put_rpccred(msg.rpc_cred);
	return err;
}
[sym_counter]) pcnt = 100.0 * line->count[sym_counter] / (float)total; if (pcnt >= (float)sym_pcnt_filter) { if (displayed <= print_entries) show_lines(line_queue, line_queue_count, total); else more++; displayed += line_queue_count; line_queue_count = 0; line_queue = NULL; } else if (line_queue_count > TRACE_COUNT) { line_queue = line_queue->next; line_queue_count--; } line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; line = line->next; } pthread_mutex_unlock(&syme->src->lock); if (more) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } /* * Symbols will be added here in event__process_sample and will get out * after decayed. */ static LIST_HEAD(active_symbols); static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; /* * Ordering weight: count-1 * count-2 * ... / count-n */ static double sym_weight(const struct sym_entry *sym) { double weight = sym->snap_count; int counter; if (!display_weighted) return weight; for (counter = 1; counter < nr_counters-1; counter++) weight *= sym->count[counter]; weight /= (sym->count[counter] + 1); return weight; } static long samples; static long userspace_samples; static const char CONSOLE_CLEAR[] = ""; static void __list_insert_active_sym(struct sym_entry *syme) { list_add(&syme->node, &active_symbols); } static void list_remove_active_sym(struct sym_entry *syme) { pthread_mutex_lock(&active_symbols_lock); list_del_init(&syme->node); pthread_mutex_unlock(&active_symbols_lock); } static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) { struct rb_node **p = &tree->rb_node; struct rb_node *parent = NULL; struct sym_entry *iter; while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct sym_entry, rb_node); if (se->weight > iter->weight) p = &(*p)->rb_left; else p = &(*p)->rb_right; } rb_link_node(&se->rb_node, parent, p); rb_insert_color(&se->rb_node, tree); } static void print_sym_table(void) { int printed = 0, j; int counter, snap = 
!display_weighted ? sym_counter : 0; float samples_per_sec = samples/delay_secs; float ksamples_per_sec = (samples-userspace_samples)/delay_secs; float sum_ksamples = 0.0; struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; int sym_width = 0, dso_width = 0, max_dso_width; const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; /* Sort the active symbols */ pthread_mutex_lock(&active_symbols_lock); syme = list_entry(active_symbols.next, struct sym_entry, node); pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { if ((hide_user_symbols && syme->origin == PERF_RECORD_MISC_USER) || (hide_kernel_symbols && syme->origin == PERF_RECORD_MISC_KERNEL)) { list_remove_active_sym(syme); continue; } syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); sum_ksamples += syme->snap_count; for (j = 0; j < nr_counters; j++) syme->count[j] = zero ? 
0 : syme->count[j] * 7 / 8; } else list_remove_active_sym(syme); } puts(CONSOLE_CLEAR); printf("%-*.*s\n", win_width, win_width, graph_dotted_line); printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); else printf(" "); } if (!display_weighted) printf("%s", event_name(sym_counter)); else for (counter = 0; counter < nr_counters; counter++) { if (counter) printf("/"); printf("%s", event_name(counter)); } printf( "], "); if (target_pid != -1) printf(" (target_pid: %d", target_pid); else printf(" (all"); if (profile_cpu != -1) printf(", cpu: %d)\n", profile_cpu); else { if (target_pid != -1) printf(")\n"); else printf(", %d CPUs)\n", nr_cpus); } printf("%-*.*s\n", win_width, win_width, graph_dotted_line); if (sym_filter_entry) { show_details(sym_filter_entry); return; } /* * Find the longest symbol name that will be displayed */ for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { syme = rb_entry(nd, struct sym_entry, rb_node); if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; if (syme->map->dso->long_name_len > dso_width) dso_width = syme->map->dso->long_name_len; if (syme->name_len > sym_width) sym_width = syme->name_len; } printed = 0; max_dso_width = winsize.ws_col - sym_width - 29; if (dso_width > max_dso_width) dso_width = max_dso_width; putchar('\n'); if (nr_counters == 1) printf(" samples pcnt"); else printf(" weight samples pcnt"); if (verbose) printf(" RIP "); printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); printf(" %s _______ _____", nr_counters == 1 ? 
" " : "______"); if (verbose) printf(" ________________"); printf(" %-*.*s", sym_width, sym_width, graph_line); printf(" %-*.*s", dso_width, dso_width, graph_line); puts("\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; double pcnt; syme = rb_entry(nd, struct sym_entry, rb_node); sym = sym_entry__symbol(syme); if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); if (nr_counters == 1 || !display_weighted) printf("%20.2f ", syme->weight); else printf("%9.1f %10ld ", syme->weight, syme->snap_count); percent_color_fprintf(stdout, "%4.1f%%", pcnt); if (verbose) printf(" %016llx", sym->start); printf(" %-*.*s", sym_width, sym_width, sym->name); printf(" %-*.*s\n", dso_width, dso_width, dso_width >= syme->map->dso->long_name_len ? syme->map->dso->long_name : syme->map->dso->short_name); } } static void prompt_integer(int *target, const char *msg) { char *buf = malloc(0), *p; size_t dummy = 0; int tmp; fprintf(stdout, "\n%s: ", msg); if (getline(&buf, &dummy, stdin) < 0) return; p = strchr(buf, '\n'); if (p) *p = 0; p = buf; while(*p) { if (!isdigit(*p)) goto out_free; p++; } tmp = strtoul(buf, NULL, 10); *target = tmp; out_free: free(buf); } static void prompt_percent(int *target, const char *msg) { int tmp = 0; prompt_integer(&tmp, msg); if (tmp >= 0 && tmp <= 100) *target = tmp; } static void prompt_symbol(struct sym_entry **target, const char *msg) { char *buf = malloc(0), *p; struct sym_entry *syme = *target, *n, *found = NULL; size_t dummy = 0; /* zero counters of active symbol */ if (syme) { pthread_mutex_lock(&syme->src->lock); __zero_source_counters(syme); *target = NULL; pthread_mutex_unlock(&syme->src->lock); } fprintf(stdout, "\n%s: ", msg); if (getline(&buf, &dummy, stdin) < 0) goto out_free; p = strchr(buf, '\n'); if (p) *p = 0; pthread_mutex_lock(&active_symbols_lock); syme = list_entry(active_symbols.next, struct sym_entry, 
node); pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { struct symbol *sym = sym_entry__symbol(syme); if (!strcmp(buf, sym->name)) { found = syme; break; } } if (!found) { fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); sleep(1); return; } else parse_source(found); out_free: free(buf); } static void print_mapped_keys(void) { char *name = NULL; if (sym_filter_entry) { struct symbol *sym = sym_entry__symbol(sym_filter_entry); name = sym->name; } fprintf(stdout, "\nMapped keys:\n"); fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); if (nr_counters > 1) fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); if (symbol_conf.vmlinux_name) { fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); } if (nr_counters > 1) fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); fprintf(stdout, "\t[K] hide kernel_symbols symbols. \t(%s)\n", hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", hide_user_symbols ? "yes" : "no"); fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); fprintf(stdout, "\t[qQ] quit.\n"); } static int key_mapped(int c) { switch (c) { case 'd': case 'e': case 'f': case 'z': case 'q': case 'Q': case 'K': case 'U': return 1; case 'E': case 'w': return nr_counters > 1 ? 1 : 0; case 'F': case 's': case 'S': return symbol_conf.vmlinux_name ? 
1 : 0; default: break; } return 0; } static void handle_keypress(int c) { if (!key_mapped(c)) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; struct termios tc, save; print_mapped_keys(); fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); fflush(stdout); tcgetattr(0, &save); tc = save; tc.c_lflag &= ~(ICANON | ECHO); tc.c_cc[VMIN] = 0; tc.c_cc[VTIME] = 0; tcsetattr(0, TCSANOW, &tc); poll(&stdin_poll, 1, -1); c = getc(stdin); tcsetattr(0, TCSAFLUSH, &save); if (!key_mapped(c)) return; } switch (c) { case 'd': prompt_integer(&delay_secs, "Enter display delay"); if (delay_secs < 1) delay_secs = 1; break; case 'e': prompt_integer(&print_entries, "Enter display entries (lines)"); if (print_entries == 0) { sig_winch_handler(SIGWINCH); signal(SIGWINCH, sig_winch_handler); } else signal(SIGWINCH, SIG_DFL); break; case 'E': if (nr_counters > 1) { int i; fprintf(stderr, "\nAvailable events:"); for (i = 0; i < nr_counters; i++) fprintf(stderr, "\n\t%d %s", i, event_name(i)); prompt_integer(&sym_counter, "Enter details event counter"); if (sym_counter >= nr_counters) { fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); sym_counter = 0; sleep(1); } } else sym_counter = 0; break; case 'f': prompt_integer(&count_filter, "Enter display event count filter"); break; case 'F': prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); break; case 'K': hide_kernel_symbols = !hide_kernel_symbols; break; case 'q': case 'Q': printf("exiting.\n"); if (dump_symtab) dsos__fprintf(stderr); exit(0); case 's': prompt_symbol(&sym_filter_entry, "Enter details symbol"); break; case 'S': if (!sym_filter_entry) break; else { struct sym_entry *syme = sym_filter_entry; pthread_mutex_lock(&syme->src->lock); sym_filter_entry = NULL; __zero_source_counters(syme); pthread_mutex_unlock(&syme->src->lock); } break; case 'U': hide_user_symbols = !hide_user_symbols; break; case 'w': display_weighted = ~display_weighted; break; case 'z': 
zero = ~zero; break; default: break; } } static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; struct termios tc, save; int delay_msecs, c; tcgetattr(0, &save); tc = save; tc.c_lflag &= ~(ICANON | ECHO); tc.c_cc[VMIN] = 0; tc.c_cc[VTIME] = 0; repeat: delay_msecs = delay_secs * 1000; tcsetattr(0, TCSANOW, &tc); /* trash return*/ getc(stdin); do { print_sym_table(); } while (!poll(&stdin_poll, 1, delay_msecs) == 1); c = getc(stdin); tcsetattr(0, TCSAFLUSH, &save); handle_keypress(c); goto repeat; return NULL; } /* Tag samples to be skipped. */ static const char *skip_symbols[] = { "default_idle", "cpu_idle", "enter_idle", "exit_idle", "mwait_idle", "mwait_idle_with_hints", "poll_idle", "ppc64_runlatch_off", "pseries_dedicated_idle_sleep", NULL }; static int symbol_filter(struct map *map, struct symbol *sym) { struct sym_entry *syme; const char *name = sym->name; int i; /* * ppc64 uses function descriptors and appends a '.' to the * start of every instruction address. Remove it. 
*/ if (name[0] == '.') name++; if (!strcmp(name, "_text") || !strcmp(name, "_etext") || !strcmp(name, "_sinittext") || !strncmp("init_module", name, 11) || !strncmp("cleanup_module", name, 14) || strstr(name, "_text_start") || strstr(name, "_text_end")) return 1; syme = symbol__priv(sym); syme->map = map; syme->src = NULL; if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) sym_filter_entry = syme; for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { syme->skip = 1; break; } } if (!syme->skip) syme->name_len = strlen(sym->name); return 0; } static void event__process_sample(const event_t *self, int counter) { u64 ip = self->ip.ip; struct sym_entry *syme; struct addr_location al; u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; switch (origin) { case PERF_RECORD_MISC_USER: if (hide_user_symbols) return; break; case PERF_RECORD_MISC_KERNEL: if (hide_kernel_symbols) return; break; default: return; } if (event__preprocess_sample(self, &al, symbol_filter) < 0 || al.sym == NULL) return; syme = symbol__priv(al.sym); if (!syme->skip) { syme->count[counter]++; syme->origin = origin; record_precise_ip(syme, counter, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); pthread_mutex_unlock(&active_symbols_lock); if (origin == PERF_RECORD_MISC_USER) ++userspace_samples; ++samples; } } static int event__process(event_t *event) { switch (event->header.type) { case PERF_RECORD_COMM: event__process_comm(event); break; case PERF_RECORD_MMAP: event__process_mmap(event); break; default: break; } return 0; } struct mmap_data { int counter; void *base; int mask; unsigned int prev; }; static unsigned int mmap_read_head(struct mmap_data *md) { struct perf_event_mmap_page *pc = md->base; int head; head = pc->data_head; rmb(); return head; } static void mmap_read_counter(struct mmap_data *md) { unsigned int head = mmap_read_head(md); unsigned int old = md->prev; 
unsigned char *data = md->base + page_size; int diff; /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. * * If we somehow ended up ahead of the head, we got messed up. * * In either case, truncate and restart at head. */ diff = head - old; if (diff > md->mask / 2 || diff < 0) { fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* * head points to a known good entry, start there. */ old = head; } for (; old != head;) { event_t *event = (event_t *)&data[old & md->mask]; event_t event_copy; size_t size = event->header.size; /* * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ if ((old & md->mask) + size != ((old + size) & md->mask)) { unsigned int offset = old; unsigned int len = min(sizeof(*event), size), cpy; void *dst = &event_copy; do { cpy = min(md->mask + 1 - (offset & md->mask), len); memcpy(dst, &data[offset & md->mask], cpy); offset += cpy; dst += cpy; len -= cpy; } while (len); event = &event_copy; } if (event->header.type == PERF_RECORD_SAMPLE) event__process_sample(event, md->counter); else event__process(event); old += size; } md->prev = old; } static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static void mmap_read(void) { int i, counter; for (i = 0; i < nr_cpus; i++) { for (counter = 0; counter < nr_counters; counter++) mmap_read_counter(&mmap_array[i][counter]); } } int nr_poll; int group_fd; static void start_counter(int i, int counter) { struct perf_event_attr *attr; int cpu; cpu = profile_cpu; if (target_pid == -1 && profile_cpu == -1) cpu = i; attr = attrs + counter; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; attr->sample_freq = freq; } attr->inherit = (cpu < 0) && inherit; attr->mmap = 1; try_again: fd[i][counter] = sys_perf_event_open(attr, 
target_pid, cpu, group_fd, 0); if (fd[i][counter] < 0) { int err = errno; if (err == EPERM || err == EACCES) die("No permission - are you root?\n"); /* * If it's cycles then fall back to hrtimer * based cpu-clock-tick sw counter, which * is always available even if no PMU support: */ if (attr->type == PERF_TYPE_HARDWARE && attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) warning(" ... trying to fall back to cpu-clock-ticks\n"); attr->type = PERF_TYPE_SOFTWARE; attr->config = PERF_COUNT_SW_CPU_CLOCK; goto try_again; } printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[i][counter], strerror(err)); die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } assert(fd[i][counter] >= 0); fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); /* * First counter acts as the group leader: */ if (group && group_fd == -1) group_fd = fd[i][counter]; event_array[nr_poll].fd = fd[i][counter]; event_array[nr_poll].events = POLLIN; nr_poll++; mmap_array[i][counter].counter = counter; mmap_array[i][counter].prev = 0; mmap_array[i][counter].mask = mmap_pages*page_size - 1; mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, PROT_READ, MAP_SHARED, fd[i][counter], 0); if (mmap_array[i][counter].base == MAP_FAILED) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } static int __cmd_top(void) { pthread_t thread; int i, counter; int ret; if (target_pid != -1) event__synthesize_thread(target_pid, event__process); else event__synthesize_threads(event__process); for (i = 0; i < nr_cpus; i++) { group_fd = -1; for (counter = 0; counter < nr_counters; counter++) start_counter(i, counter); } /* Wait for a minimal set of events before starting the snapshot */ poll(event_array, nr_poll, 100); mmap_read(); if (pthread_create(&thread, NULL, display_thread, NULL)) { printf("Could not create display thread.\n"); exit(-1); } if (realtime_prio) { struct sched_param param; param.sched_priority = realtime_prio; if (sched_setscheduler(0, 
SCHED_FIFO, &param)) { printf("Could not set realtime priority.\n"); exit(-1); } } while (1) { int hits = samples; mmap_read(); if (hits == samples) ret = poll(event_array, nr_poll, 100); } return 0; } static const char * const top_usage[] = { "perf top [<options>]", NULL }; static const struct option options[] = { OPT_CALLBACK('e', "event", NULL, "event", "event selector. use 'perf list' to list available events", parse_events), OPT_INTEGER('c', "count", &default_interval, "event period to sample"), OPT_INTEGER('p', "pid", &target_pid, "profile events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, "hide kernel symbols"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &delay_secs, "number of seconds to delay between refreshes"), OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, "dump the symbol table used for profiling"), OPT_INTEGER('f', "count-filter", &count_filter, "only display functions with more events than this"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", "symbol to annotate - requires -k option"), OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, "profile at this frequency"), OPT_INTEGER('E', "entries", &print_entries, "display this many functions"), OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, "hide user symbols"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() }; int cmd_top(int argc, 
const char **argv, const char *prefix __used) { int counter; page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); /* CPU and PID are mutually exclusive */ if (target_pid != -1 && profile_cpu != -1) { printf("WARNING: PID switch overriding CPU\n"); sleep(1); profile_cpu = -1; } if (!nr_counters) nr_counters = 1; symbol_conf.priv_size = (sizeof(struct sym_entry) + (nr_counters + 1) * sizeof(unsigned long)); if (symbol_conf.vmlinux_name == NULL) symbol_conf.try_vmlinux_path = true; if (symbol__init(&symbol_conf) < 0) return -1; if (delay_secs < 1) delay_secs = 1; parse_source(sym_filter_entry); /* * User specified count overrides default frequency. */ if (default_interval) freq = 0; else if (freq) { default_interval = freq; } else { fprintf(stderr, "frequency and count are zero, aborting\n"); exit(EXIT_FAILURE); } /* * Fill in the ones not specifically initialized via -c: */ for (counter = 0; counter < nr_counters; counter++) { if (attrs[counter].sample_period) continue; attrs[counter].sample_period = default_interval; } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; get_term_dimensions(&winsize); if (print_entries == 0) { update_print_entries(&winsize); signal(SIGWINCH, sig_winch_handler); } return __cmd_top(); }