aboutsummaryrefslogblamecommitdiffstats
path: root/mm/page-writeback.c
blob: be0efbde4994f5454198384ce274b62eb827adc4 (plain) (tree)
1
2
  
                      




















                                                                
                                         

                         
                       





                           
                              
                          















                                                                           
                                                                                         
























                                                                              
                                                              
   
                                      

  
                                                                          
   
                                    






                                                                     

                                                                                









                                                          

















                                                                               

                                                                      





                                                         
                                                        

                                

                     
                                                        
           
                                            


      

                                                                          
                                                       































                                                                               













                                                                 
                                            

                  





                                                                             
 

                                           








                                                                       







                                                                           



                                                                         
                                              

         


                                                            
















                                                                                









                                                                   
   
                                                                  
                                            
                                                                       









                                                                               

                                                                       
 


                                                             








                                                                              





                                        


                                             
                         
 
                                                  


                               



                               
                                                                          






                                                                       


                                                                        
                                              
















                                                                            
                                    


                     


                                       


                                                                              








                                                                   
                                                      










                                                                           
                                 
 


                                                                   





                                                                

                                                                 








                                                                               

                                                                          










                                                                               






                                               
                                    



                      
                                                     
                            
                                                        

                                                            






                                                                        
                                                              






                                                                         
                                     








                                                                                     

                                                                               
                                    

                                                            



























                                                                               
                                                                




























                                                                               
                                  
 
                                                                    





                                                                  
                    

                                                                        
                                  


                 
                                                           




                                            















                                                              


                                     
                                                                 
                                  


                                             




































































































































                                                                                                

                                                                               

                

                                  
                                
                                       
                                                               



                                                       




                                                                      

                                   



















































                                                                              



                                                                   






                                                         
                                                                 
                                                                           

                                                                       






                                                                         
                 
                         
         
                 
























                                                                                




                                                       
         



                                            

















                                                                             
                               






                                   
















                                                                           

































                                                                               
                                                                 
                                 
                 
                         
         
                                        
 
                                       































































                                                                         
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	akpm@zip.com.au
 *		Initial version
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>

/*
 * The maximum number of pages to writeout in a single bdflush/kupdate
 * operation.  We do this so we don't hold I_LOCK against an inode for
 * enormous amounts of time, which would block a userspace task which has
 * been forced to throttle against that inode.  Also, the code reevaluates
 * the dirty state each time it has written this many pages.
 */
#define MAX_WRITEBACK_PAGES	1024

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 *
 * Recomputed by writeback_set_ratelimit(); also read by
 * sync_writeback_pages() to size the writeback chunk.  The initialiser is
 * only the value in effect until writeback_set_ratelimit() has run.
 */
static long ratelimit_pages = 32;

/*
 * Set to 1 by balance_dirty_pages() when it finds dirty + writeback pages
 * above the dirty threshold, and cleared by it again once they are back at
 * or under the threshold.  While set, balance_dirty_pages_ratelimited_nr()
 * uses a ratelimit of 8 pages instead of ratelimit_pages.
 */
static int dirty_exceeded __cacheline_aligned_in_smp;	/* Dirty mem may be over limit */

/*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
 * It should be somewhat larger than ratelimit_pages to ensure that reasonably
 * large amounts of I/O are submitted.
 */
static inline long sync_writeback_pages(void)
{
	const long per_cpu_limit = ratelimit_pages;

	/* One and a half times the per-CPU ratelimit. */
	return per_cpu_limit + per_cpu_limit / 2;
}

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via pdflush) at this percentage.
 *
 * get_dirty_limits() applies it to vm_total_pages (minus totalhigh_pages
 * under CONFIG_HIGHMEM).  If it is not below the effective dirty ratio it is
 * replaced by half of that ratio.
 */
int dirty_background_ratio = 10;

/*
 * The generator of dirty data starts writeback at this percentage.
 *
 * get_dirty_limits() caps it at half of the unmapped-memory percentage and
 * never lets the effective value drop below 5.
 */
int vm_dirty_ratio = 40;

/*
 * The interval between `kupdate'-style writebacks, in jiffies.
 * When it is zero wb_kupdate() does not re-arm wb_timer, and the sysctl
 * handler deletes the timer.
 */
int dirty_writeback_interval = 5 * HZ;

/*
 * The longest number of jiffies for which data is allowed to remain dirty.
 * wb_kupdate() subtracts it from jiffies to obtain its older_than_this cutoff.
 */
int dirty_expire_interval = 30 * HZ;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 * laptop_io_completion() uses the value as the delay for laptop_mode_wb_timer.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */


static void background_writeout(unsigned long _min_pages);

/*
 * Work out the current dirty-memory clamping and background writeout
 * thresholds.
 *
 * The main aim here is to lower them aggressively if there is a lot of mapped
 * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 * pages.  It is better to clamp down on writers than to start swapping, and
 * performing lots of scanning.
 *
 * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 *
 * We don't permit the clamping level to fall below 5% - that is getting rather
 * excessive.
 *
 * We make sure that the background writeout level is below the adjusted
 * clamping level.
 */
static void
get_dirty_limits(long *pbackground, long *pdirty,
					struct address_space *mapping)
{
	int background_ratio;		/* Percentages */
	int dirty_ratio;
	int unmapped_ratio;
	long background;
	long dirty;
	unsigned long available_memory = vm_total_pages;
	struct task_struct *tsk;

#ifdef CONFIG_HIGHMEM
	/*
	 * We always exclude high memory from our count.
	 */
	available_memory -= totalhigh_pages;
#endif


	unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
				global_page_state(NR_ANON_PAGES)) * 100) /
					vm_total_pages;

	dirty_ratio = vm_dirty_ratio;
	if (dirty_ratio > unmapped_ratio / 2)
		dirty_ratio = unmapped_ratio / 2;

	if (dirty_ratio < 5)
		dirty_ratio = 5;

	background_ratio = dirty_background_ratio;
	if (background_ratio >= dirty_ratio)
		background_ratio = dirty_ratio / 2;

	background = (background_ratio * available_memory) / 100;
	dirty = (dirty_ratio * available_memory) / 100;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;
}

/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then pdflush is woken to perform some
 * writeout.
 */
static void balance_dirty_pages(struct address_space *mapping)
{
	long nr_reclaimable;
	long background_thresh;
	long dirty_thresh;
	unsigned long pages_written = 0;
	unsigned long write_chunk = sync_writeback_pages();

	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		/* Fresh control block per pass: write up to write_chunk pages. */
		struct writeback_control wbc = {
			.bdi		= bdi,
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= write_chunk,
			.range_cyclic	= 1,
		};

		get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
				dirty_thresh)
			break;

		if (!dirty_exceeded)
			dirty_exceeded = 1;

		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		if (nr_reclaimable) {
			writeback_inodes(&wbc);
			/*
			 * Account for what this pass wrote before re-checking
			 * the thresholds, so that the pages are counted even
			 * when the re-check below terminates the loop.  The
			 * laptop-mode decision at the end of this function
			 * depends on pages_written being accurate.
			 */
			pages_written += write_chunk - wbc.nr_to_write;
			get_dirty_limits(&background_thresh,
					 &dirty_thresh, mapping);
			nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
			if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
					dirty_thresh)
				break;
			if (pages_written >= write_chunk)
				break;		/* We've done our duty */
		}
		/* Still over the limit: wait up to HZ/10 before the next pass. */
		congestion_wait(WRITE, HZ/10);
	}

	/* Back at or under the threshold: drop the global "exceeded" flag. */
	if (nr_reclaimable + global_page_state(NR_WRITEBACK)
		<= dirty_thresh && dirty_exceeded)
			dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;		/* pdflush is already working this queue */

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if ((laptop_mode && pages_written) ||
	     (!laptop_mode && (nr_reclaimable > background_thresh)))
		pdflush_operation(background_writeout, 0);
}

/*
 * Dirty @page and, if that call reports the page as newly dirtied and the
 * page has a mapping, run the ratelimited dirty-memory balancing for it.
 */
void set_page_dirty_balance(struct page *page)
{
	struct address_space *mapping;

	if (!set_page_dirty(page))
		return;

	mapping = page_mapping(page);
	if (mapping)
		balance_dirty_pages_ratelimited(mapping);
}

/**
 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
 * @mapping: address_space which was dirtied
 * @nr_pages_dirtied: number of pages which the caller has just dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
					unsigned long nr_pages_dirtied)
{
	static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
	unsigned long ratelimit;
	unsigned long *p;

	ratelimit = ratelimit_pages;
	if (dirty_exceeded)
		ratelimit = 8;

	/*
	 * Check the rate limiting. Also, we do not want to throttle real-time
	 * tasks in balance_dirty_pages(). Period.
	 */
	preempt_disable();
	p =  &__get_cpu_var(ratelimits);
	*p += nr_pages_dirtied;
	if (unlikely(*p >= ratelimit)) {
		*p = 0;
		preempt_enable();
		balance_dirty_pages(mapping);
		return;
	}
	preempt_enable();
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);

/*
 * Block the caller, in HZ/10 congestion waits, until the number of unstable
 * NFS plus under-writeback pages is no more than 110% of the dirty threshold.
 */
void throttle_vm_writeout(void)
{
	long bg_limit;
	long limit;

	for (;;) {
		unsigned long in_flight;

		get_dirty_limits(&bg_limit, &limit, NULL);

		/*
		 * Boost the allowable dirty threshold a bit for page
		 * allocators so they don't get DoS'ed by heavy writers
		 */
		limit += limit / 10;

		in_flight = global_page_state(NR_UNSTABLE_NFS) +
				global_page_state(NR_WRITEBACK);
		if (in_flight <= limit)
			return;

		congestion_wait(WRITE, HZ/10);
	}
}


/*
 * writeback at least _min_pages, and keep writing until the amount of dirty
 * memory is less than the background threshold, or until we're all clean.
 */
static void background_writeout(unsigned long _min_pages)
{
	long remaining = _min_pages;
	struct writeback_control wbc = {
		.bdi		= NULL,
		.sync_mode	= WB_SYNC_NONE,
		.older_than_this = NULL,
		.nr_to_write	= 0,
		.nonblocking	= 1,
		.range_cyclic	= 1,
	};

	while (1) {
		long bg_limit;
		long hard_limit;
		unsigned long dirty;

		get_dirty_limits(&bg_limit, &hard_limit, NULL);
		dirty = global_page_state(NR_FILE_DIRTY) +
				global_page_state(NR_UNSTABLE_NFS);

		/* Quota met and dirty memory below the background level. */
		if (dirty < bg_limit && remaining <= 0)
			break;

		/* Write one batch of at most MAX_WRITEBACK_PAGES pages. */
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		wbc.pages_skipped = 0;
		writeback_inodes(&wbc);
		remaining -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;

		/* The whole batch went out and nothing was skipped: go again. */
		if (wbc.nr_to_write <= 0 && wbc.pages_skipped <= 0)
			continue;

		/*
		 * Wrote less than expected.  Wait a little, and stop altogether
		 * unless the shortfall was caused by congestion.
		 */
		congestion_wait(WRITE, HZ/10);
		if (!wbc.encountered_congestion)
			break;
	}
}

/*
 * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
 * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
 * -1 if all pdflush threads were busy.
 */
int wakeup_pdflush(long nr_pages)
{
	long target = nr_pages;

	/* Zero means "everything": use the current dirty + unstable count. */
	if (!target)
		target = global_page_state(NR_FILE_DIRTY) +
				global_page_state(NR_UNSTABLE_NFS);

	return pdflush_operation(background_writeout, target);
}

/* Timer callbacks, defined further down in this file. */
static void wb_timer_fn(unsigned long unused);
static void laptop_timer_fn(unsigned long unused);

/*
 * wb_timer drives the periodic wb_kupdate() runs; it is armed by
 * page_writeback_init(), wb_kupdate(), wb_timer_fn() and the
 * dirty_writeback_centisecs sysctl handler.
 * laptop_mode_wb_timer is armed by laptop_io_completion() and cancelled by
 * laptop_sync_completion().
 */
static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);

/*
 * Periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
 */
static void wb_kupdate(unsigned long arg)
{
	unsigned long oldest_jif;
	unsigned long next_run;
	long remaining;
	struct writeback_control wbc = {
		.bdi		= NULL,
		.sync_mode	= WB_SYNC_NONE,
		.older_than_this = &oldest_jif,
		.nr_to_write	= 0,
		.nonblocking	= 1,
		.for_kupdate	= 1,
		.range_cyclic	= 1,
	};

	sync_supers();

	/* Only inodes dirtied before this point in time are written back. */
	oldest_jif = jiffies - dirty_expire_interval;
	next_run = jiffies + dirty_writeback_interval;

	/*
	 * Work budget: every dirty/unstable page plus every in-use inode.
	 * The budget shrinks by the number of pages each batch wrote.
	 */
	for (remaining = global_page_state(NR_FILE_DIRTY) +
			global_page_state(NR_UNSTABLE_NFS) +
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
	     remaining > 0;
	     remaining -= MAX_WRITEBACK_PAGES - wbc.nr_to_write) {
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		writeback_inodes(&wbc);

		if (wbc.nr_to_write <= 0)
			continue;	/* full batch written, keep going */
		if (!wbc.encountered_congestion)
			break;		/* All the old data is written */
		congestion_wait(WRITE, HZ/10);
	}

	/* Leave at least a one-second gap before the next run. */
	if (time_before(next_run, jiffies + HZ))
		next_run = jiffies + HZ;
	/* A zero interval means periodic writeback is switched off. */
	if (dirty_writeback_interval)
		mod_timer(&wb_timer, next_run);
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
/*
 * Parses or formats the value through proc_dointvec_userhz_jiffies() and,
 * after a successful write, re-arms wb_timer with the new interval (or
 * deletes the timer when the interval is now zero).
 *
 * Returns 0 on success, otherwise the error reported by
 * proc_dointvec_userhz_jiffies().  Reads leave the timer alone, so polling
 * the sysctl no longer postpones the next periodic writeback.
 */
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
		struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_userhz_jiffies(table, write, file, buffer,
					   length, ppos);
	if (ret)
		return ret;	/* bad input or copy failure: keep the timer as is */

	if (!write)
		return 0;	/* nothing changed, nothing to reschedule */

	if (dirty_writeback_interval)
		mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
	else
		del_timer(&wb_timer);

	return 0;
}

/*
 * wb_timer callback: hand wb_kupdate() to pdflush.  If pdflush_operation()
 * reports failure (negative return), retry one second later.
 */
static void wb_timer_fn(unsigned long unused)
{
	int rc = pdflush_operation(wb_kupdate, 0);

	if (rc < 0)
		mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
}

/*
 * pdflush work function queued by laptop_timer_fn(): performs a full
 * sys_sync().  The argument is ignored.
 */
static void laptop_flush(unsigned long unused)
{
	sys_sync();
}

/*
 * laptop_mode_wb_timer callback: hands laptop_flush() to pdflush.  The
 * return value of pdflush_operation() is not checked, so nothing is retried
 * here if the dispatch fails.
 */
static void laptop_timer_fn(unsigned long unused)
{
	pdflush_operation(laptop_flush, 0);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(void)
{
	/* laptop_mode doubles as the flush delay, in jiffies. */
	unsigned long flush_at = jiffies + laptop_mode;

	mod_timer(&laptop_mode_wb_timer, flush_at);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	/* Cancel the pending laptop-mode flush; del_timer()'s result is unused. */
	del_timer(&laptop_mode_wb_timer);
}

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive)
 * get_writeback_state too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds before writeback cuts in.
 *
 * But the limit should not be set too high.  Because it also controls the
 * amount of memory which the balance_dirty_pages() caller has to write back.
 * If this is too large then the caller will block on the IO queue all the
 * time.  So limit it to four megabytes - the balance_dirty_pages() caller
 * will write six megabyte chunks, max.
 */

/*
 * Recompute ratelimit_pages from the amount of memory and the number of
 * online CPUs: vm_total_pages / (32 * CPUs), clamped to at least 16 pages
 * and to at most 4MB worth of page-cache pages.
 *
 * The value is worked out in a local variable and stored once, so that
 * concurrent readers of ratelimit_pages never observe the unclamped
 * intermediate result.
 */
void writeback_set_ratelimit(void)
{
	long pages = vm_total_pages / (num_online_cpus() * 32);

	if (pages < 16)
		pages = 16;
	if (pages * PAGE_CACHE_SIZE > 4096 * 1024)
		pages = (4096 * 1024) / PAGE_CACHE_SIZE;

	ratelimit_pages = pages;
}

/*
 * Notifier callback installed through ratelimit_nb: recomputes
 * ratelimit_pages via writeback_set_ratelimit().  None of the arguments is
 * examined, so the recomputation runs on every notification.  Always
 * returns 0.
 */
static int __cpuinit
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
	writeback_set_ratelimit();
	return 0;
}

/*
 * Notifier block that routes CPU notifications to ratelimit_handler();
 * registered by page_writeback_init() with register_cpu_notifier().
 */
static struct notifier_block __cpuinitdata ratelimit_nb = {
	.notifier_call	= ratelimit_handler,
	.next		= NULL,
};

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffers (by
 * comparing nr_free_buffer_pages() to vm_total_pages).
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory (by subtracting
 * totalhigh_pages from vm_total_pages), and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory.
 *
 * But we might still want to scale the dirty_ratio by how
 * much memory the box has..
 */
void __init page_writeback_init(void)
{
	/* First periodic writeback happens one full interval from now. */
	unsigned long first_run = jiffies + dirty_writeback_interval;

	mod_timer(&wb_timer, first_run);

	/* Size the per-CPU ratelimit now and on later CPU notifications. */
	writeback_set_ratelimit();
	register_cpu_notifier(&ratelimit_nb);
}

/**
 * generic_writepages - walk the list of dirty pages of the given
 *                      address space and writepage() all of them.
 *
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 *
 * If a page is already under I/O, generic_writepages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * Derived from mpage_writepages() - if you fix this you should check that
 * also!
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;		/* result of the most recent ->writepage call */
	int done = 0;		/* set when the outer scan loop should stop */
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;	/* non-zero once wrapping back to index 0 is ruled out */
	int range_whole = 0;	/* caller asked for the range 0..LLONG_MAX */

	/* Non-blocking callers back off at once when the device is congested. */
	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	writepage = mapping->a_ops->writepage;

	/* deal with chardevs and other special file */
	if (!writepage)
		return 0;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;	/* presumably the largest pgoff_t, i.e. no upper bound */
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		/* An explicit range is scanned once only: never wrap around. */
		scanned = 1;
	}
retry:
	/*
	 * Look up dirty-tagged pages in batches of at most PAGEVEC_SIZE,
	 * never asking for more pages than remain up to and including 'end'.
	 */
	while (!done && (index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
					      PAGECACHE_TAG_DIRTY,
					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		unsigned i;

		scanned = 1;
		/*
		 * Note: 'done' is only tested by the enclosing while loop, so
		 * once it is set the remaining pages of this pagevec are still
		 * run through the body below.
		 */
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */
			lock_page(page);

			/* The page left this mapping while unlocked: skip it. */
			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			/* Past the end of an explicit range: stop scanning. */
			if (!wbc->range_cyclic && page->index > end) {
				done = 1;
				unlock_page(page);
				continue;
			}

			/* Any mode other than WB_SYNC_NONE waits for in-flight I/O. */
			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			/* Skip pages still under writeback or no longer dirty. */
			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				continue;
			}

			ret = (*writepage)(page, wbc);
			/*
			 * Record the failure on the mapping.
			 * NOTE(review): this test fires for any non-zero
			 * return.  If AOP_WRITEPAGE_ACTIVATE is non-zero (not
			 * visible from this file - confirm) that return also
			 * sets AS_EIO here and ends the scan below.
			 */
			if (ret) {
				if (ret == -ENOSPC)
					set_bit(AS_ENOSPC, &mapping->flags);
				else
					set_bit(AS_EIO, &mapping->flags);
			}

			/* For AOP_WRITEPAGE_ACTIVATE the page is unlocked here. */
			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
				unlock_page(page);
			/* Stop on a non-zero return or when the quota is used up. */
			if (ret || (--(wbc->nr_to_write) <= 0))
				done = 1;
			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}
	/*
	 * Remember where to resume: always for cyclic writeback, and for a
	 * whole-file request only while some of the quota is left.
	 */
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;
	return ret;
}

EXPORT_SYMBOL(generic_writepages);

/*
 * Write back pages of @mapping through its ->writepages operation, falling
 * back to generic_writepages() when the mapping does not provide one.
 * wbc->for_writepages is set for the duration of the call.  Returns 0
 * straight away when wbc->nr_to_write is not positive, otherwise the result
 * of the writepages implementation.
 */
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;

	wbc->for_writepages = 1;
	ret = mapping->a_ops->writepages ?
		mapping->a_ops->writepages(mapping, wbc) :
		generic_writepages(mapping, wbc);
	wbc->for_writepages = 0;

	return ret;
}

/**
 * write_one_page - write out a single page and optionally wait on I/O
 *
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};
	int ret;

	BUG_ON(!PageLocked(page));

	/* Let any writeout already in flight finish first. */
	if (wait)
		wait_on_page_writeback(page);

	/* Page was not dirty: nothing to write, just drop the lock. */
	if (!clear_page_dirty_for_io(page)) {
		unlock_page(page);
		return 0;
	}

	/* Hold an extra reference across ->writepage and the optional wait. */
	page_cache_get(page);
	ret = mapping->a_ops->writepage(page, &wbc);
	if (!ret && wait) {
		wait_on_page_writeback(page);
		if (PageError(page))
			ret = -EIO;
	}
	page_cache_release(page);

	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * Most callers have locked the page, which pins the address_space in memory.
 * But zap_pte_range() does not lock the page, however in that case the
 * mapping is pinned by the vma's ->vm_file reference.
 *
 * We take care to handle the case where the page was truncated from the
 * mapping by re-checking page_mapping() inside tree_lock.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	struct address_space *mapping;
	struct address_space *locked_mapping;

	/* Already dirty: nothing to do, report "no change". */
	if (TestSetPageDirty(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 1;

	write_lock_irq(&mapping->tree_lock);
	/* Look again under tree_lock; a NULL result means truncate won. */
	locked_mapping = page_mapping(page);
	if (locked_mapping) {
		BUG_ON(locked_mapping != mapping);
		if (mapping_cap_account_dirty(mapping)) {
			__inc_zone_page_state(page, NR_FILE_DIRTY);
			task_io_account_write(PAGE_CACHE_SIZE);
		}
		radix_tree_tag_set(&mapping->page_tree,
			page_index(page), PAGECACHE_TAG_DIRTY);
	}
	write_unlock_irq(&mapping->tree_lock);

	/* !PageAnon && !swapper_space */
	if (mapping->host)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int newly_dirtied;

	/* Count the skip so the writeback caller can see it in @wbc. */
	wbc->pages_skipped += 1;
	newly_dirtied = __set_page_dirty_nobuffers(page);

	return newly_dirtied;
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int fastcall set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int (*dirty_fn)(struct page *);

	if (unlikely(!mapping)) {
		/* No mapping: only flip the flag; 1 means clean -> dirty. */
		if (PageDirty(page))
			return 0;
		return TestSetPageDirty(page) ? 0 : 1;
	}

	dirty_fn = mapping->a_ops->set_page_dirty;
#ifdef CONFIG_BLOCK
	/* Mappings without their own method get the buffer_head variant. */
	if (!dirty_fn)
		dirty_fn = __set_page_dirty_buffers;
#endif
	return dirty_fn(page);
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * Locked variant of set_page_dirty().
 *
 * Calling set_page_dirty() on an unlocked page is racy when the caller holds
 * no reference against page->mapping->host: another CPU could truncate the
 * page off the mapping and then free the mapping.
 *
 * Usually the page _is_ locked, or the caller is a user-space process that
 * holds a reference on the inode through an open file.  Everyone else should
 * lock the page around set_page_dirty(), which is what this helper does.
 */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	lock_page_nosync(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);

	return dirtied;
}
EXPORT_SYMBOL(set_page_dirty_lock);

/*
 * Clear a page's dirty flag while keeping dirty memory accounting straight.
 * Returns true if the page was previously dirty.
 *
 * Used when preparing to put the page under writeout.  The radix-tree dirty
 * tag is deliberately left set so that a concurrent write-for-sync can still
 * find the page through a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will go on to call either set_page_writeback() or
 * set_page_dirty(), and at that point the page's dirty flag and its
 * radix-tree dirty tag are brought back into sync.
 *
 * The temporary incoherency between flag and tag is unfortunate, but it only
 * exists while the page is locked.
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	/* No mapping, or no dirty accounting for it: just clear the flag. */
	if (!mapping || !mapping_cap_account_dirty(mapping))
		return TestClearPageDirty(page);

	/*
	 * Yes, Virginia, this is indeed insane.
	 *
	 * The sequence below exists so that
	 *  (a) dirty stats are accounted properly,
	 *  (b) the low-level filesystem is told to mark the whole
	 *      page dirty if it was dirty in a pagetable, and only
	 *      then
	 *  (c) the page is cleaned again and 1 is returned to
	 *      cause the writeback.
	 *
	 * Doing it this way avoids all the nasty races with the dirty
	 * bit living in multiple places and being cleared concurrently
	 * from different threads.
	 *
	 * Note! Normally the set_page_dirty(page) call has no effect on
	 * the actual dirty bit, since that will usually be set already.
	 * But we need its side effects, and it can help us avoid races.
	 *
	 * In effect the page "master dirty bit" is the serialization
	 * point for all the different threads doing their things.
	 *
	 * FIXME! There is still a race here: if somebody adds the page
	 * back to the page tables between page_mkclean() and
	 * TestClearPageDirty(), we might have it mapped without the
	 * dirty bit set.
	 */
	if (page_mkclean(page))
		set_page_dirty(page);

	if (!TestClearPageDirty(page))
		return 0;

	dec_zone_page_state(page, NR_FILE_DIRTY);
	return 1;
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

/*
 * Test-and-clear the page's writeback flag, returning the result of
 * TestClearPageWriteback().  When the page has a mapping, the flag and the
 * radix-tree PAGECACHE_TAG_WRITEBACK tag are updated together under the
 * mapping's tree_lock; the tag is cleared only if the flag test returned
 * nonzero.
 */
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;
	int was_writeback;

	/* No mapping: there is no radix tree tag to maintain. */
	if (!mapping)
		return TestClearPageWriteback(page);

	write_lock_irqsave(&mapping->tree_lock, flags);
	was_writeback = TestClearPageWriteback(page);
	if (was_writeback)
		radix_tree_tag_clear(&mapping->page_tree,
					page_index(page),
					PAGECACHE_TAG_WRITEBACK);
	write_unlock_irqrestore(&mapping->tree_lock, flags);

	return was_writeback;
}

/*
 * Test-and-set the page's writeback flag, returning the result of
 * TestSetPageWriteback().  When the page has a mapping, this runs under the
 * mapping's tree_lock and also keeps the radix-tree tags in step:
 * PAGECACHE_TAG_WRITEBACK is set if the flag test returned zero, and
 * PAGECACHE_TAG_DIRTY is cleared if the page is not dirty.
 */
int test_set_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;
	int was_writeback;

	/* No mapping: only the page flag needs touching. */
	if (!mapping)
		return TestSetPageWriteback(page);

	write_lock_irqsave(&mapping->tree_lock, flags);

	was_writeback = TestSetPageWriteback(page);
	if (!was_writeback)
		radix_tree_tag_set(&mapping->page_tree,
					page_index(page),
					PAGECACHE_TAG_WRITEBACK);

	/* Drop the dirty tag when the page's dirty flag is no longer set. */
	if (!PageDirty(page))
		radix_tree_tag_clear(&mapping->page_tree,
					page_index(page),
					PAGECACHE_TAG_DIRTY);

	write_unlock_irqrestore(&mapping->tree_lock, flags);

	return was_writeback;
}
EXPORT_SYMBOL(test_set_page_writeback);

/*
 * Return true if any of the pages in the mapping are marged with the
 * passed tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	unsigned long irq_flags;
	int tagged;

	/*
	 * The radix tree is only queried here, so the read side of
	 * tree_lock is taken (with interrupts disabled) around the lookup.
	 */
	read_lock_irqsave(&mapping->tree_lock, irq_flags);
	tagged = radix_tree_tagged(&mapping->page_tree, tag);
	read_unlock_irqrestore(&mapping->tree_lock, irq_flags);

	/* Pass radix_tree_tagged()'s result through unchanged. */
	return tagged;
}
EXPORT_SYMBOL(mapping_tagged);
mp;np->np_thread_lock); np->np_exports--; if (np->np_exports) { spin_unlock_bh(&np->np_thread_lock); return 0; } np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN; spin_unlock_bh(&np->np_thread_lock); if (np->np_thread) { /* * We need to send the signal to wakeup Linux/Net * which may be sleeping in sock_accept().. */ send_sig(SIGINT, np->np_thread, 1); kthread_stop(np->np_thread); } iscsit_del_np_comm(np); spin_lock_bh(&np_lock); list_del(&np->np_list); spin_unlock_bh(&np_lock); pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP"); kfree(np); return 0; } static int __init iscsi_target_init_module(void) { int ret = 0; pr_debug("iSCSI-Target "ISCSIT_VERSION"\n"); iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL); if (!iscsit_global) { pr_err("Unable to allocate memory for iscsit_global\n"); return -1; } mutex_init(&auth_id_lock); spin_lock_init(&sess_idr_lock); idr_init(&tiqn_idr); idr_init(&sess_idr); ret = iscsi_target_register_configfs(); if (ret < 0) goto out; ret = iscsi_thread_set_init(); if (ret < 0) goto configfs_out; if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) != TARGET_THREAD_SET_COUNT) { pr_err("iscsi_allocate_thread_sets() returned" " unexpected value!\n"); goto ts_out1; } lio_cmd_cache = kmem_cache_create("lio_cmd_cache", sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), 0, NULL); if (!lio_cmd_cache) { pr_err("Unable to kmem_cache_create() for" " lio_cmd_cache\n"); goto ts_out2; } lio_qr_cache = kmem_cache_create("lio_qr_cache", sizeof(struct iscsi_queue_req), __alignof__(struct iscsi_queue_req), 0, NULL); if (!lio_qr_cache) { pr_err("nable to kmem_cache_create() for" " lio_qr_cache\n"); goto cmd_out; } lio_dr_cache = kmem_cache_create("lio_dr_cache", sizeof(struct iscsi_datain_req), __alignof__(struct iscsi_datain_req), 0, NULL); if (!lio_dr_cache) { pr_err("Unable to kmem_cache_create() for" " lio_dr_cache\n"); goto qr_out; } 
lio_ooo_cache = kmem_cache_create("lio_ooo_cache", sizeof(struct iscsi_ooo_cmdsn), __alignof__(struct iscsi_ooo_cmdsn), 0, NULL); if (!lio_ooo_cache) { pr_err("Unable to kmem_cache_create() for" " lio_ooo_cache\n"); goto dr_out; } lio_r2t_cache = kmem_cache_create("lio_r2t_cache", sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t), 0, NULL); if (!lio_r2t_cache) { pr_err("Unable to kmem_cache_create() for" " lio_r2t_cache\n"); goto ooo_out; } if (iscsit_load_discovery_tpg() < 0) goto r2t_out; return ret; r2t_out: kmem_cache_destroy(lio_r2t_cache); ooo_out: kmem_cache_destroy(lio_ooo_cache); dr_out: kmem_cache_destroy(lio_dr_cache); qr_out: kmem_cache_destroy(lio_qr_cache); cmd_out: kmem_cache_destroy(lio_cmd_cache); ts_out2: iscsi_deallocate_thread_sets(); ts_out1: iscsi_thread_set_free(); configfs_out: iscsi_target_deregister_configfs(); out: kfree(iscsit_global); return -ENOMEM; } static void __exit iscsi_target_cleanup_module(void) { iscsi_deallocate_thread_sets(); iscsi_thread_set_free(); iscsit_release_discovery_tpg(); kmem_cache_destroy(lio_cmd_cache); kmem_cache_destroy(lio_qr_cache); kmem_cache_destroy(lio_dr_cache); kmem_cache_destroy(lio_ooo_cache); kmem_cache_destroy(lio_r2t_cache); iscsi_target_deregister_configfs(); kfree(iscsit_global); } int iscsit_add_reject( u8 reason, int fail_conn, unsigned char *buf, struct iscsi_conn *conn) { struct iscsi_cmd *cmd; struct iscsi_reject *hdr; int ret; cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return -1; cmd->iscsi_opcode = ISCSI_OP_REJECT; if (fail_conn) cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; hdr = (struct iscsi_reject *) cmd->pdu; hdr->reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { pr_err("Unable to allocate memory for cmd->buf_ptr\n"); iscsit_release_cmd(cmd); return -1; } spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); cmd->i_state = ISTATE_SEND_REJECT; 
iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); ret = wait_for_completion_interruptible(&cmd->reject_comp); if (ret != 0) return -1; return (!fail_conn) ? 0 : -1; } int iscsit_add_reject_from_cmd( u8 reason, int fail_conn, int add_to_conn, unsigned char *buf, struct iscsi_cmd *cmd) { struct iscsi_conn *conn; struct iscsi_reject *hdr; int ret; if (!cmd->conn) { pr_err("cmd->conn is NULL for ITT: 0x%08x\n", cmd->init_task_tag); return -1; } conn = cmd->conn; cmd->iscsi_opcode = ISCSI_OP_REJECT; if (fail_conn) cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; hdr = (struct iscsi_reject *) cmd->pdu; hdr->reason = reason; cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL); if (!cmd->buf_ptr) { pr_err("Unable to allocate memory for cmd->buf_ptr\n"); iscsit_release_cmd(cmd); return -1; } if (add_to_conn) { spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); } cmd->i_state = ISTATE_SEND_REJECT; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); ret = wait_for_completion_interruptible(&cmd->reject_comp); if (ret != 0) return -1; return (!fail_conn) ? 0 : -1; } /* * Map some portion of the allocated scatterlist to an iovec, suitable for * kernel sockets to copy data in/out. This handles both pages and slab-allocated * buffers, since we have been tricky and mapped t_mem_sg to the buffer in * either case (see iscsit_alloc_buffs) */ static int iscsit_map_iovec( struct iscsi_cmd *cmd, struct kvec *iov, u32 data_offset, u32 data_length) { u32 i = 0; struct scatterlist *sg; unsigned int page_off; /* * We have a private mapping of the allocated pages in t_mem_sg. * At this point, we also know each contains a page. 
*/ sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE]; page_off = (data_offset % PAGE_SIZE); cmd->first_data_sg = sg; cmd->first_data_sg_off = page_off; while (data_length) { u32 cur_len = min_t(u32, data_length, sg->length - page_off); iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off; iov[i].iov_len = cur_len; data_length -= cur_len; page_off = 0; sg = sg_next(sg); i++; } cmd->kmapped_nents = i; return i; } static void iscsit_unmap_iovec(struct iscsi_cmd *cmd) { u32 i; struct scatterlist *sg; sg = cmd->first_data_sg; for (i = 0; i < cmd->kmapped_nents; i++) kunmap(sg_page(&sg[i])); } static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) { struct iscsi_cmd *cmd; conn->exp_statsn = exp_statsn; spin_lock_bh(&conn->cmd_lock); list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { spin_lock(&cmd->istate_lock); if ((cmd->i_state == ISTATE_SENT_STATUS) && (cmd->stat_sn < exp_statsn)) { cmd->i_state = ISTATE_REMOVE; spin_unlock(&cmd->istate_lock); iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); continue; } spin_unlock(&cmd->istate_lock); } spin_unlock_bh(&conn->cmd_lock); } static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd) { u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 
1 : cmd->se_cmd.t_data_nents; iov_count += ISCSI_IOV_DATA_BUFFER; cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL); if (!cmd->iov_data) { pr_err("Unable to allocate cmd->iov_data\n"); return -ENOMEM; } cmd->orig_iov_data_count = iov_count; return 0; } static int iscsit_alloc_buffs(struct iscsi_cmd *cmd) { struct scatterlist *sgl; u32 length = cmd->se_cmd.data_length; int nents = DIV_ROUND_UP(length, PAGE_SIZE); int i = 0, ret; /* * If no SCSI payload is present, allocate the default iovecs used for * iSCSI PDU Header */ if (!length) return iscsit_allocate_iovecs(cmd); sgl = kzalloc(sizeof(*sgl) * nents, GFP_KERNEL); if (!sgl) return -ENOMEM; sg_init_table(sgl, nents); while (length) { int buf_size = min_t(int, length, PAGE_SIZE); struct page *page; page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto page_alloc_failed; sg_set_page(&sgl[i], page, buf_size, 0); length -= buf_size; i++; } cmd->t_mem_sg = sgl; cmd->t_mem_sg_nents = nents; /* BIDI ops not supported */ /* Tell the core about our preallocated memory */ transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0); /* * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd * so that cmd->se_cmd.t_tasks_se_num has been set. 
*/ ret = iscsit_allocate_iovecs(cmd); if (ret < 0) goto page_alloc_failed; return 0; page_alloc_failed: while (i >= 0) { __free_page(sg_page(&sgl[i])); i--; } kfree(cmd->t_mem_sg); cmd->t_mem_sg = NULL; return -ENOMEM; } static int iscsit_handle_scsi_cmd( struct iscsi_conn *conn, unsigned char *buf) { int data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret; int dump_immediate_data = 0, send_check_condition = 0, payload_length; struct iscsi_cmd *cmd = NULL; struct iscsi_scsi_req *hdr; spin_lock_bh(&conn->sess->session_stats_lock); conn->sess->cmd_pdus++; if (conn->sess->se_sess->se_node_acl) { spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); conn->sess->se_sess->se_node_acl->num_cmds++; spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); } spin_unlock_bh(&conn->sess->session_stats_lock); hdr = (struct iscsi_scsi_req *) buf; payload_length = ntoh24(hdr->dlength); hdr->itt = be32_to_cpu(hdr->itt); hdr->data_length = be32_to_cpu(hdr->data_length); hdr->cmdsn = be32_to_cpu(hdr->cmdsn); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); /* FIXME; Add checks for AdditionalHeaderSegment */ if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) && !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) { pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL" " not set. Bad iSCSI Initiator.\n"); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } if (((hdr->flags & ISCSI_FLAG_CMD_READ) || (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) { /* * Vmware ESX v3.0 uses a modified Cisco Initiator (v3.4.2) * that adds support for RESERVE/RELEASE. There is a bug * add with this new functionality that sets R/W bits when * neither CDB carries any READ or WRITE datapayloads. */ if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) { hdr->flags &= ~ISCSI_FLAG_CMD_READ; hdr->flags &= ~ISCSI_FLAG_CMD_WRITE; goto done; } pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" " set when Expected Data Transfer Length is 0 for" " CDB: 0x%02x. 
Bad iSCSI Initiator.\n", hdr->cdb[0]); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } done: if (!(hdr->flags & ISCSI_FLAG_CMD_READ) && !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) { pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE" " MUST be set if Expected Data Transfer Length is not 0." " Bad iSCSI Initiator\n"); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } if ((hdr->flags & ISCSI_FLAG_CMD_READ) && (hdr->flags & ISCSI_FLAG_CMD_WRITE)) { pr_err("Bidirectional operations not supported!\n"); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } if (hdr->opcode & ISCSI_OP_IMMEDIATE) { pr_err("Illegally set Immediate Bit in iSCSI Initiator" " Scsi Command PDU.\n"); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } if (payload_length && !conn->sess->sess_ops->ImmediateData) { pr_err("ImmediateData=No but DataSegmentLength=%u," " protocol error.\n", payload_length); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if ((hdr->data_length == payload_length) && (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) { pr_err("Expected Data Transfer Length and Length of" " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL" " bit is not set protocol error\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if (payload_length > hdr->data_length) { pr_err("DataSegmentLength: %u is greater than" " EDTL: %u, protocol error.\n", payload_length, hdr->data_length); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" " MaxRecvDataSegmentLength: %u, protocol error.\n", payload_length, conn->conn_ops->MaxRecvDataSegmentLength); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if (payload_length > conn->sess->sess_ops->FirstBurstLength) { pr_err("DataSegmentLength: %u is greater 
than" " FirstBurstLength: %u, protocol error.\n", payload_length, conn->sess->sess_ops->FirstBurstLength); return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, buf, conn); } data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE : DMA_NONE; cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction, (hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK)); if (!cmd) return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, buf, conn); pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x," " ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt, hdr->cmdsn, hdr->data_length, payload_length, conn->cid); cmd->iscsi_opcode = ISCSI_OP_SCSI_CMD; cmd->i_state = ISTATE_NEW_CMD; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); cmd->immediate_data = (payload_length) ? 1 : 0; cmd->unsolicited_data = ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) && (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ? 1 : 0); if (cmd->unsolicited_data) cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA; conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; if (hdr->flags & ISCSI_FLAG_CMD_READ) { spin_lock_bh(&conn->sess->ttt_lock); cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; if (cmd->targ_xfer_tag == 0xFFFFFFFF) cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; spin_unlock_bh(&conn->sess->ttt_lock); } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = hdr->cmdsn; cmd->exp_stat_sn = hdr->exp_statsn; cmd->first_burst_len = payload_length; if (cmd->data_direction == DMA_FROM_DEVICE) { struct iscsi_datain_req *dr; dr = iscsit_allocate_datain_req(); if (!dr) return iscsit_add_reject_from_cmd( ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, 1, buf, cmd); iscsit_attach_datain_req(cmd, dr); } /* * The CDB is going to an se_device_t. 
*/ ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb, get_unaligned_le64(&hdr->lun)); if (ret < 0) { if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) { pr_debug("Responding to non-acl'ed," " non-existent or non-exported iSCSI LUN:" " 0x%016Lx\n", get_unaligned_le64(&hdr->lun)); } send_check_condition = 1; goto attach_cmd; } /* * The Initiator Node has access to the LUN (the addressing method * is handled inside of iscsit_get_lun_for_cmd()). Now it's time to * allocate 1->N transport tasks (depending on sector count and * maximum request size the physical HBA(s) can handle. */ transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb); if (transport_ret == -ENOMEM) { return iscsit_add_reject_from_cmd( ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, 1, buf, cmd); } else if (transport_ret == -EINVAL) { /* * Unsupported SAM Opcode. CHECK_CONDITION will be sent * in iscsit_execute_cmd() during the CmdSN OOO Execution * Mechinism. */ send_check_condition = 1; } else { cmd->data_length = cmd->se_cmd.data_length; if (iscsit_decide_list_to_build(cmd, payload_length) < 0) return iscsit_add_reject_from_cmd( ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, 1, buf, cmd); } attach_cmd: spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); /* * Check if we need to delay processing because of ALUA * Active/NonOptimized primary access state.. */ core_alua_check_nonop_delay(&cmd->se_cmd); /* * Allocate and setup SGL used with transport_generic_map_mem_to_cmd(). * also call iscsit_allocate_iovecs() */ ret = iscsit_alloc_buffs(cmd); if (ret < 0) return iscsit_add_reject_from_cmd( ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, 1, buf, cmd); /* * Check the CmdSN against ExpCmdSN/MaxCmdSN here if * the Immediate Bit is not set, and no Immediate * Data is attached. * * A PDU/CmdSN carrying Immediate Data can only * be processed after the DataCRC has passed. * If the DataCRC fails, the CmdSN MUST NOT * be acknowledged. 
(See below) */ if (!cmd->immediate_data) { cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) return 0; else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); } iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); /* * If no Immediate Data is attached, it's OK to return now. */ if (!cmd->immediate_data) { if (send_check_condition) return 0; if (cmd->unsolicited_data) { iscsit_set_dataout_sequence_values(cmd); spin_lock_bh(&cmd->dataout_timeout_lock); iscsit_start_dataout_timer(cmd, cmd->conn); spin_unlock_bh(&cmd->dataout_timeout_lock); } return 0; } /* * Early CHECK_CONDITIONs never make it to the transport processing * thread. They are processed in CmdSN order by * iscsit_check_received_cmdsn() below. */ if (send_check_condition) { immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; dump_immediate_data = 1; goto after_immediate_data; } /* * Call directly into transport_generic_new_cmd() to perform * the backend memory allocation. */ ret = transport_generic_new_cmd(&cmd->se_cmd); if (ret < 0) { immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; dump_immediate_data = 1; goto after_immediate_data; } immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length); after_immediate_data: if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) { /* * A PDU/CmdSN carrying Immediate Data passed * DataCRC, check against ExpCmdSN/MaxCmdSN if * Immediate Bit is not set. */ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); /* * Special case for Unsupported SAM WRITE Opcodes * and ImmediateData=Yes. 
*/ if (dump_immediate_data) { if (iscsit_dump_data_payload(conn, payload_length, 1) < 0) return -1; } else if (cmd->unsolicited_data) { iscsit_set_dataout_sequence_values(cmd); spin_lock_bh(&cmd->dataout_timeout_lock); iscsit_start_dataout_timer(cmd, cmd->conn); spin_unlock_bh(&cmd->dataout_timeout_lock); } if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { /* * Immediate Data failed DataCRC and ERL>=1, * silently drop this PDU and let the initiator * plug the CmdSN gap. * * FIXME: Send Unsolicited NOPIN with reserved * TTT here to help the initiator figure out * the missing CmdSN, although they should be * intelligent enough to determine the missing * CmdSN and issue a retry to plug the sequence. */ cmd->i_state = ISTATE_REMOVE; iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */ return -1; return 0; } static u32 iscsit_do_crypto_hash_sg( struct hash_desc *hash, struct iscsi_cmd *cmd, u32 data_offset, u32 data_length, u32 padding, u8 *pad_bytes) { u32 data_crc; u32 i; struct scatterlist *sg; unsigned int page_off; crypto_hash_init(hash); sg = cmd->first_data_sg; page_off = cmd->first_data_sg_off; i = 0; while (data_length) { u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off)); crypto_hash_update(hash, &sg[i], cur_len); data_length -= cur_len; page_off = 0; i++; } if (padding) { struct scatterlist pad_sg; sg_init_one(&pad_sg, pad_bytes, padding); crypto_hash_update(hash, &pad_sg, padding); } crypto_hash_final(hash, (u8 *) &data_crc); return data_crc; } static void iscsit_do_crypto_hash_buf( struct hash_desc *hash, unsigned char *buf, u32 payload_length, u32 padding, u8 *pad_bytes, u8 *data_crc) { struct scatterlist sg; crypto_hash_init(hash); sg_init_one(&sg, (u8 *)buf, payload_length); crypto_hash_update(hash, &sg, payload_length); if (padding) { 
sg_init_one(&sg, pad_bytes, padding); crypto_hash_update(hash, &sg, padding); } crypto_hash_final(hash, data_crc); } static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) { int iov_ret, ooo_cmdsn = 0, ret; u8 data_crc_failed = 0; u32 checksum, iov_count = 0, padding = 0, rx_got = 0; u32 rx_size = 0, payload_length; struct iscsi_cmd *cmd = NULL; struct se_cmd *se_cmd; struct iscsi_data *hdr; struct kvec *iov; unsigned long flags; hdr = (struct iscsi_data *) buf; payload_length = ntoh24(hdr->dlength); hdr->itt = be32_to_cpu(hdr->itt); hdr->ttt = be32_to_cpu(hdr->ttt); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); hdr->datasn = be32_to_cpu(hdr->datasn); hdr->offset = be32_to_cpu(hdr->offset); if (!payload_length) { pr_err("DataOUT payload is ZERO, protocol error.\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } /* iSCSI write */ spin_lock_bh(&conn->sess->session_stats_lock); conn->sess->rx_data_octets += payload_length; if (conn->sess->se_sess->se_node_acl) { spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); conn->sess->se_sess->se_node_acl->write_bytes += payload_length; spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); } spin_unlock_bh(&conn->sess->session_stats_lock); if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" " MaxRecvDataSegmentLength: %u\n", payload_length, conn->conn_ops->MaxRecvDataSegmentLength); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, payload_length); if (!cmd) return 0; pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x," " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", hdr->itt, hdr->ttt, hdr->datasn, hdr->offset, payload_length, conn->cid); if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { pr_err("Command ITT: 0x%08x received DataOUT after" " last DataOUT received, dumping payload\n", cmd->init_task_tag); return 
iscsit_dump_data_payload(conn, payload_length, 1); } if (cmd->data_direction != DMA_TO_DEVICE) { pr_err("Command ITT: 0x%08x received DataOUT for a" " NON-WRITE command.\n", cmd->init_task_tag); return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); } se_cmd = &cmd->se_cmd; iscsit_mod_dataout_timer(cmd); if ((hdr->offset + payload_length) > cmd->data_length) { pr_err("DataOut Offset: %u, Length %u greater than" " iSCSI Command EDTL %u, protocol error.\n", hdr->offset, payload_length, cmd->data_length); return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd); } if (cmd->unsolicited_data) { int dump_unsolicited_data = 0; if (conn->sess->sess_ops->InitialR2T) { pr_err("Received unexpected unsolicited data" " while InitialR2T=Yes, protocol error.\n"); transport_send_check_condition_and_sense(&cmd->se_cmd, TCM_UNEXPECTED_UNSOLICITED_DATA, 0); return -1; } /* * Special case for dealing with Unsolicited DataOUT * and Unsupported SAM WRITE Opcodes and SE resource allocation * failures; */ /* Something's amiss if we're not in WRITE_PENDING state... */ spin_lock_irqsave(&se_cmd->t_state_lock, flags); WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING); spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); spin_lock_irqsave(&se_cmd->t_state_lock, flags); if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) || (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION)) dump_unsolicited_data = 1; spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); if (dump_unsolicited_data) { /* * Check if a delayed TASK_ABORTED status needs to * be sent now if the ISCSI_FLAG_CMD_FINAL has been * received with the unsolicitied data out. 
*/ if (hdr->flags & ISCSI_FLAG_CMD_FINAL) iscsit_stop_dataout_timer(cmd); transport_check_aborted_status(se_cmd, (hdr->flags & ISCSI_FLAG_CMD_FINAL)); return iscsit_dump_data_payload(conn, payload_length, 1); } } else { /* * For the normal solicited data path: * * Check for a delayed TASK_ABORTED status and dump any * incoming data out payload if one exists. Also, when the * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current * data out sequence, we decrement outstanding_r2ts. Once * outstanding_r2ts reaches zero, go ahead and send the delayed * TASK_ABORTED status. */ if (atomic_read(&se_cmd->t_transport_aborted) != 0) { if (hdr->flags & ISCSI_FLAG_CMD_FINAL) if (--cmd->outstanding_r2ts < 1) { iscsit_stop_dataout_timer(cmd); transport_check_aborted_status( se_cmd, 1); } return iscsit_dump_data_payload(conn, payload_length, 1); } } /* * Preform DataSN, DataSequenceInOrder, DataPDUInOrder, and * within-command recovery checks before receiving the payload. */ ret = iscsit_check_pre_dataout(cmd, buf); if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY) return 0; else if (ret == DATAOUT_CANNOT_RECOVER) return -1; rx_size += payload_length; iov = &cmd->iov_data[0]; iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length); if (iov_ret < 0) return -1; iov_count += iov_ret; padding = ((-payload_length) & 3); if (padding != 0) { iov[iov_count].iov_base = cmd->pad_bytes; iov[iov_count++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u padding bytes.\n", padding); } if (conn->conn_ops->DataDigest) { iov[iov_count].iov_base = &checksum; iov[iov_count++].iov_len = ISCSI_CRC_LEN; rx_size += ISCSI_CRC_LEN; } rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size); iscsit_unmap_iovec(cmd); if (rx_got != rx_size) return -1; if (conn->conn_ops->DataDigest) { u32 data_crc; data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd, hdr->offset, payload_length, padding, cmd->pad_bytes); if (checksum != data_crc) { pr_err("ITT: 0x%08x, Offset: %u, 
Length: %u," " DataSN: 0x%08x, CRC32C DataDigest 0x%08x" " does not match computed 0x%08x\n", hdr->itt, hdr->offset, payload_length, hdr->datasn, checksum, data_crc); data_crc_failed = 1; } else { pr_debug("Got CRC32C DataDigest 0x%08x for" " %u bytes of Data Out\n", checksum, payload_length); } } /* * Increment post receive data and CRC values or perform * within-command recovery. */ ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed); if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)) return 0; else if (ret == DATAOUT_SEND_R2T) { iscsit_set_dataout_sequence_values(cmd); iscsit_build_r2ts_for_cmd(cmd, conn, 0); } else if (ret == DATAOUT_SEND_TO_TRANSPORT) { /* * Handle extra special case for out of order * Unsolicited Data Out. */ spin_lock_bh(&cmd->istate_lock); ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; spin_unlock_bh(&cmd->istate_lock); iscsit_stop_dataout_timer(cmd); return (!ooo_cmdsn) ? 
transport_generic_handle_data( &cmd->se_cmd) : 0; } else /* DATAOUT_CANNOT_RECOVER */ return -1; return 0; } static int iscsit_handle_nop_out( struct iscsi_conn *conn, unsigned char *buf) { unsigned char *ping_data = NULL; int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size; u32 checksum, data_crc, padding = 0, payload_length; u64 lun; struct iscsi_cmd *cmd = NULL; struct kvec *iov = NULL; struct iscsi_nopout *hdr; hdr = (struct iscsi_nopout *) buf; payload_length = ntoh24(hdr->dlength); lun = get_unaligned_le64(&hdr->lun); hdr->itt = be32_to_cpu(hdr->itt); hdr->ttt = be32_to_cpu(hdr->ttt); hdr->cmdsn = be32_to_cpu(hdr->cmdsn); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { pr_err("NOPOUT ITT is reserved, but Immediate Bit is" " not set, protocol error.\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { pr_err("NOPOUT Ping Data DataSegmentLength: %u is" " greater than MaxRecvDataSegmentLength: %u, protocol" " error.\n", payload_length, conn->conn_ops->MaxRecvDataSegmentLength); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%09x," " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n", (hdr->itt == 0xFFFFFFFF) ? "Response" : "Request", hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn, payload_length); /* * This is not a response to a Unsolicited NopIN, which means * it can either be a NOPOUT ping request (with a valid ITT), * or a NOPOUT not requesting a NOPIN (with a reserved ITT). * Either way, make sure we allocate an struct iscsi_cmd, as both * can contain ping data. 
*/ if (hdr->ttt == 0xFFFFFFFF) { cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return iscsit_add_reject( ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, buf, conn); cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT; cmd->i_state = ISTATE_SEND_NOPIN; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = hdr->cmdsn; cmd->exp_stat_sn = hdr->exp_statsn; cmd->data_direction = DMA_NONE; } if (payload_length && (hdr->ttt == 0xFFFFFFFF)) { rx_size = payload_length; ping_data = kzalloc(payload_length + 1, GFP_KERNEL); if (!ping_data) { pr_err("Unable to allocate memory for" " NOPOUT ping data.\n"); ret = -1; goto out; } iov = &cmd->iov_misc[0]; iov[niov].iov_base = ping_data; iov[niov++].iov_len = payload_length; padding = ((-payload_length) & 3); if (padding != 0) { pr_debug("Receiving %u additional bytes" " for padding.\n", padding); iov[niov].iov_base = &cmd->pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; } if (conn->conn_ops->DataDigest) { iov[niov].iov_base = &checksum; iov[niov++].iov_len = ISCSI_CRC_LEN; rx_size += ISCSI_CRC_LEN; } rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size); if (rx_got != rx_size) { ret = -1; goto out; } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, ping_data, payload_length, padding, cmd->pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { pr_err("Ping data CRC32C DataDigest" " 0x%08x does not match computed 0x%08x\n", checksum, data_crc); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Unable to recover from" " NOPOUT Ping DataCRC failure while in" " ERL=0.\n"); ret = -1; goto out; } else { /* * Silently drop this PDU and let the * initiator plug the CmdSN gap. 
*/ pr_debug("Dropping NOPOUT" " Command CmdSN: 0x%08x due to" " DataCRC error.\n", hdr->cmdsn); ret = 0; goto out; } } else { pr_debug("Got CRC32C DataDigest" " 0x%08x for %u bytes of ping data.\n", checksum, payload_length); } } ping_data[payload_length] = '\0'; /* * Attach ping data to struct iscsi_cmd->buf_ptr. */ cmd->buf_ptr = (void *)ping_data; cmd->buf_ptr_size = payload_length; pr_debug("Got %u bytes of NOPOUT ping" " data.\n", payload_length); pr_debug("Ping Data: \"%s\"\n", ping_data); } if (hdr->itt != 0xFFFFFFFF) { if (!cmd) { pr_err("Checking CmdSN for NOPOUT," " but cmd is NULL!\n"); return -1; } /* * Initiator is expecting a NopIN ping reply, */ spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); if (hdr->opcode & ISCSI_OP_IMMEDIATE) { iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { ret = 0; goto ping_out; } if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); return 0; } if (hdr->ttt != 0xFFFFFFFF) { /* * This was a response to a unsolicited NOPIN ping. */ cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt); if (!cmd) return -1; iscsit_stop_nopin_response_timer(conn); cmd->i_state = ISTATE_REMOVE; iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); iscsit_start_nopin_timer(conn); } else { /* * Initiator is not expecting a NOPIN is response. * Just ignore for now. * * iSCSI v19-91 10.18 * "A NOP-OUT may also be used to confirm a changed * ExpStatSN if another PDU will not be available * for a long time." 
*/ ret = 0; goto out; } return 0; out: if (cmd) iscsit_release_cmd(cmd); ping_out: kfree(ping_data); return ret; } static int iscsit_handle_task_mgt_cmd( struct iscsi_conn *conn, unsigned char *buf) { struct iscsi_cmd *cmd; struct se_tmr_req *se_tmr; struct iscsi_tmr_req *tmr_req; struct iscsi_tm *hdr; u32 payload_length; int out_of_order_cmdsn = 0; int ret; u8 function; hdr = (struct iscsi_tm *) buf; payload_length = ntoh24(hdr->dlength); hdr->itt = be32_to_cpu(hdr->itt); hdr->rtt = be32_to_cpu(hdr->rtt); hdr->cmdsn = be32_to_cpu(hdr->cmdsn); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); hdr->refcmdsn = be32_to_cpu(hdr->refcmdsn); hdr->exp_datasn = be32_to_cpu(hdr->exp_datasn); hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; function = hdr->flags; pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:" " 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:" " 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function, hdr->rtt, hdr->refcmdsn, conn->cid); if ((function != ISCSI_TM_FUNC_ABORT_TASK) && ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && (hdr->rtt != ISCSI_RESERVED_TAG))) { pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n"); hdr->rtt = ISCSI_RESERVED_TAG; } if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { pr_err("Task Management Request TASK_REASSIGN not" " issued as immediate command, bad iSCSI Initiator" "implementation\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } if ((function != ISCSI_TM_FUNC_ABORT_TASK) && (hdr->refcmdsn != ISCSI_RESERVED_TAG)) hdr->refcmdsn = ISCSI_RESERVED_TAG; cmd = iscsit_allocate_se_cmd_for_tmr(conn, function); if (!cmd) return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, buf, conn); cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC; cmd->i_state = ISTATE_SEND_TASKMGTRSP; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 
1 : 0); cmd->init_task_tag = hdr->itt; cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = hdr->cmdsn; cmd->exp_stat_sn = hdr->exp_statsn; se_tmr = cmd->se_cmd.se_tmr_req; tmr_req = cmd->tmr_req; /* * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { ret = iscsit_get_lun_for_tmr(cmd, get_unaligned_le64(&hdr->lun)); if (ret < 0) { cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; se_tmr->response = ISCSI_TMF_RSP_NO_LUN; goto attach; } } switch (function) { case ISCSI_TM_FUNC_ABORT_TASK: se_tmr->response = iscsit_tmr_abort_task(cmd, buf); if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) { cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; goto attach; } break; case ISCSI_TM_FUNC_ABORT_TASK_SET: case ISCSI_TM_FUNC_CLEAR_ACA: case ISCSI_TM_FUNC_CLEAR_TASK_SET: case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: break; case ISCSI_TM_FUNC_TARGET_WARM_RESET: if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) { cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; goto attach; } break; case ISCSI_TM_FUNC_TARGET_COLD_RESET: if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) { cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; goto attach; } break; case ISCSI_TM_FUNC_TASK_REASSIGN: se_tmr->response = iscsit_tmr_task_reassign(cmd, buf); /* * Perform sanity checks on the ExpDataSN only if the * TASK_REASSIGN was successful. 
*/ if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) break; if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0) return iscsit_add_reject_from_cmd( ISCSI_REASON_BOOKMARK_INVALID, 1, 1, buf, cmd); break; default: pr_err("Unknown TMR function: 0x%02x, protocol" " error.\n", function); cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED; goto attach; } if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && (se_tmr->response == ISCSI_TMF_RSP_COMPLETE)) se_tmr->call_transport = 1; attach: spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) out_of_order_cmdsn = 1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) return 0; else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); } iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); if (out_of_order_cmdsn || !(hdr->opcode & ISCSI_OP_IMMEDIATE)) return 0; /* * Found the referenced task, send to transport for processing. */ if (se_tmr->call_transport) return transport_generic_handle_tmr(&cmd->se_cmd); /* * Could not find the referenced LUN, task, or Task Management * command not authorized or supported. Change state and * let the tx_thread send the response. * * For connection recovery, this is also the default action for * TMR TASK_REASSIGN. 
*/ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } /* #warning FIXME: Support Text Command parameters besides SendTargets */ static int iscsit_handle_text_cmd( struct iscsi_conn *conn, unsigned char *buf) { char *text_ptr, *text_in; int cmdsn_ret, niov = 0, rx_got, rx_size; u32 checksum = 0, data_crc = 0, payload_length; u32 padding = 0, pad_bytes = 0, text_length = 0; struct iscsi_cmd *cmd; struct kvec iov[3]; struct iscsi_text *hdr; hdr = (struct iscsi_text *) buf; payload_length = ntoh24(hdr->dlength); hdr->itt = be32_to_cpu(hdr->itt); hdr->ttt = be32_to_cpu(hdr->ttt); hdr->cmdsn = be32_to_cpu(hdr->cmdsn); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { pr_err("Unable to accept text parameter length: %u" "greater than MaxRecvDataSegmentLength %u.\n", payload_length, conn->conn_ops->MaxRecvDataSegmentLength); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn, hdr->exp_statsn, payload_length); rx_size = text_length = payload_length; if (text_length) { text_in = kzalloc(text_length, GFP_KERNEL); if (!text_in) { pr_err("Unable to allocate memory for" " incoming text parameters\n"); return -1; } memset(iov, 0, 3 * sizeof(struct kvec)); iov[niov].iov_base = text_in; iov[niov++].iov_len = text_length; padding = ((-payload_length) & 3); if (padding != 0) { iov[niov].iov_base = &pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u additional bytes" " for padding.\n", padding); } if (conn->conn_ops->DataDigest) { iov[niov].iov_base = &checksum; iov[niov++].iov_len = ISCSI_CRC_LEN; rx_size += ISCSI_CRC_LEN; } rx_got = rx_data(conn, &iov[0], niov, rx_size); if (rx_got != rx_size) { kfree(text_in); return -1; } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, text_in, text_length, padding, (u8 
*)&pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { pr_err("Text data CRC32C DataDigest" " 0x%08x does not match computed" " 0x%08x\n", checksum, data_crc); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Unable to recover from" " Text Data digest failure while in" " ERL=0.\n"); kfree(text_in); return -1; } else { /* * Silently drop this PDU and let the * initiator plug the CmdSN gap. */ pr_debug("Dropping Text" " Command CmdSN: 0x%08x due to" " DataCRC error.\n", hdr->cmdsn); kfree(text_in); return 0; } } else { pr_debug("Got CRC32C DataDigest" " 0x%08x for %u bytes of text data.\n", checksum, text_length); } } text_in[text_length - 1] = '\0'; pr_debug("Successfully read %d bytes of text" " data.\n", text_length); if (strncmp("SendTargets", text_in, 11) != 0) { pr_err("Received Text Data that is not" " SendTargets, cannot continue.\n"); kfree(text_in); return -1; } text_ptr = strchr(text_in, '='); if (!text_ptr) { pr_err("No \"=\" separator found in Text Data," " cannot continue.\n"); kfree(text_in); return -1; } if (strncmp("=All", text_ptr, 4) != 0) { pr_err("Unable to locate All value for" " SendTargets key, cannot continue.\n"); kfree(text_in); return -1; } /*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */ kfree(text_in); } cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, buf, conn); cmd->iscsi_opcode = ISCSI_OP_TEXT; cmd->i_state = ISTATE_SEND_TEXTRSP; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 
1 : 0); conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = hdr->cmdsn; cmd->exp_stat_sn = hdr->exp_statsn; cmd->data_direction = DMA_NONE; spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); return 0; } return iscsit_execute_cmd(cmd, 0); } int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_conn *conn_p; struct iscsi_session *sess = conn->sess; pr_debug("Received logout request CLOSESESSION on CID: %hu" " for SID: %u.\n", conn->cid, conn->sess->sid); atomic_set(&sess->session_logout, 1); atomic_set(&conn->conn_logout_remove, 1); conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION; iscsit_inc_conn_usage_count(conn); iscsit_inc_session_usage_count(sess); spin_lock_bh(&sess->conn_lock); list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) { if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN) continue; pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT; } spin_unlock_bh(&sess->conn_lock); iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_conn *l_conn; struct iscsi_session *sess = conn->sess; pr_debug("Received logout request CLOSECONNECTION for CID:" " %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); /* * A Logout Request with a CLOSECONNECTION reason code for a CID * can arrive on a connection with a differing CID. 
*/ if (conn->cid == cmd->logout_cid) { spin_lock_bh(&conn->state_lock); pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; atomic_set(&conn->conn_logout_remove, 1); conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION; iscsit_inc_conn_usage_count(conn); spin_unlock_bh(&conn->state_lock); } else { /* * Handle all different cid CLOSECONNECTION requests in * iscsit_logout_post_handler_diffcid() as to give enough * time for any non immediate command's CmdSN to be * acknowledged on the connection in question. * * Here we simply make sure the CID is still around. */ l_conn = iscsit_get_conn_from_cid(sess, cmd->logout_cid); if (!l_conn) { cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } iscsit_dec_conn_usage_count(l_conn); } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_session *sess = conn->sess; pr_debug("Received explicit REMOVECONNFORRECOVERY logout for" " CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); if (sess->sess_ops->ErrorRecoveryLevel != 2) { pr_err("Received Logout Request REMOVECONNFORRECOVERY" " while ERL!=2.\n"); cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } if (conn->cid == cmd->logout_cid) { pr_err("Received Logout Request REMOVECONNFORRECOVERY" " with CID: %hu on CID: %hu, implementation error.\n", cmd->logout_cid, conn->cid); cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED; iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); return 0; } static int iscsit_handle_logout_cmd( struct iscsi_conn *conn, unsigned char *buf) { int cmdsn_ret, logout_remove = 0; u8 reason_code = 0; struct iscsi_cmd *cmd; struct iscsi_logout *hdr; struct 
iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn); hdr = (struct iscsi_logout *) buf; reason_code = (hdr->flags & 0x7f); hdr->itt = be32_to_cpu(hdr->itt); hdr->cid = be16_to_cpu(hdr->cid); hdr->cmdsn = be32_to_cpu(hdr->cmdsn); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); if (tiqn) { spin_lock(&tiqn->logout_stats.lock); if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) tiqn->logout_stats.normal_logouts++; else tiqn->logout_stats.abnormal_logouts++; spin_unlock(&tiqn->logout_stats.lock); } pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x" " ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n", hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code, hdr->cid, conn->cid); if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) { pr_err("Received logout request on connection that" " is not in logged in state, ignoring request.\n"); return 0; } cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); if (!cmd) return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, buf, conn); cmd->iscsi_opcode = ISCSI_OP_LOGOUT; cmd->i_state = ISTATE_SEND_LOGOUTRSP; cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = hdr->cmdsn; cmd->exp_stat_sn = hdr->exp_statsn; cmd->logout_cid = hdr->cid; cmd->logout_reason = reason_code; cmd->data_direction = DMA_NONE; /* * We need to sleep in these cases (by returning 1) until the Logout * Response gets sent in the tx thread. */ if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) || ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) && (hdr->cid == conn->cid))) logout_remove = 1; spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_list, &conn->conn_cmd_list); spin_unlock_bh(&conn->cmd_lock); if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY) iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); /* * Immediate commands are executed, well, immediately. * Non-Immediate Logout Commands are executed in CmdSN order. 
*/ if (hdr->opcode & ISCSI_OP_IMMEDIATE) { int ret = iscsit_execute_cmd(cmd, 0); if (ret < 0) return ret; } else { cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { logout_remove = 0; } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { return iscsit_add_reject_from_cmd( ISCSI_REASON_PROTOCOL_ERROR, 1, 0, buf, cmd); } } return logout_remove; } static int iscsit_handle_snack( struct iscsi_conn *conn, unsigned char *buf) { u32 unpacked_lun; u64 lun; struct iscsi_snack *hdr; hdr = (struct iscsi_snack *) buf; hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; lun = get_unaligned_le64(&hdr->lun); unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); hdr->itt = be32_to_cpu(hdr->itt); hdr->ttt = be32_to_cpu(hdr->ttt); hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); hdr->begrun = be32_to_cpu(hdr->begrun); hdr->runlength = be32_to_cpu(hdr->runlength); pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:" " 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x," " CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags, hdr->begrun, hdr->runlength, conn->cid); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Initiator sent SNACK request while in" " ErrorRecoveryLevel=0.\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } /* * SNACK_DATA and SNACK_R2T are both 0, so check which function to * call from inside iscsi_send_recovery_datain_or_r2t(). 
*/ switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) { case 0: return iscsit_handle_recovery_datain_or_r2t(conn, buf, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); case ISCSI_FLAG_SNACK_TYPE_STATUS: return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); case ISCSI_FLAG_SNACK_TYPE_DATA_ACK: return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun, hdr->runlength); case ISCSI_FLAG_SNACK_TYPE_RDATA: /* FIXME: Support R-Data SNACK */ pr_err("R-Data SNACK Not Supported.\n"); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); default: pr_err("Unknown SNACK type 0x%02x, protocol" " error.\n", hdr->flags & 0x0f); return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buf, conn); } return 0; } static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn) { if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) || (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) { wait_for_completion_interruptible_timeout( &conn->rx_half_close_comp, ISCSI_RX_THREAD_TCP_TIMEOUT * HZ); } } static int iscsit_handle_immediate_data( struct iscsi_cmd *cmd, unsigned char *buf, u32 length) { int iov_ret, rx_got = 0, rx_size = 0; u32 checksum, iov_count = 0, padding = 0; struct iscsi_conn *conn = cmd->conn; struct kvec *iov; iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length); if (iov_ret < 0) return IMMEDIATE_DATA_CANNOT_RECOVER; rx_size = length; iov_count = iov_ret; iov = &cmd->iov_data[0]; padding = ((-length) & 3); if (padding != 0) { iov[iov_count].iov_base = cmd->pad_bytes; iov[iov_count++].iov_len = padding; rx_size += padding; } if (conn->conn_ops->DataDigest) { iov[iov_count].iov_base = &checksum; iov[iov_count++].iov_len = ISCSI_CRC_LEN; rx_size += ISCSI_CRC_LEN; } rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size); iscsit_unmap_iovec(cmd); if (rx_got != rx_size) { iscsit_rx_thread_wait_for_tcp(conn); return IMMEDIATE_DATA_CANNOT_RECOVER; } if (conn->conn_ops->DataDigest) { u32 data_crc; data_crc = 
iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd, cmd->write_data_done, length, padding, cmd->pad_bytes); if (checksum != data_crc) { pr_err("ImmediateData CRC32C DataDigest 0x%08x" " does not match computed 0x%08x\n", checksum, data_crc); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Unable to recover from" " Immediate Data digest failure while" " in ERL=0.\n"); iscsit_add_reject_from_cmd( ISCSI_REASON_DATA_DIGEST_ERROR, 1, 0, buf, cmd); return IMMEDIATE_DATA_CANNOT_RECOVER; } else { iscsit_add_reject_from_cmd( ISCSI_REASON_DATA_DIGEST_ERROR, 0, 0, buf, cmd); return IMMEDIATE_DATA_ERL1_CRC_FAILURE; } } else { pr_debug("Got CRC32C DataDigest 0x%08x for" " %u bytes of Immediate Data\n", checksum, length); } } cmd->write_data_done += length; if (cmd->write_data_done == cmd->data_length) { spin_lock_bh(&cmd->istate_lock); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; spin_unlock_bh(&cmd->istate_lock); } return IMMEDIATE_DATA_NORMAL_OPERATION; } /* * Called with sess->conn_lock held. */ /* #warning iscsi_build_conn_drop_async_message() only sends out on connections with active network interface */ static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn) { struct iscsi_cmd *cmd; struct iscsi_conn *conn_p; /* * Only send a Asynchronous Message on connections whos network * interface is still functional. 
*/ list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) { if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) { iscsit_inc_conn_usage_count(conn_p); break; } } if (!conn_p) return; cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL); if (!cmd) { iscsit_dec_conn_usage_count(conn_p); return; } cmd->logout_cid = conn->cid; cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT; cmd->i_state = ISTATE_SEND_ASYNCMSG; spin_lock_bh(&conn_p->cmd_lock); list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list); spin_unlock_bh(&conn_p->cmd_lock); iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state); iscsit_dec_conn_usage_count(conn_p); } static int iscsit_send_conn_drop_async_message( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_async *hdr; cmd->tx_size = ISCSI_HDR_LEN; cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT; hdr = (struct iscsi_async *) cmd->pdu; hdr->opcode = ISCSI_OP_ASYNC_EVENT; hdr->flags = ISCSI_FLAG_CMD_FINAL; cmd->init_task_tag = 0xFFFFFFFF; cmd->targ_xfer_tag = 0xFFFFFFFF; put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION; hdr->param1 = cpu_to_be16(cmd->logout_cid); hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait); hdr->param3 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain); if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); cmd->tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32C HeaderDigest to" " Async Message 0x%08x\n", *header_digest); } cmd->iov_misc[0].iov_base = cmd->pdu; cmd->iov_misc[0].iov_len = cmd->tx_size; cmd->iov_misc_count = 1; pr_debug("Sending Connection Dropped Async Message StatSN:" " 0x%08x, for CID: %hu on CID: 
%hu\n", cmd->stat_sn, cmd->logout_cid, conn->cid); return 0; } static int iscsit_send_data_in( struct iscsi_cmd *cmd, struct iscsi_conn *conn, int *eodr) { int iov_ret = 0, set_statsn = 0; u32 iov_count = 0, tx_size = 0; struct iscsi_datain datain; struct iscsi_datain_req *dr; struct iscsi_data_rsp *hdr; struct kvec *iov; memset(&datain, 0, sizeof(struct iscsi_datain)); dr = iscsit_get_datain_values(cmd, &datain); if (!dr) { pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n", cmd->init_task_tag); return -1; } /* * Be paranoid and double check the logic for now. */ if ((datain.offset + datain.length) > cmd->data_length) { pr_err("Command ITT: 0x%08x, datain.offset: %u and" " datain.length: %u exceeds cmd->data_length: %u\n", cmd->init_task_tag, datain.offset, datain.length, cmd->data_length); return -1; } spin_lock_bh(&conn->sess->session_stats_lock); conn->sess->tx_data_octets += datain.length; if (conn->sess->se_sess->se_node_acl) { spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); conn->sess->se_sess->se_node_acl->read_bytes += datain.length; spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); } spin_unlock_bh(&conn->sess->session_stats_lock); /* * Special case for successfully execution w/ both DATAIN * and Sense Data. 
*/ if ((datain.flags & ISCSI_FLAG_DATA_STATUS) && (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)) datain.flags &= ~ISCSI_FLAG_DATA_STATUS; else { if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) || (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) { iscsit_increment_maxcmdsn(cmd, conn->sess); cmd->stat_sn = conn->stat_sn++; set_statsn = 1; } else if (dr->dr_complete == DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY) set_statsn = 1; } hdr = (struct iscsi_data_rsp *) cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_SCSI_DATA_IN; hdr->flags = datain.flags; if (hdr->flags & ISCSI_FLAG_DATA_STATUS) { if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) { hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW; hdr->residual_count = cpu_to_be32(cmd->se_cmd.residual_count); } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) { hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW; hdr->residual_count = cpu_to_be32(cmd->se_cmd.residual_count); } } hton24(hdr->dlength, datain.length); if (hdr->flags & ISCSI_FLAG_DATA_ACK) int_to_scsilun(cmd->se_cmd.orig_fe_lun, (struct scsi_lun *)&hdr->lun); else put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun); hdr->itt = cpu_to_be32(cmd->init_task_tag); hdr->ttt = (hdr->flags & ISCSI_FLAG_DATA_ACK) ? cpu_to_be32(cmd->targ_xfer_tag) : 0xFFFFFFFF; hdr->statsn = (set_statsn) ? 
cpu_to_be32(cmd->stat_sn) : 0xFFFFFFFF; hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); hdr->datasn = cpu_to_be32(datain.data_sn); hdr->offset = cpu_to_be32(datain.offset); iov = &cmd->iov_data[0]; iov[iov_count].iov_base = cmd->pdu; iov[iov_count++].iov_len = ISCSI_HDR_LEN; tx_size += ISCSI_HDR_LEN; if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 HeaderDigest" " for DataIN PDU 0x%08x\n", *header_digest); } iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length); if (iov_ret < 0) return -1; iov_count += iov_ret; tx_size += datain.length; cmd->padding = ((-datain.length) & 3); if (cmd->padding) { iov[iov_count].iov_base = cmd->pad_bytes; iov[iov_count++].iov_len = cmd->padding; tx_size += cmd->padding; pr_debug("Attaching %u padding bytes\n", cmd->padding); } if (conn->conn_ops->DataDigest) { cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd, datain.offset, datain.length, cmd->padding, cmd->pad_bytes); iov[iov_count].iov_base = &cmd->data_crc; iov[iov_count++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attached CRC32C DataDigest %d bytes, crc" " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc); } cmd->iov_data_count = iov_count; cmd->tx_size = tx_size; pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x," " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn), ntohl(hdr->offset), datain.length, conn->cid); if (dr->dr_complete) { *eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ? 
2 : 1; iscsit_free_datain_req(cmd, dr); } return 0; } static int iscsit_send_logout_response( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { int niov = 0, tx_size; struct iscsi_conn *logout_conn = NULL; struct iscsi_conn_recovery *cr = NULL; struct iscsi_session *sess = conn->sess; struct kvec *iov; struct iscsi_logout_rsp *hdr; /* * The actual shutting down of Sessions and/or Connections * for CLOSESESSION and CLOSECONNECTION Logout Requests * is done in scsi_logout_post_handler(). */ switch (cmd->logout_reason) { case ISCSI_LOGOUT_REASON_CLOSE_SESSION: pr_debug("iSCSI session logout successful, setting" " logout response to ISCSI_LOGOUT_SUCCESS.\n"); cmd->logout_response = ISCSI_LOGOUT_SUCCESS; break; case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND) break; /* * For CLOSECONNECTION logout requests carrying * a matching logout CID -> local CID, the reference * for the local CID will have been incremented in * iscsi_logout_closeconnection(). * * For CLOSECONNECTION logout requests carrying * a different CID than the connection it arrived * on, the connection responding to cmd->logout_cid * is stopped in iscsit_logout_post_handler_diffcid(). */ pr_debug("iSCSI CID: %hu logout on CID: %hu" " successful.\n", cmd->logout_cid, conn->cid); cmd->logout_response = ISCSI_LOGOUT_SUCCESS; break; case ISCSI_LOGOUT_REASON_RECOVERY: if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) || (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED)) break; /* * If the connection is still active from our point of view * force connection recovery to occur. 
*/ logout_conn = iscsit_get_conn_from_cid_rcfr(sess, cmd->logout_cid); if ((logout_conn)) { iscsit_connection_reinstatement_rcfr(logout_conn); iscsit_dec_conn_usage_count(logout_conn); } cr = iscsit_get_inactive_connection_recovery_entry( conn->sess, cmd->logout_cid); if (!cr) { pr_err("Unable to locate CID: %hu for" " REMOVECONNFORRECOVERY Logout Request.\n", cmd->logout_cid); cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; break; } iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn); pr_debug("iSCSI REMOVECONNFORRECOVERY logout" " for recovery for CID: %hu on CID: %hu successful.\n", cmd->logout_cid, conn->cid); cmd->logout_response = ISCSI_LOGOUT_SUCCESS; break; default: pr_err("Unknown cmd->logout_reason: 0x%02x\n", cmd->logout_reason); return -1; } tx_size = ISCSI_HDR_LEN; hdr = (struct iscsi_logout_rsp *)cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_LOGOUT_RSP; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hdr->response = cmd->logout_response; hdr->itt = cpu_to_be32(cmd->init_task_tag); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); iov = &cmd->iov_misc[0]; iov[niov].iov_base = cmd->pdu; iov[niov++].iov_len = ISCSI_HDR_LEN; if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32C HeaderDigest to" " Logout Response 0x%08x\n", *header_digest); } cmd->iov_misc_count = niov; cmd->tx_size = tx_size; pr_debug("Sending Logout Response ITT: 0x%08x StatSN:" " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, hdr->response, cmd->logout_cid, conn->cid); return 0; } /* * Unsolicited NOPIN, either requesting a response 
or not. */ static int iscsit_send_unsolicited_nopin( struct iscsi_cmd *cmd, struct iscsi_conn *conn, int want_response) { int tx_size = ISCSI_HDR_LEN; struct iscsi_nopin *hdr; hdr = (struct iscsi_nopin *) cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_NOOP_IN; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hdr->itt = cpu_to_be32(cmd->init_task_tag); hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); cmd->stat_sn = conn->stat_sn; hdr->statsn = cpu_to_be32(cmd->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32C HeaderDigest to" " NopIN 0x%08x\n", *header_digest); } cmd->iov_misc[0].iov_base = cmd->pdu; cmd->iov_misc[0].iov_len = tx_size; cmd->iov_misc_count = 1; cmd->tx_size = tx_size; pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:" " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid); return 0; } static int iscsit_send_nopin_response( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { int niov = 0, tx_size; u32 padding = 0; struct kvec *iov; struct iscsi_nopin *hdr; tx_size = ISCSI_HDR_LEN; hdr = (struct iscsi_nopin *) cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_NOOP_IN; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, cmd->buf_ptr_size); put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun); hdr->itt = cpu_to_be32(cmd->init_task_tag); hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); iov = &cmd->iov_misc[0]; iov[niov].iov_base = cmd->pdu; iov[niov++].iov_len = ISCSI_HDR_LEN; if 
(conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32C HeaderDigest" " to NopIn 0x%08x\n", *header_digest); } /* * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr. * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size. */ if (cmd->buf_ptr_size) { iov[niov].iov_base = cmd->buf_ptr; iov[niov++].iov_len = cmd->buf_ptr_size; tx_size += cmd->buf_ptr_size; pr_debug("Echoing back %u bytes of ping" " data.\n", cmd->buf_ptr_size); padding = ((-cmd->buf_ptr_size) & 3); if (padding != 0) { iov[niov].iov_base = &cmd->pad_bytes; iov[niov++].iov_len = padding; tx_size += padding; pr_debug("Attaching %u additional" " padding bytes.\n", padding); } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, cmd->buf_ptr, cmd->buf_ptr_size, padding, (u8 *)&cmd->pad_bytes, (u8 *)&cmd->data_crc); iov[niov].iov_base = &cmd->data_crc; iov[niov++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attached DataDigest for %u" " bytes of ping data, CRC 0x%08x\n", cmd->buf_ptr_size, cmd->data_crc); } } cmd->iov_misc_count = niov; cmd->tx_size = tx_size; pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:" " 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag, cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size); return 0; }

/*
 * Build the next R2T PDU for @cmd in cmd->pdu.
 *
 * Takes the next entry from the command's R2T list, assigns it a fresh
 * Target Transfer Tag from the session counter (skipping 0xFFFFFFFF),
 * fills in the header, appends a header digest when enabled and marks the
 * R2T as sent.  The PDU is described by cmd->iov_misc[0] and cmd->tx_size.
 *
 * Returns 0 on success, -1 if the R2T list yields no entry.
 */
int iscsit_send_r2t(
	struct iscsi_cmd *cmd,
	struct iscsi_conn *conn)
{
	struct iscsi_session *sess = conn->sess;
	struct iscsi_r2t_rsp *hdr;
	struct iscsi_r2t *r2t;
	int pdu_len = ISCSI_HDR_LEN;

	r2t = iscsit_get_r2t_from_list(cmd);
	if (!r2t)
		return -1;

	hdr = (struct iscsi_r2t_rsp *) cmd->pdu;
	memset(hdr, 0, ISCSI_HDR_LEN);
	hdr->opcode = ISCSI_OP_R2T;
	hdr->flags |= ISCSI_FLAG_CMD_FINAL;
	int_to_scsilun(cmd->se_cmd.orig_fe_lun,
			(struct scsi_lun *)&hdr->lun);
	hdr->itt = cpu_to_be32(cmd->init_task_tag);

	spin_lock_bh(&sess->ttt_lock);
	r2t->targ_xfer_tag = sess->targ_xfer_tag++;
	/* If the counter handed out 0xFFFFFFFF, take the next value. */
	if (r2t->targ_xfer_tag == 0xFFFFFFFF)
		r2t->targ_xfer_tag = sess->targ_xfer_tag++;
	spin_unlock_bh(&sess->ttt_lock);

	hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag);
	hdr->statsn = cpu_to_be32(conn->stat_sn);
	hdr->exp_cmdsn = cpu_to_be32(sess->exp_cmd_sn);
	hdr->max_cmdsn = cpu_to_be32(sess->max_cmd_sn);
	hdr->r2tsn = cpu_to_be32(r2t->r2t_sn);
	hdr->data_offset = cpu_to_be32(r2t->offset);
	hdr->data_length = cpu_to_be32(r2t->xfer_len);

	if (conn->conn_ops->HeaderDigest) {
		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];

		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
				(unsigned char *)hdr, ISCSI_HDR_LEN,
				0, NULL, (u8 *)header_digest);

		pdu_len += ISCSI_CRC_LEN;
		pr_debug("Attaching CRC32 HeaderDigest for R2T"
			" PDU 0x%08x\n", *header_digest);
	}

	pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:"
		" 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n",
		r2t->recovery_r2t ? "Recovery " : "", cmd->init_task_tag,
		r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn,
			r2t->offset, r2t->xfer_len, conn->cid);

	cmd->iov_misc[0].iov_base = cmd->pdu;
	cmd->iov_misc[0].iov_len = pdu_len;
	cmd->iov_misc_count = 1;
	cmd->tx_size = pdu_len;

	spin_lock_bh(&cmd->r2t_lock);
	r2t->sent_r2t = 1;
	spin_unlock_bh(&cmd->r2t_lock);

	return 0;
}

/*
 *	Queue R2Ts for @cmd until MaxOutstandingR2T is reached or the last
 *	R2T of the command has been queued (ICF_SENT_LAST_R2T).
 *
 *	type 0: Normal Operation.
 *	type 1: Called from Storage Transport.
 *	type 2: Called from iscsi_task_reassign_complete_write() for
 *		connection recovery.
 *
 *	Returns 0 on success (including when the last R2T was already sent),
 *	-1 if no sequence holder is found or the R2T cannot be added.
 */
int iscsit_build_r2ts_for_cmd(
	struct iscsi_cmd *cmd,
	struct iscsi_conn *conn,
	int type)
{
	struct iscsi_session *sess = conn->sess;
	int first_r2t = 1;
	u32 offset = 0, xfer_len = 0;

	spin_lock_bh(&cmd->r2t_lock);
	if (cmd->cmd_flags & ICF_SENT_LAST_R2T)
		goto done;

	if (sess->sess_ops->DataSequenceInOrder && (type != 2) &&
	    (cmd->r2t_offset < cmd->write_data_done))
		cmd->r2t_offset = cmd->write_data_done;

	while (cmd->outstanding_r2ts < sess->sess_ops->MaxOutstandingR2T) {
		if (sess->sess_ops->DataSequenceInOrder) {
			offset = cmd->r2t_offset;

			if (first_r2t && (type == 2)) {
				/* Recovery resumes inside a burst: only ask
				 * for what is left of it. */
				if (offset + (sess->sess_ops->MaxBurstLength -
				     cmd->next_burst_len) > cmd->data_length)
					xfer_len = cmd->data_length - offset;
				else
					xfer_len =
					    sess->sess_ops->MaxBurstLength -
					    cmd->next_burst_len;
			} else {
				if ((offset + sess->sess_ops->MaxBurstLength) >
				     cmd->data_length)
					xfer_len = cmd->data_length - offset;
				else
					xfer_len =
					    sess->sess_ops->MaxBurstLength;
			}

			cmd->r2t_offset += xfer_len;
			if (cmd->r2t_offset == cmd->data_length)
				cmd->cmd_flags |= ICF_SENT_LAST_R2T;
		} else {
			struct iscsi_seq *seq;

			seq = iscsit_get_seq_holder_for_r2t(cmd);
			if (!seq)
				goto fail;

			offset = seq->offset;
			xfer_len = seq->xfer_len;

			if (cmd->seq_send_order == cmd->seq_count)
				cmd->cmd_flags |= ICF_SENT_LAST_R2T;
		}

		cmd->outstanding_r2ts++;
		first_r2t = 0;

		if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0)
			goto fail;

		if (cmd->cmd_flags & ICF_SENT_LAST_R2T)
			break;
	}

done:
	spin_unlock_bh(&cmd->r2t_lock);
	return 0;

fail:
	spin_unlock_bh(&cmd->r2t_lock);
	return -1;
}

static int iscsit_send_status( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { u8 iov_count = 0, recovery; u32 padding = 0, tx_size = 0; struct iscsi_scsi_rsp *hdr; struct kvec *iov; recovery = (cmd->i_state != ISTATE_SEND_STATUS); if (!recovery) cmd->stat_sn = conn->stat_sn++; spin_lock_bh(&conn->sess->session_stats_lock); conn->sess->rsp_pdus++; spin_unlock_bh(&conn->sess->session_stats_lock); hdr = (struct iscsi_scsi_rsp *) cmd->pdu;
memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_SCSI_CMD_RSP; hdr->flags |= ISCSI_FLAG_CMD_FINAL; if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) { hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW; hdr->residual_count = cpu_to_be32(cmd->se_cmd.residual_count); } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) { hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW; hdr->residual_count = cpu_to_be32(cmd->se_cmd.residual_count); } hdr->response = cmd->iscsi_response; hdr->cmd_status = cmd->se_cmd.scsi_status; hdr->itt = cpu_to_be32(cmd->init_task_tag); hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); iov = &cmd->iov_misc[0]; iov[iov_count].iov_base = cmd->pdu; iov[iov_count++].iov_len = ISCSI_HDR_LEN; tx_size += ISCSI_HDR_LEN; /* * Attach SENSE DATA payload to iSCSI Response PDU */ if (cmd->se_cmd.sense_buffer && ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) || (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) { padding = -(cmd->se_cmd.scsi_sense_length) & 3; hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length); iov[iov_count].iov_base = cmd->se_cmd.sense_buffer; iov[iov_count++].iov_len = (cmd->se_cmd.scsi_sense_length + padding); tx_size += cmd->se_cmd.scsi_sense_length; if (padding) { memset(cmd->se_cmd.sense_buffer + cmd->se_cmd.scsi_sense_length, 0, padding); tx_size += padding; pr_debug("Adding %u bytes of padding to" " SENSE.\n", padding); } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, cmd->se_cmd.sense_buffer, (cmd->se_cmd.scsi_sense_length + padding), 0, NULL, (u8 *)&cmd->data_crc); iov[iov_count].iov_base = &cmd->data_crc; iov[iov_count++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 DataDigest for" " SENSE, %u bytes CRC 0x%08x\n", (cmd->se_cmd.scsi_sense_length + padding), cmd->data_crc); } pr_debug("Attaching SENSE DATA: %u bytes to iSCSI" " 
Response PDU\n", cmd->se_cmd.scsi_sense_length); } if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 HeaderDigest for Response" " PDU 0x%08x\n", *header_digest); } cmd->iov_misc_count = iov_count; cmd->tx_size = tx_size; pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x," " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n", (!recovery) ? "" : "Recovery ", cmd->init_task_tag, cmd->stat_sn, 0x00, cmd->se_cmd.scsi_status, conn->cid); return 0; } static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr) { switch (se_tmr->response) { case TMR_FUNCTION_COMPLETE: return ISCSI_TMF_RSP_COMPLETE; case TMR_TASK_DOES_NOT_EXIST: return ISCSI_TMF_RSP_NO_TASK; case TMR_LUN_DOES_NOT_EXIST: return ISCSI_TMF_RSP_NO_LUN; case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: return ISCSI_TMF_RSP_NOT_SUPPORTED; case TMR_FUNCTION_AUTHORIZATION_FAILED: return ISCSI_TMF_RSP_AUTH_FAILED; case TMR_FUNCTION_REJECTED: default: return ISCSI_TMF_RSP_REJECTED; } } static int iscsit_send_task_mgt_rsp( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; struct iscsi_tm_rsp *hdr; u32 tx_size = 0; hdr = (struct iscsi_tm_rsp *) cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_SCSI_TMFUNC_RSP; hdr->flags = ISCSI_FLAG_CMD_FINAL; hdr->response = iscsit_convert_tcm_tmr_rsp(se_tmr); hdr->itt = cpu_to_be32(cmd->init_task_tag); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); cmd->iov_misc[0].iov_base = cmd->pdu; cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; tx_size += ISCSI_HDR_LEN; if (conn->conn_ops->HeaderDigest) { u32 
*header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 HeaderDigest for Task" " Mgmt Response PDU 0x%08x\n", *header_digest); } cmd->iov_misc_count = 1; cmd->tx_size = tx_size; pr_debug("Built Task Management Response ITT: 0x%08x," " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, hdr->response, conn->cid); return 0; } static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) { char *payload = NULL; struct iscsi_conn *conn = cmd->conn; struct iscsi_portal_group *tpg; struct iscsi_tiqn *tiqn; struct iscsi_tpg_np *tpg_np; int buffer_len, end_of_buf = 0, len = 0, payload_len = 0; unsigned char buf[256]; buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ? 32768 : conn->conn_ops->MaxRecvDataSegmentLength; memset(buf, 0, 256); payload = kzalloc(buffer_len, GFP_KERNEL); if (!payload) { pr_err("Unable to allocate memory for sendtargets" " response.\n"); return -ENOMEM; } spin_lock(&tiqn_lock); list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { len = sprintf(buf, "TargetName=%s", tiqn->tiqn); len += 1; if ((len + payload_len) > buffer_len) { spin_unlock(&tiqn->tiqn_tpg_lock); end_of_buf = 1; goto eob; } memcpy((void *)payload + payload_len, buf, len); payload_len += len; spin_lock(&tiqn->tiqn_tpg_lock); list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { spin_lock(&tpg->tpg_state_lock); if ((tpg->tpg_state == TPG_STATE_FREE) || (tpg->tpg_state == TPG_STATE_INACTIVE)) { spin_unlock(&tpg->tpg_state_lock); continue; } spin_unlock(&tpg->tpg_state_lock); spin_lock(&tpg->tpg_np_lock); list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { len = sprintf(buf, "TargetAddress=" "%s%s%s:%hu,%hu", (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ? 
"[" : "", tpg_np->tpg_np->np_ip, (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ? "]" : "", tpg_np->tpg_np->np_port, tpg->tpgt); len += 1; if ((len + payload_len) > buffer_len) { spin_unlock(&tpg->tpg_np_lock); spin_unlock(&tiqn->tiqn_tpg_lock); end_of_buf = 1; goto eob; } memcpy((void *)payload + payload_len, buf, len); payload_len += len; } spin_unlock(&tpg->tpg_np_lock); } spin_unlock(&tiqn->tiqn_tpg_lock); eob: if (end_of_buf) break; } spin_unlock(&tiqn_lock); cmd->buf_ptr = payload; return payload_len; } /* * FIXME: Add support for F_BIT and C_BIT when the length is longer than * MaxRecvDataSegmentLength. */ static int iscsit_send_text_rsp( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_text_rsp *hdr; struct kvec *iov; u32 padding = 0, tx_size = 0; int text_length, iov_count = 0; text_length = iscsit_build_sendtargets_response(cmd); if (text_length < 0) return text_length; padding = ((-text_length) & 3); if (padding != 0) { memset(cmd->buf_ptr + text_length, 0, padding); pr_debug("Attaching %u additional bytes for" " padding.\n", padding); } hdr = (struct iscsi_text_rsp *) cmd->pdu; memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_TEXT_RSP; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, text_length); hdr->itt = cpu_to_be32(cmd->init_task_tag); hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); iscsit_increment_maxcmdsn(cmd, conn->sess); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); iov = &cmd->iov_misc[0]; iov[iov_count].iov_base = cmd->pdu; iov[iov_count++].iov_len = ISCSI_HDR_LEN; iov[iov_count].iov_base = cmd->buf_ptr; iov[iov_count++].iov_len = text_length + padding; tx_size += (ISCSI_HDR_LEN + text_length + padding); if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, 
NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 HeaderDigest for" " Text Response PDU 0x%08x\n", *header_digest); } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, cmd->buf_ptr, (text_length + padding), 0, NULL, (u8 *)&cmd->data_crc); iov[iov_count].iov_base = &cmd->data_crc; iov[iov_count++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching DataDigest for %u bytes of text" " data, CRC 0x%08x\n", (text_length + padding), cmd->data_crc); } cmd->iov_misc_count = iov_count; cmd->tx_size = tx_size; pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x," " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, text_length, conn->cid); return 0; } static int iscsit_send_reject( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { u32 iov_count = 0, tx_size = 0; struct iscsi_reject *hdr; struct kvec *iov; hdr = (struct iscsi_reject *) cmd->pdu; hdr->opcode = ISCSI_OP_REJECT; hdr->flags |= ISCSI_FLAG_CMD_FINAL; hton24(hdr->dlength, ISCSI_HDR_LEN); cmd->stat_sn = conn->stat_sn++; hdr->statsn = cpu_to_be32(cmd->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); iov = &cmd->iov_misc[0]; iov[iov_count].iov_base = cmd->pdu; iov[iov_count++].iov_len = ISCSI_HDR_LEN; iov[iov_count].iov_base = cmd->buf_ptr; iov[iov_count++].iov_len = ISCSI_HDR_LEN; tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN); if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)hdr, ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 HeaderDigest for" " REJECT PDU 0x%08x\n", *header_digest); } if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, (unsigned char *)cmd->buf_ptr, ISCSI_HDR_LEN, 0, NULL, (u8 *)&cmd->data_crc); 
iov[iov_count].iov_base = &cmd->data_crc; iov[iov_count++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; pr_debug("Attaching CRC32 DataDigest for REJECT" " PDU 0x%08x\n", cmd->data_crc); } cmd->iov_misc_count = iov_count; cmd->tx_size = tx_size; pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x," " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid); return 0; } static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn) { if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) || (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) { wait_for_completion_interruptible_timeout( &conn->tx_half_close_comp, ISCSI_TX_THREAD_TCP_TIMEOUT * HZ); } } #ifdef CONFIG_SMP void iscsit_thread_get_cpumask(struct iscsi_conn *conn) { struct iscsi_thread_set *ts = conn->thread_set; int ord, cpu; /* * thread_id is assigned from iscsit_global->ts_bitmap from * within iscsi_thread_set.c:iscsi_allocate_thread_sets() * * Here we use thread_id to determine which CPU that this * iSCSI connection's iscsi_thread_set will be scheduled to * execute upon. */ ord = ts->thread_id % cpumask_weight(cpu_online_mask); #if 0 pr_debug(">>>>>>>>>>>>>>>>>>>> Generated ord: %d from" " thread_id: %d\n", ord, ts->thread_id); #endif for_each_online_cpu(cpu) { if (ord-- == 0) { cpumask_set_cpu(cpu, conn->conn_cpumask); return; } } /* * This should never be reached.. */ dump_stack(); cpumask_setall(conn->conn_cpumask); } static inline void iscsit_thread_check_cpumask( struct iscsi_conn *conn, struct task_struct *p, int mode) { char buf[128]; /* * mode == 1 signals iscsi_target_tx_thread() usage. * mode == 0 signals iscsi_target_rx_thread() usage. */ if (mode == 1) { if (!conn->conn_tx_reset_cpumask) return; conn->conn_tx_reset_cpumask = 0; } else { if (!conn->conn_rx_reset_cpumask) return; conn->conn_rx_reset_cpumask = 0; } /* * Update the CPU mask for this single kthread so that * both TX and RX kthreads are scheduled to run on the * same CPU. 
*/ memset(buf, 0, 128); cpumask_scnprintf(buf, 128, conn->conn_cpumask); #if 0 pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():" " %s for %s\n", buf, p->comm); #endif set_cpus_allowed_ptr(p, conn->conn_cpumask); } #else void iscsit_thread_get_cpumask(struct iscsi_conn *conn) { return; } #define iscsit_thread_check_cpumask(X, Y, Z) ({}) #endif /* CONFIG_SMP */ int iscsi_target_tx_thread(void *arg) { u8 state; int eodr = 0; int ret = 0; int sent_status = 0; int use_misc = 0; int map_sg = 0; struct iscsi_cmd *cmd = NULL; struct iscsi_conn *conn; struct iscsi_queue_req *qr = NULL; struct se_cmd *se_cmd; struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); restart: conn = iscsi_tx_thread_pre_handler(ts); if (!conn) goto out; eodr = map_sg = ret = sent_status = use_misc = 0; while (!kthread_should_stop()) { /* * Ensure that both TX and RX per connection kthreads * are scheduled to run on the same CPU. 
*/ iscsit_thread_check_cpumask(conn, current, 1); schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT); if ((ts->status == ISCSI_THREAD_SET_RESET) || signal_pending(current)) goto transport_err; get_immediate: qr = iscsit_get_cmd_from_immediate_queue(conn); if (qr) { atomic_set(&conn->check_immediate_queue, 0); cmd = qr->cmd; state = qr->state; kmem_cache_free(lio_qr_cache, qr); spin_lock_bh(&cmd->istate_lock); switch (state) { case ISTATE_SEND_R2T: spin_unlock_bh(&cmd->istate_lock); ret = iscsit_send_r2t(cmd, conn); break; case ISTATE_REMOVE: spin_unlock_bh(&cmd->istate_lock); if (cmd->data_direction == DMA_TO_DEVICE) iscsit_stop_dataout_timer(cmd); spin_lock_bh(&conn->cmd_lock); list_del(&cmd->i_list); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd); goto get_immediate; case ISTATE_SEND_NOPIN_WANT_RESPONSE: spin_unlock_bh(&cmd->istate_lock); iscsit_mod_nopin_response_timer(conn); ret = iscsit_send_unsolicited_nopin(cmd, conn, 1); break; case ISTATE_SEND_NOPIN_NO_RESPONSE: spin_unlock_bh(&cmd->istate_lock); ret = iscsit_send_unsolicited_nopin(cmd, conn, 0); break; default: pr_err("Unknown Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, state, conn->cid); spin_unlock_bh(&cmd->istate_lock); goto transport_err; } if (ret < 0) { conn->tx_immediate_queue = 0; goto transport_err; } if (iscsit_send_tx_data(cmd, conn, 1) < 0) { conn->tx_immediate_queue = 0; iscsit_tx_thread_wait_for_tcp(conn); goto transport_err; } spin_lock_bh(&cmd->istate_lock); switch (state) { case ISTATE_SEND_R2T: spin_unlock_bh(&cmd->istate_lock); spin_lock_bh(&cmd->dataout_timeout_lock); iscsit_start_dataout_timer(cmd, conn); spin_unlock_bh(&cmd->dataout_timeout_lock); break; case ISTATE_SEND_NOPIN_WANT_RESPONSE: cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE; spin_unlock_bh(&cmd->istate_lock); break; case ISTATE_SEND_NOPIN_NO_RESPONSE: cmd->i_state = ISTATE_SENT_STATUS; spin_unlock_bh(&cmd->istate_lock); break; default: pr_err("Unknown 
Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, state, conn->cid); spin_unlock_bh(&cmd->istate_lock); goto transport_err; } goto get_immediate; } else conn->tx_immediate_queue = 0; get_response: qr = iscsit_get_cmd_from_response_queue(conn); if (qr) { cmd = qr->cmd; state = qr->state; kmem_cache_free(lio_qr_cache, qr); spin_lock_bh(&cmd->istate_lock); check_rsp_state: switch (state) { case ISTATE_SEND_DATAIN: spin_unlock_bh(&cmd->istate_lock); ret = iscsit_send_data_in(cmd, conn, &eodr); map_sg = 1; break; case ISTATE_SEND_STATUS: case ISTATE_SEND_STATUS_RECOVERY: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_status(cmd, conn); break; case ISTATE_SEND_LOGOUTRSP: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_logout_response(cmd, conn); break; case ISTATE_SEND_ASYNCMSG: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_conn_drop_async_message( cmd, conn); break; case ISTATE_SEND_NOPIN: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_nopin_response(cmd, conn); break; case ISTATE_SEND_REJECT: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_reject(cmd, conn); break; case ISTATE_SEND_TASKMGTRSP: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_task_mgt_rsp(cmd, conn); if (ret != 0) break; ret = iscsit_tmr_post_handler(cmd, conn); if (ret != 0) iscsit_fall_back_to_erl0(conn->sess); break; case ISTATE_SEND_TEXTRSP: spin_unlock_bh(&cmd->istate_lock); use_misc = 1; ret = iscsit_send_text_rsp(cmd, conn); break; default: pr_err("Unknown Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, state, conn->cid); spin_unlock_bh(&cmd->istate_lock); goto transport_err; } if (ret < 0) { conn->tx_response_queue = 0; goto transport_err; } se_cmd = &cmd->se_cmd; if (map_sg && !conn->conn_ops->IFMarker) { if (iscsit_fe_sendpage_sg(cmd, conn) < 0) { conn->tx_response_queue = 0; 
iscsit_tx_thread_wait_for_tcp(conn); iscsit_unmap_iovec(cmd); goto transport_err; } } else { if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) { conn->tx_response_queue = 0; iscsit_tx_thread_wait_for_tcp(conn); iscsit_unmap_iovec(cmd); goto transport_err; } } map_sg = 0; iscsit_unmap_iovec(cmd); spin_lock_bh(&cmd->istate_lock); switch (state) { case ISTATE_SEND_DATAIN: if (!eodr) goto check_rsp_state; if (eodr == 1) { cmd->i_state = ISTATE_SENT_LAST_DATAIN; sent_status = 1; eodr = use_misc = 0; } else if (eodr == 2) { cmd->i_state = state = ISTATE_SEND_STATUS; sent_status = 0; eodr = use_misc = 0; goto check_rsp_state; } break; case ISTATE_SEND_STATUS: use_misc = 0; sent_status = 1; break; case ISTATE_SEND_ASYNCMSG: case ISTATE_SEND_NOPIN: case ISTATE_SEND_STATUS_RECOVERY: case ISTATE_SEND_TEXTRSP: use_misc = 0; sent_status = 1; break; case ISTATE_SEND_REJECT: use_misc = 0; if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) { cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN; spin_unlock_bh(&cmd->istate_lock); complete(&cmd->reject_comp); goto transport_err; } complete(&cmd->reject_comp); break; case ISTATE_SEND_TASKMGTRSP: use_misc = 0; sent_status = 1; break; case ISTATE_SEND_LOGOUTRSP: spin_unlock_bh(&cmd->istate_lock); if (!iscsit_logout_post_handler(cmd, conn)) goto restart; spin_lock_bh(&cmd->istate_lock); use_misc = 0; sent_status = 1; break; default: pr_err("Unknown Opcode: 0x%02x ITT:" " 0x%08x, i_state: %d on CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->i_state, conn->cid); spin_unlock_bh(&cmd->istate_lock); goto transport_err; } if (sent_status) { cmd->i_state = ISTATE_SENT_STATUS; sent_status = 0; } spin_unlock_bh(&cmd->istate_lock); if (atomic_read(&conn->check_immediate_queue)) goto get_immediate; goto get_response; } else conn->tx_response_queue = 0; } transport_err: iscsit_take_action_for_connection_exit(conn); goto restart; out: return 0; } int iscsi_target_rx_thread(void *arg) { int ret; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; 
struct iscsi_conn *conn = NULL; struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; struct kvec iov; /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); restart: conn = iscsi_rx_thread_pre_handler(ts); if (!conn) goto out; while (!kthread_should_stop()) { /* * Ensure that both TX and RX per connection kthreads * are scheduled to run on the same CPU. */ iscsit_thread_check_cpumask(conn, current, 0); memset(buffer, 0, ISCSI_HDR_LEN); memset(&iov, 0, sizeof(struct kvec)); iov.iov_base = buffer; iov.iov_len = ISCSI_HDR_LEN; ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN); if (ret != ISCSI_HDR_LEN) { iscsit_rx_thread_wait_for_tcp(conn); goto transport_err; } /* * Set conn->bad_hdr for use with REJECT PDUs. */ memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN); if (conn->conn_ops->HeaderDigest) { iov.iov_base = &digest; iov.iov_len = ISCSI_CRC_LEN; ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); if (ret != ISCSI_CRC_LEN) { iscsit_rx_thread_wait_for_tcp(conn); goto transport_err; } iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, buffer, ISCSI_HDR_LEN, 0, NULL, (u8 *)&checksum); if (digest != checksum) { pr_err("HeaderDigest CRC32C failed," " received 0x%08x, computed 0x%08x\n", digest, checksum); /* * Set the PDU to 0xff so it will intentionally * hit default in the switch below. 
*/ memset(buffer, 0xff, ISCSI_HDR_LEN); spin_lock_bh(&conn->sess->session_stats_lock); conn->sess->conn_digest_errors++; spin_unlock_bh(&conn->sess->session_stats_lock); } else { pr_debug("Got HeaderDigest CRC32C" " 0x%08x\n", checksum); } } if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) goto transport_err; opcode = buffer[0] & ISCSI_OPCODE_MASK; if (conn->sess->sess_ops->SessionType && ((!(opcode & ISCSI_OP_TEXT)) || (!(opcode & ISCSI_OP_LOGOUT)))) { pr_err("Received illegal iSCSI Opcode: 0x%02x" " while in Discovery Session, rejecting.\n", opcode); iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, buffer, conn); goto transport_err; } switch (opcode) { case ISCSI_OP_SCSI_CMD: if (iscsit_handle_scsi_cmd(conn, buffer) < 0) goto transport_err; break; case ISCSI_OP_SCSI_DATA_OUT: if (iscsit_handle_data_out(conn, buffer) < 0) goto transport_err; break; case ISCSI_OP_NOOP_OUT: if (iscsit_handle_nop_out(conn, buffer) < 0) goto transport_err; break; case ISCSI_OP_SCSI_TMFUNC: if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0) goto transport_err; break; case ISCSI_OP_TEXT: if (iscsit_handle_text_cmd(conn, buffer) < 0) goto transport_err; break; case ISCSI_OP_LOGOUT: ret = iscsit_handle_logout_cmd(conn, buffer); if (ret > 0) { wait_for_completion_timeout(&conn->conn_logout_comp, SECONDS_FOR_LOGOUT_COMP * HZ); goto transport_err; } else if (ret < 0) goto transport_err; break; case ISCSI_OP_SNACK: if (iscsit_handle_snack(conn, buffer) < 0) goto transport_err; break; default: pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { pr_err("Cannot recover from unknown" " opcode while ERL=0, closing iSCSI connection" ".\n"); goto transport_err; } if (!conn->conn_ops->OFMarker) { pr_err("Unable to recover from unknown" " opcode while OFMarker=No, closing iSCSI" " connection.\n"); goto transport_err; } if (iscsit_recover_from_unknown_opcode(conn) < 0) { pr_err("Unable to recover from unknown" " opcode, closing iSCSI connection.\n"); 
goto transport_err; } break; } } transport_err: if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); iscsit_take_action_for_connection_exit(conn); goto restart; out: return 0; } static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) { struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL; struct iscsi_session *sess = conn->sess; /* * We expect this function to only ever be called from either RX or TX * thread context via iscsit_close_connection() once the other context * has been reset -> returned sleeping pre-handler state. */ spin_lock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { list_del(&cmd->i_list); spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); iscsit_free_cmd(cmd); spin_lock_bh(&conn->cmd_lock); } spin_unlock_bh(&conn->cmd_lock); } static void iscsit_stop_timers_for_cmds( struct iscsi_conn *conn) { struct iscsi_cmd *cmd; spin_lock_bh(&conn->cmd_lock); list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { if (cmd->data_direction == DMA_TO_DEVICE) iscsit_stop_dataout_timer(cmd); } spin_unlock_bh(&conn->cmd_lock); } int iscsit_close_connection( struct iscsi_conn *conn) { int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT); struct iscsi_session *sess = conn->sess; pr_debug("Closing iSCSI connection CID %hu on SID:" " %u\n", conn->cid, sess->sid); /* * Always up conn_logout_comp just in case the RX Thread is sleeping * and the logout response never got sent because the connection * failed. */ complete(&conn->conn_logout_comp); iscsi_release_thread_set(conn); iscsit_stop_timers_for_cmds(conn); iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); iscsit_free_queue_reqs_for_conn(conn); /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands * for realligence. 
* * During normal operation clear the out of order commands (but * do not free the struct iscsi_ooo_cmdsn's) and release all * struct iscsi_cmds. */ if (atomic_read(&conn->connection_recovery)) { iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn); iscsit_prepare_cmds_for_realligance(conn); } else { iscsit_clear_ooo_cmdsns_for_conn(conn); iscsit_release_commands_from_conn(conn); } /* * Handle decrementing session or connection usage count if * a logout response was not able to be sent because the * connection failed. Fall back to Session Recovery here. */ if (atomic_read(&conn->conn_logout_remove)) { if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) { iscsit_dec_conn_usage_count(conn); iscsit_dec_session_usage_count(sess); } if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) iscsit_dec_conn_usage_count(conn); atomic_set(&conn->conn_logout_remove, 0); atomic_set(&sess->session_reinstatement, 0); atomic_set(&sess->session_fall_back_to_erl0, 1); } spin_lock_bh(&sess->conn_lock); list_del(&conn->conn_list); /* * Attempt to let the Initiator know this connection failed by * sending an Connection Dropped Async Message on another * active connection. */ if (atomic_read(&conn->connection_recovery)) iscsit_build_conn_drop_async_message(conn); spin_unlock_bh(&sess->conn_lock); /* * If connection reinstatement is being performed on this connection, * up the connection reinstatement semaphore that is being blocked on * in iscsit_cause_connection_reinstatement(). */ spin_lock_bh(&conn->state_lock); if (atomic_read(&conn->sleep_on_conn_wait_comp)) { spin_unlock_bh(&conn->state_lock); complete(&conn->conn_wait_comp); wait_for_completion(&conn->conn_post_wait_comp); spin_lock_bh(&conn->state_lock); } /* * If connection reinstatement is being performed on this connection * by receiving a REMOVECONNFORRECOVERY logout request, up the * connection wait rcfr semaphore that is being blocked on * an iscsit_connection_reinstatement_rcfr(). 
*/ if (atomic_read(&conn->connection_wait_rcfr)) { spin_unlock_bh(&conn->state_lock); complete(&conn->conn_wait_rcfr_comp); wait_for_completion(&conn->conn_post_wait_comp); spin_lock_bh(&conn->state_lock); } atomic_set(&conn->connection_reinstatement, 1); spin_unlock_bh(&conn->state_lock); /* * If any other processes are accessing this connection pointer we * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); if (conn->conn_rx_hash.tfm) crypto_free_hash(conn->conn_rx_hash.tfm); if (conn->conn_tx_hash.tfm) crypto_free_hash(conn->conn_tx_hash.tfm); if (conn->conn_cpumask) free_cpumask_var(conn->conn_cpumask); kfree(conn->conn_ops); conn->conn_ops = NULL; if (conn->sock) { if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { kfree(conn->sock->file); conn->sock->file = NULL; } sock_release(conn->sock); } conn->thread_set = NULL; pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); conn->conn_state = TARG_CONN_STATE_FREE; kfree(conn); spin_lock_bh(&sess->conn_lock); atomic_dec(&sess->nconn); pr_debug("Decremented iSCSI connection count to %hu from node:" " %s\n", atomic_read(&sess->nconn), sess->sess_ops->InitiatorName); /* * Make sure that if one connection fails in an non ERL=2 iSCSI * Session that they all fail. */ if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout && !atomic_read(&sess->session_logout)) atomic_set(&sess->session_fall_back_to_erl0, 1); /* * If this was not the last connection in the session, and we are * performing session reinstatement or falling back to ERL=0, call * iscsit_stop_session() without sleeping to shutdown the other * active connections. 
*/ if (atomic_read(&sess->nconn)) { if (!atomic_read(&sess->session_reinstatement) && !atomic_read(&sess->session_fall_back_to_erl0)) { spin_unlock_bh(&sess->conn_lock); return 0; } if (!atomic_read(&sess->session_stop_active)) { atomic_set(&sess->session_stop_active, 1); spin_unlock_bh(&sess->conn_lock); iscsit_stop_session(sess, 0, 0); return 0; } spin_unlock_bh(&sess->conn_lock); return 0; } /* * If this was the last connection in the session and one of the * following is occurring: * * Session Reinstatement is not being performed, and are falling back * to ERL=0 call iscsit_close_session(). * * Session Logout was requested. iscsit_close_session() will be called * elsewhere. * * Session Continuation is not being performed, start the Time2Retain * handler and check if sleep_on_sess_wait_sem is active. */ if (!atomic_read(&sess->session_reinstatement) && atomic_read(&sess->session_fall_back_to_erl0)) { spin_unlock_bh(&sess->conn_lock); iscsit_close_session(sess); return 0; } else if (atomic_read(&sess->session_logout)) { pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); sess->session_state = TARG_SESS_STATE_FREE; spin_unlock_bh(&sess->conn_lock); if (atomic_read(&sess->sleep_on_sess_wait_comp)) complete(&sess->session_wait_comp); return 0; } else { pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); sess->session_state = TARG_SESS_STATE_FAILED; if (!atomic_read(&sess->session_continuation)) { spin_unlock_bh(&sess->conn_lock); iscsit_start_time2retain_handler(sess); } else spin_unlock_bh(&sess->conn_lock); if (atomic_read(&sess->sleep_on_sess_wait_comp)) complete(&sess->session_wait_comp); return 0; } spin_unlock_bh(&sess->conn_lock); return 0; } int iscsit_close_session(struct iscsi_session *sess) { struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; if (atomic_read(&sess->nconn)) { pr_err("%d connection(s) still exist for iSCSI session" " to %s\n", atomic_read(&sess->nconn), sess->sess_ops->InitiatorName); BUG(); } 
spin_lock_bh(&se_tpg->session_lock);
	/* Flag the session as logging out / reinstating and stop Time2Retain. */
	atomic_set(&sess->session_logout, 1);
	atomic_set(&sess->session_reinstatement, 1);
	iscsit_stop_time2retain_timer(sess);
	spin_unlock_bh(&se_tpg->session_lock);

	/*
	 * transport_deregister_session_configfs() will clear the
	 * struct se_node_acl->nacl_sess pointer now as an iscsi_np process
	 * context can be setting it again with __transport_register_session()
	 * in iscsi_post_login_handler() again after the iscsit_stop_session()
	 * completes in iscsi_np context.
	 */
	transport_deregister_session_configfs(sess->se_sess);

	/*
	 * If any other processes are accessing this session pointer we must
	 * wait until they have completed.  If we are in an interrupt (the
	 * time2retain handler) and contain an active session usage count we
	 * restart the timer and exit.
	 */
	if (!in_interrupt()) {
		if (iscsit_check_session_usage_count(sess) == 1)
			iscsit_stop_session(sess, 1, 1);
	} else {
		if (iscsit_check_session_usage_count(sess) == 2) {
			/* Undo the logout flag set above before re-arming the timer. */
			atomic_set(&sess->session_logout, 0);
			iscsit_start_time2retain_handler(sess);
			return 0;
		}
	}

	transport_deregister_session(sess->se_sess);

	if (sess->sess_ops->ErrorRecoveryLevel == 2)
		iscsit_free_connection_recovery_entires(sess);

	iscsit_free_all_ooo_cmdsns(sess);

	/* Session accounting and final teardown run under the TPG session_lock. */
	spin_lock_bh(&se_tpg->session_lock);
	pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
	sess->session_state = TARG_SESS_STATE_FREE;
	pr_debug("Released iSCSI session from node: %s\n",
			sess->sess_ops->InitiatorName);
	tpg->nsessions--;
	if (tpg->tpg_tiqn)
		tpg->tpg_tiqn->tiqn_nsessions--;

	pr_debug("Decremented number of active iSCSI Sessions on"
		" iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions);

	/* Drop the session's index from the global sess_idr. */
	spin_lock(&sess_idr_lock);
	idr_remove(&sess_idr, sess->session_index);
	spin_unlock(&sess_idr_lock);

	kfree(sess->sess_ops);
	sess->sess_ops = NULL;
	spin_unlock_bh(&se_tpg->session_lock);

	kfree(sess);
	return 0;
}

/*
 * Logout post-processing for a "close session" logout: signals the TX
 * thread side of the thread set, completes conn_logout_comp, drops the
 * connection and session usage counts taken for the logout, stops the
 * session (sleeping on both session and connections) and closes it.
 */
static void iscsit_logout_post_handler_closesession(
	struct iscsi_conn *conn)
{
	struct iscsi_session *sess = conn->sess;

	iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); iscsit_dec_conn_usage_count(conn); iscsit_stop_session(sess, 1, 1); iscsit_dec_session_usage_count(sess); iscsit_close_session(sess); } static void iscsit_logout_post_handler_samecid( struct iscsi_conn *conn) { iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD); iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); iscsit_cause_connection_reinstatement(conn, 1); iscsit_dec_conn_usage_count(conn); } static void iscsit_logout_post_handler_diffcid( struct iscsi_conn *conn, u16 cid) { struct iscsi_conn *l_conn; struct iscsi_session *sess = conn->sess; if (!sess) return; spin_lock_bh(&sess->conn_lock); list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) { if (l_conn->cid == cid) { iscsit_inc_conn_usage_count(l_conn); break; } } spin_unlock_bh(&sess->conn_lock); if (!l_conn) return; if (l_conn->sock) l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN); spin_lock_bh(&l_conn->state_lock); pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; spin_unlock_bh(&l_conn->state_lock); iscsit_cause_connection_reinstatement(l_conn, 1); iscsit_dec_conn_usage_count(l_conn); } /* * Return of 0 causes the TX thread to restart. 
*/ static int iscsit_logout_post_handler( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { int ret = 0; switch (cmd->logout_reason) { case ISCSI_LOGOUT_REASON_CLOSE_SESSION: switch (cmd->logout_response) { case ISCSI_LOGOUT_SUCCESS: case ISCSI_LOGOUT_CLEANUP_FAILED: default: iscsit_logout_post_handler_closesession(conn); break; } ret = 0; break; case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: if (conn->cid == cmd->logout_cid) { switch (cmd->logout_response) { case ISCSI_LOGOUT_SUCCESS: case ISCSI_LOGOUT_CLEANUP_FAILED: default: iscsit_logout_post_handler_samecid(conn); break; } ret = 0; } else { switch (cmd->logout_response) { case ISCSI_LOGOUT_SUCCESS: iscsit_logout_post_handler_diffcid(conn, cmd->logout_cid); break; case ISCSI_LOGOUT_CID_NOT_FOUND: case ISCSI_LOGOUT_CLEANUP_FAILED: default: break; } ret = 1; } break; case ISCSI_LOGOUT_REASON_RECOVERY: switch (cmd->logout_response) { case ISCSI_LOGOUT_SUCCESS: case ISCSI_LOGOUT_CID_NOT_FOUND: case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED: case ISCSI_LOGOUT_CLEANUP_FAILED: default: break; } ret = 1; break; default: break; } return ret; } void iscsit_fail_session(struct iscsi_session *sess) { struct iscsi_conn *conn; spin_lock_bh(&sess->conn_lock); list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n"); conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT; } spin_unlock_bh(&sess->conn_lock); pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); sess->session_state = TARG_SESS_STATE_FAILED; } int iscsit_free_session(struct iscsi_session *sess) { u16 conn_count = atomic_read(&sess->nconn); struct iscsi_conn *conn, *conn_tmp = NULL; int is_last; spin_lock_bh(&sess->conn_lock); atomic_set(&sess->sleep_on_sess_wait_comp, 1); list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, conn_list) { if (conn_count == 0) break; if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { is_last = 1; } else { iscsit_inc_conn_usage_count(conn_tmp); is_last = 0; } 
iscsit_inc_conn_usage_count(conn);

		/* conn_lock is dropped around the (sleeping) reinstatement call. */
		spin_unlock_bh(&sess->conn_lock);
		iscsit_cause_connection_reinstatement(conn, 1);
		spin_lock_bh(&sess->conn_lock);

		iscsit_dec_conn_usage_count(conn);
		if (is_last == 0)
			iscsit_dec_conn_usage_count(conn_tmp);

		conn_count--;
	}

	/* Wait for session_wait_comp only if connections still remain. */
	if (atomic_read(&sess->nconn)) {
		spin_unlock_bh(&sess->conn_lock);
		wait_for_completion(&sess->session_wait_comp);
	} else
		spin_unlock_bh(&sess->conn_lock);

	iscsit_close_session(sess);
	return 0;
}

/*
 * Trigger connection reinstatement on every connection of @sess.
 *
 * @session_sleep:    when non-zero, sets sleep_on_sess_wait_comp and, if the
 *                    session still has connections after the walk, waits on
 *                    session_wait_comp.
 * @connection_sleep: when non-zero, each connection is reinstated with the
 *                    sleep argument set to 1 and sess->conn_lock dropped
 *                    around the call; otherwise the call is made with 0
 *                    while conn_lock is held.
 */
void iscsit_stop_session(
	struct iscsi_session *sess,
	int session_sleep,
	int connection_sleep)
{
	/* Snapshot of the connection count; bounds the sleeping walk below. */
	u16 conn_count = atomic_read(&sess->nconn);
	struct iscsi_conn *conn, *conn_tmp = NULL;
	int is_last;

	spin_lock_bh(&sess->conn_lock);
	if (session_sleep)
		atomic_set(&sess->sleep_on_sess_wait_comp, 1);

	if (connection_sleep) {
		list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
				conn_list) {
			if (conn_count == 0)
				break;

			/*
			 * Unless this is the last entry, also take a usage
			 * reference on the next connection before conn_lock
			 * is dropped below.
			 */
			if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
				is_last = 1;
			} else {
				iscsit_inc_conn_usage_count(conn_tmp);
				is_last = 0;
			}
			iscsit_inc_conn_usage_count(conn);

			spin_unlock_bh(&sess->conn_lock);
			iscsit_cause_connection_reinstatement(conn, 1);
			spin_lock_bh(&sess->conn_lock);

			iscsit_dec_conn_usage_count(conn);
			if (is_last == 0)
				iscsit_dec_conn_usage_count(conn_tmp);
			conn_count--;
		}
	} else {
		list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
			iscsit_cause_connection_reinstatement(conn, 0);
	}

	if (session_sleep && atomic_read(&sess->nconn)) {
		spin_unlock_bh(&sess->conn_lock);
		wait_for_completion(&sess->session_wait_comp);
	} else
		spin_unlock_bh(&sess->conn_lock);
}

/*
 * Free the sessions on @tpg's session list.
 *
 * @tpg:   portal group whose sessions are to be released.
 * @force: when zero, nothing is released if the group still has sessions.
 *
 * Sessions that are already falling back to ERL=0, already logging out, or
 * whose Time2Retain timer is flagged ISCSI_TF_EXPIRED are skipped.
 *
 * Returns -1 if sessions exist and @force is zero, otherwise 0.
 */
int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
{
	struct iscsi_session *sess;
	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
	struct se_session *se_sess, *se_sess_tmp;
	int session_count = 0;

	spin_lock_bh(&se_tpg->session_lock);
	if (tpg->nsessions && !force) {
		spin_unlock_bh(&se_tpg->session_lock);
		return -1;
	}

	list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
			sess_list) {
		sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;

		spin_lock(&sess->conn_lock);
		if (atomic_read(&sess->session_fall_back_to_erl0) ||
		    atomic_read(&sess->session_logout) ||
		    (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
			spin_unlock(&sess->conn_lock);
			continue;
		}
		atomic_set(&sess->session_reinstatement, 1);
		spin_unlock(&sess->conn_lock);
		/* session_lock is dropped while the session is being freed. */
		spin_unlock_bh(&se_tpg->session_lock);

		iscsit_free_session(sess);
		spin_lock_bh(&se_tpg->session_lock);

		session_count++;
	}
	spin_unlock_bh(&se_tpg->session_lock);

	pr_debug("Released %d iSCSI Session(s) from Target Portal"
			" Group: %hu\n", session_count, tpg->tpgt);
	return 0;
}

MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure");
MODULE_VERSION("4.1.x");
MODULE_AUTHOR("nab@Linux-iSCSI.org");
MODULE_LICENSE("GPL");

module_init(iscsi_target_init_module);
module_exit(iscsi_target_cleanup_module);