path: root/mm/fremap.c
blob: 07a9c82ce1a3fa0e24da2f2da23e9416e3f1819c
/*
 * linux/mm/fremap.c
 *
 * Explicit pagetable population and nonlinear (random) mappings support.
 *
 * started by Ingo Molnar, Copyright (C) 2002, 2003
 */
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>

#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
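
/*
 * Overview: sys_remap_file_pages() walks the requested range and, via
 * populate_range()/install_file_pte(), replaces each pte with a
 * non-present "file pte" that records the wanted file offset.  On a
 * later fault the nonlinear fault path decodes that offset again
 * (pte_to_pgoff()) and pulls in the right page, so a single VM_SHARED
 * vma can map the pages of one file in an arbitrary order.
 */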

static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep)
{
	pte_t pte = *ptep;

	if (pte_present(pte)) {
		struct page *page;

		flush_cache_page(vma, addr, pte_pfn(pte));
		pte = ptep_clear_flush(vma, addr, ptep);
		page = vm_normal_page(vma, addr, pte);
		if (page) {
			if (pte_dirty(pte))
				set_page_dirty(page);
			page_remove_rmap(page, vma);
			page_cache_release(page);
			update_hiwater_rss(mm);
			dec_mm_counter(mm, file_rss);
		}
	} else {
		if (!pte_file(pte))
			free_swap_and_cache(pte_to_swp_entry(pte));
		pte_clear_not_present_full(mm, addr, ptep, 0);
	}
}

/*
 * Install a file pte at the given virtual memory address, releasing
 * any previously existing mapping.
 */
static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
	int err = -ENOMEM;
	pte_t *pte;
	spinlock_t *ptl;

	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto out;

	if (!pte_none(*pte))
		zap_pte(mm, vma, addr, pte);

	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
	/*
	 * We don't need to run update_mmu_cache() here because the "file pte"
	 * being installed by install_file_pte() is not a real pte - it's a
	 * non-present entry (like a swap entry), noting what file offset should
	 * be mapped there when there's a fault (in a non-linear vma where
	 * that's not obvious).
	 */
	pte_unmap_unlock(pte, ptl);
	err = 0;
out:
	return err;
}
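
/*
 * Illustration (hypothetical, not taken from any particular arch): a
 * file pte is a non-present entry whose payload is a file page offset.
 * Conceptually, the arch-specific helpers used above behave like:
 *
 *	pte_t pgoff_to_pte(unsigned long pgoff)
 *	{
 *		return __pte((pgoff << PAGE_SHIFT) | _PAGE_FILE);
 *	}
 *
 *	unsigned long pte_to_pgoff(pte_t pte)
 *	{
 *		return pte_val(pte) >> PAGE_SHIFT;
 *	}
 *
 * The real definitions live in each architecture's <asm/pgtable.h>
 * and pack the bits differently; PTE_FILE_MAX_BITS (checked in
 * sys_remap_file_pages() below) says how many offset bits fit.
 */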

static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, unsigned long size, pgoff_t pgoff)
{
	int err;

	do {
		err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
		if (err)
			return err;

		size -= PAGE_SIZE;
		addr += PAGE_SIZE;
		pgoff++;
	} while (size);

	return 0;
}

/**
 * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
 * @start: start of the remapped virtual memory range
 * @size: size of the remapped virtual memory range
 * @prot: new protection bits of the range (see NOTE)
 * @pgoff: to-be-mapped page of the backing store file
 * @flags: 0 or MAP_NONBLOCK - the latter will cause no IO.
 *
 * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
 * (shared backing store file).
 *
 * This syscall works purely via pagetables, so it's the most efficient
 * way to map the same (large) file into a given virtual window. Unlike
 * mmap()/mremap() it does not create any new vmas. The new mappings are
 * also safe across swapout.
 *
 * NOTE: the @prot parameter right now is ignored (but must be zero),
 * and the vma's default protection is used. Arbitrary protections
 * might be implemented in the future.
 */
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
	unsigned long prot, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct address_space *mapping;
	unsigned long end = start + size;
	struct vm_area_struct *vma;
	int err = -EINVAL;
	int has_write_lock = 0;

	if (prot)
		return err;
	/*
	 * Sanitize the syscall parameters:
	 */
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	/* Does the address range wrap, or is the span zero-sized? */
	if (start + size <= start)
		return err;

	/* Can we represent this offset inside this architecture's pte's? */
#if PTE_FILE_MAX_BITS < BITS_PER_LONG
	if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
		return err;
#endif

	/* We need down_write() to change vma->vm_flags. */
	down_read(&mm->mmap_sem);
 retry:
	vma = find_vma(mm, start);

	/*
	 * Make sure the vma is shared, that it supports prefaulting,
	 * and that the remapped range is valid and fully within
	 * the single existing vma.  vm_private_data is used as a
	 * swapout cursor in a VM_NONLINEAR vma.
	 */
	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
		goto out;

	if (!(vma->vm_flags & VM_CAN_NONLINEAR))
		goto out;

	if (end <= start || start < vma->vm_start || end > vma->vm_end)
		goto out;

	/* Must set VM_NONLINEAR before any pages are populated. */
	if (!(vma->vm_flags & VM_NONLINEAR)) {
		/* Don't need a nonlinear mapping, exit success */
		if (pgoff == linear_page_index(vma, start)) {
			err = 0;
			goto out;
		}

		if (!has_write_lock) {
			up_read(&mm->mmap_sem);
			down_write(&mm->mmap_sem);
			has_write_lock = 1;
			goto retry;
		}
		mapping = vma->vm_file->f_mapping;
		/*
		 * page_mkclean doesn't work on nonlinear vmas, so if
		 * dirty pages need to be accounted, emulate with linear
		 * vmas.
		 */
		if (mapping_cap_account_dirty(mapping)) {
			unsigned long addr;
			struct file *file = vma->vm_file;

			flags &= MAP_NONBLOCK;
			get_file(file);
			addr = mmap_region(file, start, size,
					flags, vma->vm_flags, pgoff, 1);
			fput(file);
			if (IS_ERR_VALUE(addr)) {
				err = addr;
			} else {
				BUG_ON(addr != start);
				err = 0;
			}
			goto out;
		}
		spin_lock(&mapping->i_mmap_lock);
		flush_dcache_mmap_lock(mapping);
		vma->vm_flags |= VM_NONLINEAR;
		vma_prio_tree_remove(vma, &mapping->i_mmap);
		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		flush_dcache_mmap_unlock(mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}

	err = populate_range(mm, vma, start, size, pgoff);
	if (!err && !(flags & MAP_NONBLOCK)) {
		if (unlikely(has_write_lock)) {
			downgrade_write(&mm->mmap_sem);
			has_write_lock = 0;
		}
		make_pages_present(start, start+size);
	}

	/*
	 * We can't clear VM_NONLINEAR because we'd have to do
	 * it after ->populate completes, and that would prevent
	 * downgrading the lock.  (Locks can't be upgraded).
	 */

out:
	if (likely(!has_write_lock))
		up_read(&mm->mmap_sem);
	else
		up_write(&mm->mmap_sem);

	return err;
}
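
/*
 * Example (userspace, illustrative only -- not part of this file):
 * building a nonlinear window with remap_file_pages(2).  The mapping
 * must be MAP_SHARED and prot must be 0, as noted above; "data" and
 * the offsets are made up for the sketch.
 *
 *	long psz = sysconf(_SC_PAGESIZE);
 *	int fd = open("data", O_RDWR);
 *	char *win = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *	// Rebind the first page of the window to file page 3; the
 *	// remaining pages keep their linear file offsets 1..3.
 *	remap_file_pages(win, psz, 0, 3, 0);
 */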

hl kwd">likely(!list_empty(&shost->free_list))) { cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list); list_del_init(&cmd->list); } spin_unlock_irqrestore(&shost->free_list_lock, flags); } return cmd; } /* * Function: scsi_get_command() * * Purpose: Allocate and setup a scsi command block * * Arguments: dev - parent scsi device * gfp_mask- allocator flags * * Returns: The allocated scsi command structure. */ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int gfp_mask) { struct scsi_cmnd *cmd; /* Bail if we can't get a reference to the device */ if (!get_device(&dev->sdev_gendev)) return NULL; cmd = __scsi_get_command(dev->host, gfp_mask); if (likely(cmd != NULL)) { unsigned long flags; memset(cmd, 0, sizeof(*cmd)); cmd->device = dev; init_timer(&cmd->eh_timeout); INIT_LIST_HEAD(&cmd->list); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); spin_unlock_irqrestore(&dev->list_lock, flags); } else put_device(&dev->sdev_gendev); return cmd; } EXPORT_SYMBOL(scsi_get_command); /* * Function: scsi_put_command() * * Purpose: Free a scsi command block * * Arguments: cmd - command block to free * * Returns: Nothing. * * Notes: The command must not belong to any lists. */ void scsi_put_command(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; struct Scsi_Host *shost = sdev->host; unsigned long flags; /* serious error if the command hasn't come from a device list */ spin_lock_irqsave(&cmd->device->list_lock, flags); BUG_ON(list_empty(&cmd->list)); list_del_init(&cmd->list); spin_unlock(&cmd->device->list_lock); /* changing locks here, don't need to restore the irq state */ spin_lock(&shost->free_list_lock); if (unlikely(list_empty(&shost->free_list))) { list_add(&cmd->list, &shost->free_list); cmd = NULL; } spin_unlock_irqrestore(&shost->free_list_lock, flags); if (likely(cmd != NULL)) kmem_cache_free(shost->cmd_pool->slab, cmd); put_device(&sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_put_command); /* * Function: scsi_setup_command_freelist() * * Purpose: Setup the command freelist for a scsi host. * * Arguments: shost - host to allocate the freelist for. * * Returns: Nothing. */ int scsi_setup_command_freelist(struct Scsi_Host *shost) { struct scsi_host_cmd_pool *pool; struct scsi_cmnd *cmd; spin_lock_init(&shost->free_list_lock); INIT_LIST_HEAD(&shost->free_list); /* * Select a command slab for this host and create it if not * yet existant. */ down(&host_cmd_pool_mutex); pool = (shost->unchecked_isa_dma ? &scsi_cmd_dma_pool : &scsi_cmd_pool); if (!pool->users) { pool->slab = kmem_cache_create(pool->name, sizeof(struct scsi_cmnd), 0, pool->slab_flags, NULL, NULL); if (!pool->slab) goto fail; } pool->users++; shost->cmd_pool = pool; up(&host_cmd_pool_mutex); /* * Get one backup command for this host. */ cmd = kmem_cache_alloc(shost->cmd_pool->slab, GFP_KERNEL | shost->cmd_pool->gfp_mask); if (!cmd) goto fail2; list_add(&cmd->list, &shost->free_list); return 0; fail2: if (!--pool->users) kmem_cache_destroy(pool->slab); return -ENOMEM; fail: up(&host_cmd_pool_mutex); return -ENOMEM; } /* * Function: scsi_destroy_command_freelist() * * Purpose: Release the command freelist for a scsi host. 
* * Arguments: shost - host that's freelist is going to be destroyed */ void scsi_destroy_command_freelist(struct Scsi_Host *shost) { while (!list_empty(&shost->free_list)) { struct scsi_cmnd *cmd; cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list); list_del_init(&cmd->list); kmem_cache_free(shost->cmd_pool->slab, cmd); } down(&host_cmd_pool_mutex); if (!--shost->cmd_pool->users) kmem_cache_destroy(shost->cmd_pool->slab); up(&host_cmd_pool_mutex); } #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd) { unsigned int level; struct scsi_device *sdev; /* * If ML QUEUE log level is greater than or equal to: * * 1: nothing (match completion) * * 2: log opcode + command of all commands * * 3: same as 2 plus dump cmd address * * 4: same as 3 plus dump extra junk */ if (unlikely(scsi_logging_level)) { level = SCSI_LOG_LEVEL(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS); if (level > 1) { sdev = cmd->device; printk(KERN_INFO "scsi <%d:%d:%d:%d> send ", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); if (level > 2) printk("0x%p ", cmd); /* * spaces to match disposition and cmd->result * output in scsi_log_completion. */ printk(" "); scsi_print_command(cmd); if (level > 3) { printk(KERN_INFO "buffer = 0x%p, bufflen = %d," " done = 0x%p, queuecommand 0x%p\n", cmd->buffer, cmd->bufflen, cmd->done, sdev->host->hostt->queuecommand); } } } } void scsi_log_completion(struct scsi_cmnd *cmd, int disposition) { unsigned int level; struct scsi_device *sdev; /* * If ML COMPLETE log level is greater than or equal to: * * 1: log disposition, result, opcode + command, and conditionally * sense data for failures or non SUCCESS dispositions. * * 2: same as 1 but for all command completions. * * 3: same as 2 plus dump cmd address * * 4: same as 3 plus dump extra junk */ if (unlikely(scsi_logging_level)) { level = SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS); if (((level > 0) && (cmd->result || disposition != SUCCESS)) || (level > 1)) { sdev = cmd->device; printk(KERN_INFO "scsi <%d:%d:%d:%d> done ", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); if (level > 2) printk("0x%p ", cmd); /* * Dump truncated values, so we usually fit within * 80 chars. */ switch (disposition) { case SUCCESS: printk("SUCCESS"); break; case NEEDS_RETRY: printk("RETRY "); break; case ADD_TO_MLQUEUE: printk("MLQUEUE"); break; case FAILED: printk("FAILED "); break; case TIMEOUT_ERROR: /* * If called via scsi_times_out. */ printk("TIMEOUT"); break; default: printk("UNKNOWN"); } printk(" %8x ", cmd->result); scsi_print_command(cmd); if (status_byte(cmd->result) & CHECK_CONDITION) { /* * XXX The scsi_print_sense formatting/prefix * doesn't match this function. */ scsi_print_sense("", cmd); } if (level > 3) { printk(KERN_INFO "scsi host busy %d failed %d\n", sdev->host->host_busy, sdev->host->host_failed); } } } } #endif /* * Assign a serial number and pid to the request for error recovery * and debugging purposes. Protected by the Host_Lock of host. */ static inline void scsi_cmd_get_serial(struct Scsi_Host *host, struct scsi_cmnd *cmd) { cmd->serial_number = host->cmd_serial_number++; if (cmd->serial_number == 0) cmd->serial_number = host->cmd_serial_number++; cmd->pid = host->cmd_pid++; if (cmd->pid == 0) cmd->pid = host->cmd_pid++; } /* * Function: scsi_dispatch_command * * Purpose: Dispatch a command to the low-level driver. * * Arguments: cmd - command block we are dispatching. 
* * Notes: */ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; unsigned long flags = 0; unsigned long timeout; int rtn = 0; /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; atomic_inc(&cmd->device->iorequest_cnt); __scsi_done(cmd); /* return 0 (because the command has been processed) */ goto out; } /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */ if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) { /* * in SDEV_BLOCK, the command is just put back on the device * queue. The suspend state has already blocked the queue so * future requests should not occur until the device * transitions out of the suspend state. */ scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); /* * NOTE: rtn is still zero here because we don't need the * queue to be plugged on return (it's already stopped) */ goto out; } /* * If SCSI-2 or lower, store the LUN value in cmnd. */ if (cmd->device->scsi_level <= SCSI_2) { cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) | (cmd->device->lun << 5 & 0xe0); } /* * We will wait MIN_RESET_DELAY clock ticks after the last reset so * we can avoid the drive not being ready. */ timeout = host->last_reset + MIN_RESET_DELAY; if (host->resetting && time_before(jiffies, timeout)) { int ticks_remaining = timeout - jiffies; /* * NOTE: This may be executed from within an interrupt * handler! This is bad, but for now, it'll do. The irq * level of the interrupt handler has been masked out by the * platform dependent interrupt handling code already, so the * sti() here will not cause another call to the SCSI host's * interrupt handler (assuming there is one irq-level per * host). */ while (--ticks_remaining >= 0) mdelay(1 + 999 / HZ); host->resetting = 0; } /* * AK: unlikely race here: for some reason the timer could * expire before the serial number is set up below. */ scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out); scsi_log_send(cmd); /* * We will use a queued command if possible, otherwise we will * emulate the queuing and calling of completion function ourselves. */ atomic_inc(&cmd->device->iorequest_cnt); /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. */ if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) { SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); cmd->result = (DID_ABORT << 16); scsi_done(cmd); goto out; } spin_lock_irqsave(host->host_lock, flags); scsi_cmd_get_serial(host, cmd); if (unlikely(test_bit(SHOST_CANCEL, &host->shost_state))) { cmd->result = (DID_NO_CONNECT << 16); scsi_done(cmd); } else { rtn = host->hostt->queuecommand(cmd, scsi_done); } spin_unlock_irqrestore(host->host_lock, flags); if (rtn) { if (scsi_delete_timer(cmd)) { atomic_inc(&cmd->device->iodone_cnt); scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? rtn : SCSI_MLQUEUE_HOST_BUSY); } SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } out: SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; } /* * Function: scsi_init_cmd_from_req * * Purpose: Queue a SCSI command * Purpose: Initialize a struct scsi_cmnd from a struct scsi_request * * Arguments: cmd - command descriptor. * sreq - Request from the queue. 
* * Lock status: None needed. * * Returns: Nothing. * * Notes: Mainly transfer data from the request structure to the * command structure. The request structure is allocated * using the normal memory allocator, and requests can pile * up to more or less any depth. The command structure represents * a consumable resource, as these are allocated into a pool * when the SCSI subsystem initializes. The preallocation is * required so that in low-memory situations a disk I/O request * won't cause the memory manager to try and write out a page. * The request structure is generally used by ioctls and character * devices. */ void scsi_init_cmd_from_req(struct scsi_cmnd *cmd, struct scsi_request *sreq) { sreq->sr_command = cmd; cmd->cmd_len = sreq->sr_cmd_len; cmd->use_sg = sreq->sr_use_sg; cmd->request = sreq->sr_request; memcpy(cmd->data_cmnd, sreq->sr_cmnd, sizeof(cmd->data_cmnd)); cmd->serial_number = 0; cmd->bufflen = sreq->sr_bufflen; cmd->buffer = sreq->sr_buffer; cmd->retries = 0; cmd->allowed = sreq->sr_allowed; cmd->done = sreq->sr_done; cmd->timeout_per_command = sreq->sr_timeout_per_command; cmd->sc_data_direction = sreq->sr_data_direction; cmd->sglist_len = sreq->sr_sglist_len; cmd->underflow = sreq->sr_underflow; cmd->sc_request = sreq; memcpy(cmd->cmnd, sreq->sr_cmnd, sizeof(sreq->sr_cmnd)); /* * Zero the sense buffer. Some host adapters automatically request * sense on error. 0 is not a valid sense code. */ memset(cmd->sense_buffer, 0, sizeof(sreq->sr_sense_buffer)); cmd->request_buffer = sreq->sr_buffer; cmd->request_bufflen = sreq->sr_bufflen; cmd->old_use_sg = cmd->use_sg; if (cmd->cmd_len == 0) cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); cmd->old_cmd_len = cmd->cmd_len; cmd->sc_old_data_direction = cmd->sc_data_direction; cmd->old_underflow = cmd->underflow; /* * Start the timer ticking. */ cmd->result = 0; SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n")); } /* * Per-CPU I/O completion queue. */ static DEFINE_PER_CPU(struct list_head, scsi_done_q); /** * scsi_done - Enqueue the finished SCSI command into the done queue. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives * ownership back to SCSI Core -- i.e. the LLDD has finished with it. * * This function is the mid-level's (SCSI Core) interrupt routine, which * regains ownership of the SCSI command (de facto) from a LLDD, and enqueues * the command to the done queue for further processing. * * This is the producer of the done queue who enqueues at the tail. * * This function is interrupt context safe. */ static void scsi_done(struct scsi_cmnd *cmd) { /* * We don't have to worry about this one timing out any more. * If we are unable to remove the timer, then the command * has already timed out. In which case, we have no choice but to * let the timeout function run, as we have no idea where in fact * that function could really be. It might be on another processor, * etc, etc. */ if (!scsi_delete_timer(cmd)) return; __scsi_done(cmd); } /* Private entry to scsi_done() to complete a command when the timer * isn't running --- used by scsi_times_out */ void __scsi_done(struct scsi_cmnd *cmd) { unsigned long flags; /* * Set the serial numbers back to zero */ cmd->serial_number = 0; atomic_inc(&cmd->device->iodone_cnt); if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); /* * Next, enqueue the command into the done queue. * It is a per-CPU queue, so we just disable local interrupts * and need no spinlock. 
*/ local_irq_save(flags); list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_restore(flags); } /** * scsi_softirq - Perform post-interrupt processing of finished SCSI commands. * * This is the consumer of the done queue. * * This is called with all interrupts enabled. This should reduce * interrupt latency, stack depth, and reentrancy of the low-level * drivers. */ static void scsi_softirq(struct softirq_action *h) { int disposition; LIST_HEAD(local_q); local_irq_disable(); list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); local_irq_enable(); while (!list_empty(&local_q)) { struct scsi_cmnd *cmd = list_entry(local_q.next, struct scsi_cmnd, eh_entry); list_del_init(&cmd->eh_entry); disposition = scsi_decide_disposition(cmd); scsi_log_completion(cmd, disposition); switch (disposition) { case SUCCESS: scsi_finish_command(cmd); break; case NEEDS_RETRY: scsi_retry_command(cmd); break; case ADD_TO_MLQUEUE: scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); break; default: if (!scsi_eh_scmd_add(cmd, 0)) scsi_finish_command(cmd); } } } /* * Function: scsi_retry_command * * Purpose: Send a command back to the low level to be retried. * * Notes: This command is always executed in the context of the * bottom half handler, or the error handler thread. Low * level drivers should not become re-entrant as a result of * this. */ static int scsi_retry_command(struct scsi_cmnd *cmd) { /* * Restore the SCSI command state. */ scsi_setup_cmd_retry(cmd); /* * Zero the sense information from the last time we tried * this command. */ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer)); return scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY); } /* * Function: scsi_finish_command * * Purpose: Pass command off to upper layer for finishing of I/O * request, waking processes that are waiting on results, * etc. */ void scsi_finish_command(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; struct Scsi_Host *shost = sdev->host; struct scsi_request *sreq; scsi_device_unbusy(sdev); /* * Clear the flags which say that the device/host is no longer * capable of accepting new commands. These are set in scsi_queue.c * for both the queue full condition on a device, and for a * host full condition on the host. * * XXX(hch): What about locking? */ shost->host_blocked = 0; sdev->device_blocked = 0; /* * If we have valid sense information, then some kind of recovery * must have taken place. Make a note of this. */ if (SCSI_SENSE_VALID(cmd)) cmd->result |= (DRIVER_SENSE << 24); SCSI_LOG_MLCOMPLETE(4, printk("Notifying upper driver of completion " "for device %d %x\n", sdev->id, cmd->result)); /* * We can get here with use_sg=0, causing a panic in the upper level */ cmd->use_sg = cmd->old_use_sg; /* * If there is an associated request structure, copy the data over * before we call the completion function. 
*/ sreq = cmd->sc_request; if (sreq) { sreq->sr_result = sreq->sr_command->result; if (sreq->sr_result) { memcpy(sreq->sr_sense_buffer, sreq->sr_command->sense_buffer, sizeof(sreq->sr_sense_buffer)); } } cmd->done(cmd); } EXPORT_SYMBOL(scsi_finish_command); /* * Function: scsi_adjust_queue_depth() * * Purpose: Allow low level drivers to tell us to change the queue depth * on a specific SCSI device * * Arguments: sdev - SCSI Device in question * tagged - Do we use tagged queueing (non-0) or do we treat * this device as an untagged device (0) * tags - Number of tags allowed if tagged queueing enabled, * or number of commands the low level driver can * queue up in non-tagged mode (as per cmd_per_lun). * * Returns: Nothing * * Lock Status: None held on entry * * Notes: Low level drivers may call this at any time and we will do * the right thing depending on whether or not the device is * currently active and whether or not it even has the * command blocks built yet. */ void scsi_adjust_queue_depth(struct scsi_device *sdev, int tagged, int tags) { unsigned long flags; /* * refuse to set tagged depth to an unworkable size */ if (tags <= 0) return; spin_lock_irqsave(sdev->request_queue->queue_lock, flags); /* Check to see if the queue is managed by the block layer * if it is, and we fail to adjust the depth, exit */ if (blk_queue_tagged(sdev->request_queue) && blk_queue_resize_tags(sdev->request_queue, tags) != 0) goto out; sdev->queue_depth = tags; switch (tagged) { case MSG_ORDERED_TAG: sdev->ordered_tags = 1; sdev->simple_tags = 1; break; case MSG_SIMPLE_TAG: sdev->ordered_tags = 0; sdev->simple_tags = 1; break; default: printk(KERN_WARNING "(scsi%d:%d:%d:%d) " "scsi_adjust_queue_depth, bad queue type, " "disabled\n", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); case 0: sdev->ordered_tags = sdev->simple_tags = 0; sdev->queue_depth = tags; break; } out: spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags); } EXPORT_SYMBOL(scsi_adjust_queue_depth); /* * Function: scsi_track_queue_full() * * Purpose: This function will track successive QUEUE_FULL events on a * specific SCSI device to determine if and when there is a * need to adjust the queue depth on the device. * * Arguments: sdev - SCSI Device in question * depth - Current number of outstanding SCSI commands on * this device, not counting the one returned as * QUEUE_FULL. * * Returns: 0 - No change needed * >0 - Adjust queue depth to this new depth * -1 - Drop back to untagged operation using host->cmd_per_lun * as the untagged command depth * * Lock Status: None held on entry * * Notes: Low level drivers may call this at any time and we will do * "The Right Thing." We are interrupt context safe. 
*/ int scsi_track_queue_full(struct scsi_device *sdev, int depth) { if ((jiffies >> 4) == sdev->last_queue_full_time) return 0; sdev->last_queue_full_time = (jiffies >> 4); if (sdev->last_queue_full_depth != depth) { sdev->last_queue_full_count = 1; sdev->last_queue_full_depth = depth; } else { sdev->last_queue_full_count++; } if (sdev->last_queue_full_count <= 10) return 0; if (sdev->last_queue_full_depth < 8) { /* Drop back to untagged */ scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); return -1; } if (sdev->ordered_tags) scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth); else scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); return depth; } EXPORT_SYMBOL(scsi_track_queue_full); /** * scsi_device_get - get an addition reference to a scsi_device * @sdev: device to get a reference to * * Gets a reference to the scsi_device and increments the use count * of the underlying LLDD module. You must hold host_lock of the * parent Scsi_Host or already have a reference when calling this. */ int scsi_device_get(struct scsi_device *sdev) { if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL) return -ENXIO; if (!get_device(&sdev->sdev_gendev)) return -ENXIO; if (!try_module_get(sdev->host->hostt->module)) { put_device(&sdev->sdev_gendev); return -ENXIO; } return 0; } EXPORT_SYMBOL(scsi_device_get); /** * scsi_device_put - release a reference to a scsi_device * @sdev: device to release a reference on. * * Release a reference to the scsi_device and decrements the use count * of the underlying LLDD module. The device is freed once the last * user vanishes. */ void scsi_device_put(struct scsi_device *sdev) { module_put(sdev->host->hostt->module); put_device(&sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_device_put); /* helper for shost_for_each_device, thus not documented */ struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *shost, struct scsi_device *prev) { struct list_head *list = (prev ? &prev->siblings : &shost->__devices); struct scsi_device *next = NULL; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); while (list->next != &shost->__devices) { next = list_entry(list->next, struct scsi_device, siblings); /* skip devices that we can't get a reference to */ if (!scsi_device_get(next)) break; next = NULL; list = list->next; } spin_unlock_irqrestore(shost->host_lock, flags); if (prev) scsi_device_put(prev); return next; } EXPORT_SYMBOL(__scsi_iterate_devices); /** * starget_for_each_device - helper to walk all devices of a target * @starget: target whose devices we want to iterate over. * * This traverses over each devices of @shost. The devices have * a reference that must be released by scsi_host_put when breaking * out of the loop. */ void starget_for_each_device(struct scsi_target *starget, void * data, void (*fn)(struct scsi_device *, void *)) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct scsi_device *sdev; shost_for_each_device(sdev, shost) { if ((sdev->channel == starget->channel) && (sdev->id == starget->id)) fn(sdev, data); } } EXPORT_SYMBOL(starget_for_each_device); /** * __scsi_device_lookup_by_target - find a device given the target (UNLOCKED) * @starget: SCSI target pointer * @lun: SCSI Logical Unit Number * * Looks up the scsi_device with the specified @lun for a give * @starget. The returned scsi_device does not have an additional * reference. You must hold the host's host_lock over this call and * any access to the returned scsi_device. 
* * Note: The only reason why drivers would want to use this is because * they're need to access the device list in irq context. Otherwise you * really want to use scsi_device_lookup_by_target instead. **/ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget, uint lun) { struct scsi_device *sdev; list_for_each_entry(sdev, &starget->devices, same_target_siblings) { if (sdev->lun ==lun) return sdev; } return NULL; } EXPORT_SYMBOL(__scsi_device_lookup_by_target); /** * scsi_device_lookup_by_target - find a device given the target * @starget: SCSI target pointer * @lun: SCSI Logical Unit Number * * Looks up the scsi_device with the specified @channel, @id, @lun for a * give host. The returned scsi_device has an additional reference that * needs to be release with scsi_host_put once you're done with it. **/ struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget, uint lun) { struct scsi_device *sdev; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); sdev = __scsi_device_lookup_by_target(starget, lun); if (sdev && scsi_device_get(sdev)) sdev = NULL; spin_unlock_irqrestore(shost->host_lock, flags); return sdev; } EXPORT_SYMBOL(scsi_device_lookup_by_target); /** * scsi_device_lookup - find a device given the host (UNLOCKED) * @shost: SCSI host pointer * @channel: SCSI channel (zero if only one channel) * @pun: SCSI target number (physical unit number) * @lun: SCSI Logical Unit Number * * Looks up the scsi_device with the specified @channel, @id, @lun for a * give host. The returned scsi_device does not have an additional reference. * You must hold the host's host_lock over this call and any access to the * returned scsi_device. * * Note: The only reason why drivers would want to use this is because * they're need to access the device list in irq context. Otherwise you * really want to use scsi_device_lookup instead. **/ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost, uint channel, uint id, uint lun) { struct scsi_device *sdev; list_for_each_entry(sdev, &shost->__devices, siblings) { if (sdev->channel == channel && sdev->id == id && sdev->lun ==lun) return sdev; } return NULL; } EXPORT_SYMBOL(__scsi_device_lookup); /** * scsi_device_lookup - find a device given the host * @shost: SCSI host pointer * @channel: SCSI channel (zero if only one channel) * @id: SCSI target number (physical unit number) * @lun: SCSI Logical Unit Number * * Looks up the scsi_device with the specified @channel, @id, @lun for a * give host. The returned scsi_device has an additional reference that * needs to be release with scsi_host_put once you're done with it. **/ struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost, uint channel, uint id, uint lun) { struct scsi_device *sdev; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); sdev = __scsi_device_lookup(shost, channel, id, lun); if (sdev && scsi_device_get(sdev)) sdev = NULL; spin_unlock_irqrestore(shost->host_lock, flags); return sdev; } EXPORT_SYMBOL(scsi_device_lookup); /** * scsi_device_cancel - cancel outstanding IO to this device * @sdev: Pointer to struct scsi_device * @recovery: Boolean instructing function to recover device or not. 
* **/ int scsi_device_cancel(struct scsi_device *sdev, int recovery) { struct scsi_cmnd *scmd; LIST_HEAD(active_list); struct list_head *lh, *lh_sf; unsigned long flags; scsi_device_set_state(sdev, SDEV_CANCEL); spin_lock_irqsave(&sdev->list_lock, flags); list_for_each_entry(scmd, &sdev->cmd_list, list) { if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) { /* * If we are unable to remove the timer, it means * that the command has already timed out or * finished. */ if (!scsi_delete_timer(scmd)) continue; list_add_tail(&scmd->eh_entry, &active_list); } } spin_unlock_irqrestore(&sdev->list_lock, flags); if (!list_empty(&active_list)) { list_for_each_safe(lh, lh_sf, &active_list) { scmd = list_entry(lh, struct scsi_cmnd, eh_entry); list_del_init(lh); if (recovery) { scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD); } else { scmd->result = (DID_ABORT << 16); scsi_finish_command(scmd); } } } return 0; } EXPORT_SYMBOL(scsi_device_cancel); #ifdef CONFIG_HOTPLUG_CPU static int scsi_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (unsigned long)hcpu; switch(action) { case CPU_DEAD: /* Drain scsi_done_q. */ local_irq_disable(); list_splice_init(&per_cpu(scsi_done_q, cpu), &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_enable(); break; default: break; } return NOTIFY_OK; } static struct notifier_block __devinitdata scsi_cpu_nb = { .notifier_call = scsi_cpu_notify, }; #define register_scsi_cpu() register_cpu_notifier(&scsi_cpu_nb) #define unregister_scsi_cpu() unregister_cpu_notifier(&scsi_cpu_nb) #else #define register_scsi_cpu() #define unregister_scsi_cpu() #endif /* CONFIG_HOTPLUG_CPU */ MODULE_DESCRIPTION("SCSI core"); MODULE_LICENSE("GPL"); module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels"); static int __init init_scsi(void) { int error, i; error = scsi_init_queue(); if (error) return error; error = scsi_init_procfs(); if (error) goto cleanup_queue; error = scsi_init_devinfo(); if (error) goto cleanup_procfs; error = scsi_init_hosts(); if (error) goto cleanup_devlist; error = scsi_init_sysctl(); if (error) goto cleanup_hosts; error = scsi_sysfs_register(); if (error) goto cleanup_sysctl; for (i = 0; i < NR_CPUS; i++) INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); devfs_mk_dir("scsi"); open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL); register_scsi_cpu(); printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; cleanup_sysctl: scsi_exit_sysctl(); cleanup_hosts: scsi_exit_hosts(); cleanup_devlist: scsi_exit_devinfo(); cleanup_procfs: scsi_exit_procfs(); cleanup_queue: scsi_exit_queue(); printk(KERN_ERR "SCSI subsystem failed to initialize, error = %d\n", -error); return error; } static void __exit exit_scsi(void) { scsi_sysfs_unregister(); scsi_exit_sysctl(); scsi_exit_hosts(); scsi_exit_devinfo(); devfs_remove("scsi"); scsi_exit_procfs(); scsi_exit_queue(); unregister_scsi_cpu(); } subsys_initcall(init_scsi); module_exit(exit_scsi);