博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Linux3.10.0块IO子系统流程(3)-- SCSI策略例程
阅读量:6420 次
发布时间:2019-06-23

本文共 11770 字,大约阅读时间需要 39 分钟。

很长时间以来,Linux块设备使用了一种称为“蓄流/泄流”(plugging/unplugging)的技术来改进吞吐率。简单而言,这种工作方式类似浴盆排水系统的塞子:当IO被提交时,它先被储存在一个队列中,稍后的某个时间才允许IO从队列中派发出去。之所以这么做,是为了尽可能地对IO进行合并和排序。

/*
 * scsi_request_fn - strategy routine that feeds a SCSI device's request
 * queue to the low-level driver.
 * @q: request queue being run; q->queuedata is the struct scsi_device.
 *
 * Loops over the queue: peeks the next request, starts it, takes the
 * prepared SCSI command from req->special and passes it to
 * scsi_dispatch_cmd().  The loop ends when no request is available, when
 * the device, target or host is not ready, or when dispatch returns
 * non-zero.
 *
 * Locking (as written below): q->queue_lock is dropped after the request
 * has been started and re-taken after dispatch; shost->host_lock is held
 * only around the target/host readiness checks and the busy accounting.
 * Apart from the early get_device() failure return, which touches no
 * lock, every path leaves through "out" with q->queue_lock re-acquired.
 */
static void scsi_request_fn(struct request_queue *q)
{
    struct scsi_device *sdev = q->queuedata;
    struct Scsi_Host *shost;
    struct scsi_cmnd *cmd;
    struct request *req;

    /* Take a device reference for the whole run; it is dropped at "out". */
    if(!get_device(&sdev->sdev_gendev))
        /* We must be tearing the block queue down already */
        return;

    /*
     * To start with, we keep looping until the queue is empty, or until
     * the host is no longer able to accept any more requests.
     */
    shost = sdev->host;
    for (;;) {
        int rtn;
        /*
         * get next queueable request.  We do this early to make sure
         * that the request is fully prepared even if we cannot
         * accept it.
         */
        req = blk_peek_request(q);
        /*
         * Leave the loop if there is no request, or if requests cannot
         * be sent to the SCSI device right now.
         */
        if (!req || !scsi_dev_queue_ready(q, sdev))
            break;

        /*
         * If the device is offline, log an error, release the request
         * through scsi_kill_request() and keep looping so that all
         * following requests are handled the same way.
         */
        if (unlikely(!scsi_device_online(sdev))) {
            sdev_printk(KERN_ERR, sdev,
                    "rejecting I/O to offline device\n");
            scsi_kill_request(req, q);
            continue;
        }

        /*
         * Remove the request from the request list.
         *
         * blk_start_request() is called unless the queue is tagged and
         * blk_queue_start_tag() returned 0 for this request.  Per the
         * article, blk_start_request() dequeues the request and arms
         * its timeout timer.
         */
        if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
            blk_start_request(req);
        sdev->device_busy++;

        /* Only the lock is released here (plain spin_unlock, not _irq). */
        spin_unlock(q->queue_lock);

        /*
         * Fetch the SCSI command from the request's special field.  Per
         * the article it was prepared by the queue's prep_rq_fn callback
         * during the earlier blk_peek_request() call.
         */
        cmd = req->special;
        if (unlikely(cmd == NULL)) {
            printk(KERN_CRIT "impossible request in %s.\n"
                     "please mail a stack trace to "
                     "linux-scsi@vger.kernel.org\n",
                     __func__);
            blk_dump_rq_flags(req, "foo");
            BUG();
        }
        spin_lock(shost->host_lock);

        /*
         * We hit this when the driver is using a host wide
         * tag map. For device level tag maps the queue_depth check
         * in the device ready fn would prevent us from trying
         * to allocate a tag. Since the map is a shared host resource
         * we add the dev to the starved list so it eventually gets
         * a run when a tag is freed.
         */
        if (blk_queue_tagged(q) && !blk_rq_tagged(req)) {
            if (list_empty(&sdev->starved_entry))
                list_add_tail(&sdev->starved_entry,
                          &shost->starved_list);
            goto not_ready;
        }

        if (!scsi_target_queue_ready(shost, sdev))
            goto not_ready;

        if (!scsi_host_queue_ready(q, shost, sdev))
            goto not_ready;

        /* Account the command against the target and the host. */
        scsi_target(sdev)->target_busy++;
        shost->host_busy++;

        /*
         * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
         *        take the lock again.
         */
        spin_unlock_irq(shost->host_lock);

        /*
         * Finally, initialize any error handling parameters, and set up
         * the timers for timeouts.
         */
        scsi_init_cmd_errh(cmd);

        /*
         * Dispatch the command to the low-level driver.
         */
        rtn = scsi_dispatch_cmd(cmd);
        spin_lock_irq(q->queue_lock);
        /* A non-zero dispatch result ends the run via the delay path. */
        if (rtn)
            goto out_delay;
    }

    goto out;

not_ready:
    spin_unlock_irq(shost->host_lock);

    /*
     * lock q, handle tag, requeue req, and decrement device_busy. We
     * must return with queue_lock held.
     *
     * Decrementing device_busy without checking it is OK, as all such
     * cases (host limits or settings) should run the queue at some
     * later time.
     */
    spin_lock_irq(q->queue_lock);
    blk_requeue_request(q, req);
    sdev->device_busy--;
out_delay:
    /* With nothing outstanding on the device, ask for a delayed re-run. */
    if (sdev->device_busy == 0)
        blk_delay_queue(q, SCSI_QUEUE_DELAY);
out:
    /* must be careful here...if we trigger the ->remove() function
     * we cannot be holding the q lock */
    spin_unlock_irq(q->queue_lock);
    put_device(&sdev->sdev_gendev);
    spin_lock_irq(q->queue_lock);
}

 

blk_peek_request从请求队列“顶部”取得下一个请求。函数的实现就是一个大循环,每次调用__elv_next_request从电梯队列中取出一个请求进行处理

/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q.  The returned request
 *     should be started using blk_start_request() before LLD starts
 *     processing it.
 *
 *     Each iteration takes one request from __elv_next_request() and,
 *     when the queue has a prep_rq_fn callback, lets that callback
 *     prepare it.  A request the callback rejects with BLKPREP_KILL is
 *     completed with -EIO and the loop moves on to the next request.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise (including when the callback returns BLKPREP_DEFER).
 *
 * Context:
 *     queue_lock must be held.
 */
struct request *blk_peek_request(struct request_queue *q)
{
    struct request *rq;
    int ret;

    while ((rq = __elv_next_request(q)) != NULL) {

        /* blk_pm_peek_request() may return NULL, which ends the peek. */
        rq = blk_pm_peek_request(q, rq);
        if (!rq)
            break;

        /*
         * A request is either brand new or was put back because it could
         * not be processed yet; per the article, the latter always has
         * REQ_STARTED set.  So a request without the flag is being seen
         * for the first time, and if it also carries REQ_SORTED the
         * elevator is told through elv_activate_rq().
         */
        if (!(rq->cmd_flags & REQ_STARTED)) {
            /*
             * This is the first time the device driver
             * sees this request (possibly after
             * requeueing).  Notify IO scheduler.
             */
            if (rq->cmd_flags & REQ_SORTED)
                elv_activate_rq(q, rq);

            /*
             * just mark as started even if we don't start
             * it, a request that has been delayed should
             * not be passed by new incoming requests
             */
            rq->cmd_flags |= REQ_STARTED;
            trace_block_rq_issue(q, rq);
        }

        /*
         * Cooperation with the I/O scheduler (per the article): when no
         * boundary request is set, or this request is the boundary,
         * record its end sector and clear the boundary.
         */
        if (!q->boundary_rq || q->boundary_rq == rq) {
            q->end_sector = rq_end_sector(rq);
            q->boundary_rq = NULL;
        }

        /*
         * A request flagged REQ_DONTPREP needs no command preparation:
         * leave the loop and return it to the caller.
         */
        if (rq->cmd_flags & REQ_DONTPREP)
            break;

        /*
         * A non-zero dma_drain_size is what the article calls the
         * "excess DMA" case: one extra segment is reserved so that a
         * drain buffer can later be appended after the scatter-gather
         * list.
         */
        if (q->dma_drain_size && blk_rq_bytes(rq)) {
            /*
             * make sure space for the drain appears we
             * know we can do this because max_hw_segments
             * has been adjusted to be one fewer than the
             * device can handle
             */
            rq->nr_phys_segments++;
        }

        /*
         * Without a prep_rq_fn callback the request is returned as is.
         * Otherwise the callback prepares the command for the request
         * and its result is handled as follows:
         *     BLKPREP_OK:    preparation succeeded, return the request.
         *     BLKPREP_DEFER: cannot be processed for now; return NULL
         *                    and leave the request queued.
         *     BLKPREP_KILL:  cannot be processed at all; complete it
         *                    with -EIO and, without leaving the loop,
         *                    try the next request.
         */
        if (!q->prep_rq_fn)
            break;

        ret = q->prep_rq_fn(q, rq);
        if (ret == BLKPREP_OK) {
            break;
        } else if (ret == BLKPREP_DEFER) {
            /*
             * the request may have been (partially) prepped.
             * we need to keep this request in the front to
             * avoid resource deadlock.  REQ_STARTED will
             * prevent other fs requests from passing this one.
             */
            if (q->dma_drain_size && blk_rq_bytes(rq) &&
                !(rq->cmd_flags & REQ_DONTPREP)) {
                /*
                 * remove the space for the drain we added
                 * so that we don't add it again
                 */
                --rq->nr_phys_segments;
            }

            rq = NULL;
            break;
        } else if (ret == BLKPREP_KILL) {
            rq->cmd_flags |= REQ_QUIET;
            /*
             * Mark this request as started so we don't trigger
             * any debug logic in the end I/O path.
             */
            blk_start_request(rq);
            __blk_end_request_all(rq, -EIO);
        } else {
            /* Unknown result: log it and return the request unchanged. */
            printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
            break;
        }
    }

    return rq;
}

 

请求队列中的prep_rq_fn回调函数实现了从请求构造SCSI命令的方法,prep_rq_fn回调函数关键有两个任务:
  1. 构造命令描述块
  2. 如果需要的话为数据传输准备聚散列表
命令描述块和聚散列表都被封装到SCSI命令描述符中,我们知道,请求至少有两个来源
  1. 来自上层bio
  2. 来自SCSI公共服务层
在刚找到SCSI设备为其初始化请求队列时,这个回调函数被设置为scsi_prep_fn
 
/*
 * scsi_alloc_queue - allocate the request queue for a SCSI device and
 * install the default SCSI callbacks on it.
 * @sdev: SCSI device the queue is created for.
 *
 * The queue is obtained from __scsi_alloc_queue() with scsi_request_fn as
 * its request function.  On success the prepare, softirq-done, timed-out
 * and lld-busy hooks are set to scsi_prep_fn, scsi_softirq_done,
 * scsi_times_out and scsi_lld_busy respectively.
 *
 * Returns the new queue, or NULL if __scsi_alloc_queue() failed.
 */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
    struct request_queue *q;

    q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
    if (!q)
        return NULL;

    blk_queue_prep_rq(q, scsi_prep_fn);
    blk_queue_softirq_done(q, scsi_softirq_done);
    blk_queue_rq_timed_out(q, scsi_times_out);
    blk_queue_lld_busy(q, scsi_lld_busy);
    return q;
}

/**
 * blk_queue_prep_rq - set a prepare_request function for queue
 * @q:        queue
 * @pfn:    prepare_request function
 *
 * It's possible for a queue to register a prepare_request callback which
 * is invoked before the request is handed to the request_fn. The goal of
 * the function is to prepare a request for I/O, it can be used to build a
 * cdb from the request data for instance.
 *
 */
void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
{
    /* Plain store; any previously installed callback is overwritten. */
    q->prep_rq_fn = pfn;
}
初始化回调

 

如果SCSI设备被高层驱动绑定,这个回调函数会被修改。例如,sd驱动在探测流程中(见下面的sd_probe_async)通过blk_queue_prep_rq将它设置成sd_prep_fn。
 
/*
 * sd_probe_async - finish setting up a SCSI disk and register its gendisk.
 * @data:   the struct scsi_disk being probed (passed as void *).
 * @cookie: async cookie; not used in this function.
 *
 * Fills in the gendisk from the scsi_disk, applies default device
 * parameters, revalidates the disk, replaces the queue's prepare/unprepare
 * callbacks with sd_prep_fn/sd_unprep_fn, registers the disk with
 * add_disk() and revalidates once more.
 */
static void sd_probe_async(void *data, async_cookie_t cookie)
{
    struct scsi_disk *sdkp = data;
    struct scsi_device *sdp;
    struct gendisk *gd;
    u32 index;
    struct device *dev;

    sdp = sdkp->device;
    gd = sdkp->disk;
    index = sdkp->index;
    dev = &sdp->sdev_gendev;

    /*
     * The disk index is split across major and minor: bits 4-7 select the
     * major through sd_major(); bits 0-3 are shifted up by four to form
     * the low part of first_minor, and bits 8-19 are kept in place.
     */
    gd->major = sd_major((index & 0xf0) >> 4);
    gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
    gd->minors = SD_MINORS;

    gd->fops = &sd_fops;
    gd->private_data = &sdkp->driver;
    gd->queue = sdkp->device->request_queue;

    /* defaults, until the device tells us otherwise */
    sdp->sector_size = 512;
    sdkp->capacity = 0;
    sdkp->media_present = 1;
    sdkp->write_prot = 0;
    sdkp->cache_override = 0;
    sdkp->WCE = 0;
    sdkp->RCD = 0;
    sdkp->ATO = 0;
    sdkp->first_scan = 1;
    sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS;

    sd_revalidate_disk(gd);

    /*
     * Install the sd driver's prepare/unprepare callbacks on the queue in
     * place of whatever was set before (scsi_alloc_queue() installs
     * scsi_prep_fn as the prepare callback).
     */
    blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
    blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);

    gd->driverfs_dev = &sdp->sdev_gendev;
    gd->flags = GENHD_FL_EXT_DEVT;
    if (sdp->removable) {
        gd->flags |= GENHD_FL_REMOVABLE;
        gd->events |= DISK_EVENT_MEDIA_CHANGE;
    }

    add_disk(gd);
    /* sd_dif_config_host() is only called when a capacity was recorded. */
    if (sdkp->capacity)
        sd_dif_config_host(sdkp);

    sd_revalidate_disk(gd);

    sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
          sdp->removable ? "removable " : "");
    blk_pm_runtime_init(sdp->request_queue, dev);
    scsi_autopm_put_device(sdp);
    /*
     * NOTE(review): presumably balances a reference taken by the code
     * that scheduled this function; confirm against the caller.
     */
    put_device(&sdkp->dev);
}
初始化回调

 

在前一种情况下,SCSI设备只能处理来自SCSI公共服务层的请求;在后一种情况下,SCSI设备不仅能处理来自SCSI公共服务层的请求,还能够处理来自上层的bio请求,分析见下一节。

 
 

 

转载于:https://www.cnblogs.com/luxiaodai/p/9266309.html

你可能感兴趣的文章
There is no accident
查看>>
springboot初学---rabbitmq的使用
查看>>
QTreeWidgetItem和QTreeWidgetItemIterator
查看>>
DevOps
查看>>
vim的纵向编辑(高级用法)
查看>>
再见,OI
查看>>
延时并自动关闭MessageBox
查看>>
指针转换(数组退化为指针的三种情况)
查看>>
C# Reflection exception Method not found
查看>>
Java面试笔记整理4
查看>>
什么是REST架构(转)
查看>>
搭建 Android 开发环境,初试HelloWorld (win7) (下) (转)
查看>>
混合高斯模型(GMM)推导及实现
查看>>
cocos2d-js 3.0rc0加载游戏引擎时长时间黑屏
查看>>
搜索专题练习
查看>>
Django-MTV模型
查看>>
IE9 Windows7 x64
查看>>
xcode 工具栏中放大镜的替换的简单说明
查看>>
C# 语言规范_版本5.0 (第11章 结构)
查看>>
租用游艇
查看>>