Added proposed patch to solve SG IO count limitation issue in pass-through mode.
authorvlnb <vlnb@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Fri, 7 Nov 2008 18:47:17 +0000 (18:47 +0000)
committervlnb <vlnb@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Fri, 7 Nov 2008 18:47:17 +0000 (18:47 +0000)
It allows the SGV cache to do allocation of pages with order > 0, i.e. more than 1 page per SG entry. Web and doc updated correspondingly.

git-svn-id: https://scst.svn.sourceforge.net/svnroot/scst/trunk@563 d57e44dd-8a1f-0410-8b47-8ef2f437770f

scst/README
www/contributing.html
www/sgv_big_order_alloc.diff [new file with mode: 0644]

index 8771a50..e156470 100644 (file)
@@ -624,6 +624,10 @@ applications, so, if you experience large transfers stalls, you should
 check documentation for your application how to limit the transfer
 sizes.
 
+Another way to solve this issue is to build SG entries with more than 1
+page each. See the following patch as an example:
+http://scst.sf.net/sgv_big_order_alloc.diff
+
 User space mode using scst_user dev handler
 -------------------------------------------
 
index 11f9e8b..adf7016 100644 (file)
 {
        return vdisk_exec_fns[cmd->cdb[0]](cmd);
 }</p></listing>
+
+                               <h3>Solve SG IO count limitation issue in pass-through mode</h3>
+                               
+                               <p>In the pass-through mode (i.e. using the pass-through device handlers
+                               scst_disk, scst_tape, etc) SCSI commands, coming from remote initiators,
+                               are passed to local SCSI hardware on target as is, without any
+                               modifications. Like any other hardware, the local SCSI hardware cannot
+                               handle commands whose amount of data and/or segment count in the
+                               scatter-gather array is bigger than certain values. If you have this issue you will see
+                               symptoms like small transfers work well, but large ones stall and
+                               messages like: "Unable to complete command due to SG IO count
+                               limitation" are printed in the kernel logs.</p>
+                               
+                               <p>In <a href="sgv_big_order_alloc.diff">sgv_big_order_alloc.diff</a> you
+                               can find a possible way to solve this issue.</p>
                        </div>
        </div>
 </div>
diff --git a/www/sgv_big_order_alloc.diff b/www/sgv_big_order_alloc.diff
new file mode 100644 (file)
index 0000000..46d24c6
--- /dev/null
@@ -0,0 +1,445 @@
+In the pass-through mode (i.e. using the pass-through device handlers
+scst_disk, scst_tape, etc) SCSI commands, coming from remote initiators,
+are passed to local SCSI hardware on target as is, without any
+modifications. Like any other hardware, the local SCSI hardware cannot
+handle commands whose amount of data and/or segment count in the
+scatter-gather array is bigger than certain values. If you have this issue you will
+see symptoms like small transfers work well, but large ones stall and
+messages like: "Unable to complete command due to SG IO count
+limitation" are printed in the kernel logs.
+
+This is a proposed patch to solve that. It allows the SGV cache to do
+allocation of pages with order > 0, i.e. more than 1 page per SG entry.
+
+Compile tested only.
+
+Index: scst/include/scst.h
+===================================================================
+--- scst/include/scst.h        (revision 558)
++++ scst/include/scst.h        (working copy)
+@@ -2649,12 +2649,13 @@ struct sgv_pool *sgv_pool_create(const c
+ void sgv_pool_destroy(struct sgv_pool *pool);
+ void sgv_pool_set_allocator(struct sgv_pool *pool,
+-      struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *),
+-      void (*free_pages_fn)(struct scatterlist *, int, void *));
++      struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *, int),
++      void (*free_pages_fn)(struct scatterlist *, int, void *, int));
+ struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size,
+       gfp_t gfp_mask, int flags, int *count,
+-      struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv);
++      struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv,
++      int max_sg_count);
+ void sgv_pool_free(struct sgv_pool_obj *sgv, struct scst_mem_lim *mem_lim);
+ void *sgv_get_priv(struct sgv_pool_obj *sgv);
+Index: scst/src/scst_mem.h
+===================================================================
+--- scst/src/scst_mem.h        (revision 558)
++++ scst/src/scst_mem.h        (working copy)
+@@ -36,6 +36,8 @@ struct sgv_pool_obj {
+       /* if <0 - pages, >0 - order */
+       int order_or_pages;
++      int alloc_order;
++
+       struct {
+               /* jiffies, protected by pool_mgr_lock */
+               unsigned long time_stamp;
+@@ -67,9 +69,9 @@ struct sgv_pool_cache_acc {
+ struct sgv_pool_alloc_fns {
+       struct page *(*alloc_pages_fn)(struct scatterlist *sg, gfp_t gfp_mask,
+-              void *priv);
++              void *priv, int alloc_order);
+       void (*free_pages_fn)(struct scatterlist *sg, int sg_count,
+-              void *priv);
++              void *priv, int alloc_order);
+ };
+ struct sgv_pool {
+Index: scst/src/scst_lib.c
+===================================================================
+--- scst/src/scst_lib.c        (revision 558)
++++ scst/src/scst_lib.c        (working copy)
+@@ -1663,34 +1663,18 @@ int scst_alloc_space(struct scst_cmd *cm
+               flags |= SCST_POOL_ALLOC_NO_CACHED;
+       cmd->sg = sgv_pool_alloc(tgt_dev->pool, cmd->bufflen, gfp_mask, flags,
+-                      &cmd->sg_cnt, &cmd->sgv, &cmd->dev->dev_mem_lim, NULL);
++                      &cmd->sg_cnt, &cmd->sgv, &cmd->dev->dev_mem_lim, NULL,
++                      tgt_dev->max_sg_cnt);
+       if (cmd->sg == NULL)
+               goto out;
+-      if (unlikely(cmd->sg_cnt > tgt_dev->max_sg_cnt)) {
+-              static int ll;
+-              if (ll < 10) {
+-                      PRINT_INFO("Unable to complete command due to "
+-                              "SG IO count limitation (requested %d, "
+-                              "available %d, tgt lim %d)", cmd->sg_cnt,
+-                              tgt_dev->max_sg_cnt, cmd->tgt->sg_tablesize);
+-                      ll++;
+-              }
+-              goto out_sg_free;
+-      }
++      EXTRACHECKS_BUG_ON(cmd->sg_cnt > tgt_dev->max_sg_cnt);
+       res = 0;
+ out:
+       TRACE_EXIT();
+       return res;
+-
+-out_sg_free:
+-      sgv_pool_free(cmd->sgv, &cmd->dev->dev_mem_lim);
+-      cmd->sgv = NULL;
+-      cmd->sg = NULL;
+-      cmd->sg_cnt = 0;
+-      goto out;
+ }
+ void scst_release_space(struct scst_cmd *cmd)
+Index: scst/src/scst_mem.c
+===================================================================
+--- scst/src/scst_mem.c        (revision 558)
++++ scst/src/scst_mem.c        (working copy)
+@@ -118,7 +118,7 @@ out_head:
+ }
+ static void scst_free_sys_sg_entries(struct scatterlist *sg, int sg_count,
+-      void *priv)
++      void *priv, int alloc_order)
+ {
+       int i;
+@@ -134,7 +134,7 @@ static void scst_free_sys_sg_entries(str
+                       (unsigned long)p, len, pages);
+               while (pages > 0) {
+-                      int order = 0;
++                      int order = alloc_order;
+ /*
+  * __free_pages() doesn't like freeing pages with not that order with
+@@ -159,9 +159,9 @@ static void scst_free_sys_sg_entries(str
+ }
+ static struct page *scst_alloc_sys_pages(struct scatterlist *sg,
+-      gfp_t gfp_mask, void *priv)
++      gfp_t gfp_mask, void *priv, int alloc_order)
+ {
+-      struct page *page = alloc_pages(gfp_mask, 0);
++      struct page *page = alloc_pages(gfp_mask, alloc_order);
+       sg_set_page(sg, page, PAGE_SIZE, 0);
+       TRACE_MEM("page=%p, sg=%p, priv=%p", page, sg, priv);
+@@ -174,10 +174,10 @@ static struct page *scst_alloc_sys_pages
+ static int scst_alloc_sg_entries(struct scatterlist *sg, int pages,
+       gfp_t gfp_mask, int clustered, struct trans_tbl_ent *trans_tbl,
+-      const struct sgv_pool_alloc_fns *alloc_fns, void *priv)
++      const struct sgv_pool_alloc_fns *alloc_fns, void *priv, int alloc_order)
+ {
+       int sg_count = 0;
+-      int pg, i, j;
++      int pg, i, j, pg_inc = 1 << alloc_order;
+       int merged = -1;
+       TRACE_MEM("pages=%d, clustered=%d", pages, clustered);
+@@ -189,7 +189,7 @@ static int scst_alloc_sg_entries(struct 
+       gfp_mask |= __GFP_ZERO;
+ #endif
+-      for (pg = 0; pg < pages; pg++) {
++      for (pg = 0; pg < pages; pg += pg_inc) {
+               void *rc;
+ #ifdef CONFIG_SCST_DEBUG_OOM
+               if (((gfp_mask & __GFP_NOFAIL) != __GFP_NOFAIL) &&
+@@ -198,7 +198,7 @@ static int scst_alloc_sg_entries(struct 
+               else
+ #endif
+                       rc = alloc_fns->alloc_pages_fn(&sg[sg_count], gfp_mask,
+-                              priv);
++                              priv, alloc_order);
+               if (rc == NULL)
+                       goto out_no_mem;
+               if (clustered) {
+@@ -229,7 +229,7 @@ out:
+       return sg_count;
+ out_no_mem:
+-      alloc_fns->free_pages_fn(sg, sg_count, priv);
++      alloc_fns->free_pages_fn(sg, sg_count, priv, alloc_order);
+       sg_count = 0;
+       goto out;
+ }
+@@ -292,7 +292,7 @@ static void sgv_dtor_and_free(struct sgv
+ {
+       if (obj->sg_count != 0) {
+               obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries,
+-                      obj->sg_count, obj->allocator_priv);
++                      obj->sg_count, obj->allocator_priv, obj->alloc_order);
+       }
+       if (obj->sg_entries != obj->sg_entries_data) {
+               if (obj->trans_tbl !=
+@@ -308,6 +308,36 @@ static void sgv_dtor_and_free(struct sgv
+       return;
+ }
++static struct sgv_pool_obj *sgv_pool_cached_create(struct sgv_pool *pool,
++      int order, gfp_t gfp_mask, bool locked)
++{
++      struct sgv_pool_obj *obj;
++      int pages = 1 << order;
++
++      if (!locked)
++              spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
++
++      pool->acc.cached_entries++;
++      pool->acc.cached_pages += pages;
++
++      spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
++
++      obj = kmem_cache_alloc(pool->caches[order],
++              gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
++      if (likely(obj)) {
++              memset(obj, 0, sizeof(*obj));
++              obj->order_or_pages = order;
++              obj->owner_pool = pool;
++      } else {
++              spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
++              pool->acc.cached_entries--;
++              pool->acc.cached_pages -= pages;
++              spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
++      }
++
++      return obj;
++}
++
+ static struct sgv_pool_obj *sgv_pool_cached_get(struct sgv_pool *pool,
+       int order, gfp_t gfp_mask)
+ {
+@@ -332,23 +362,7 @@ static struct sgv_pool_obj *sgv_pool_cac
+               goto out;
+       }
+-      pool->acc.cached_entries++;
+-      pool->acc.cached_pages += pages;
+-
+-      spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
+-
+-      obj = kmem_cache_alloc(pool->caches[order],
+-              gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
+-      if (likely(obj)) {
+-              memset(obj, 0, sizeof(*obj));
+-              obj->order_or_pages = order;
+-              obj->owner_pool = pool;
+-      } else {
+-              spin_lock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
+-              pool->acc.cached_entries--;
+-              pool->acc.cached_pages -= pages;
+-              spin_unlock_bh(&sgv_pools_mgr.mgr.pool_mgr_lock);
+-      }
++      obj = sgv_pool_cached_create(pool, order, gfp_mask, true);
+ out:
+       return obj;
+@@ -546,12 +560,13 @@ static void scst_uncheck_allowed_mem(str
+ struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size,
+       gfp_t gfp_mask, int flags, int *count,
+-      struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv)
++      struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv,
++      int max_sg_count)
+ {
+       struct sgv_pool_obj *obj;
+       int order, pages, cnt;
+       struct scatterlist *res = NULL;
+-      int pages_to_alloc;
++      int pages_to_alloc, alloc_order;
+       struct kmem_cache *cache;
+       int no_cached = flags & SCST_POOL_ALLOC_NO_CACHED;
+       bool allowed_mem_checked = false, hiwmk_checked = false;
+@@ -605,7 +620,23 @@ struct scatterlist *sgv_pool_alloc(struc
+               if (obj->sg_count != 0) {
+                       TRACE_MEM("Cached sgv_obj %p", obj);
+                       EXTRACHECKS_BUG_ON(obj->order_or_pages != order);
+-                      atomic_inc(&pool->cache_acc[order].hit_alloc);
++
++                      if (unlikely(max_sg_count < obj->sg_count)) {
++                              TRACE_MEM("Too many SG entries %d (max %d)",
++                                      obj->sg_count, max_sg_count);
++
++                              sgv_pool_cached_put(obj);
++
++                              obj = sgv_pool_cached_create(pool, order,
++                                              gfp_mask, false);
++                              if (obj == NULL) {
++                                      TRACE(TRACE_OUT_OF_MEM, "Allocation of "
++                                              "sgv_pool_obj failed (size %d)",
++                                              size);
++                                      goto out_fail;
++                              }
++                      } else
++                              atomic_inc(&pool->cache_acc[order].hit_alloc);
+                       goto success;
+               }
+@@ -682,15 +713,27 @@ struct scatterlist *sgv_pool_alloc(struc
+               TRACE_MEM("Big or no_cached sgv_obj %p (size %d)", obj, sz);
+       }
+-      obj->sg_count = scst_alloc_sg_entries(obj->sg_entries,
+-              pages_to_alloc, gfp_mask, pool->clustered, obj->trans_tbl,
+-              &pool->alloc_fns, priv);
+-      if (unlikely(obj->sg_count <= 0)) {
+-              obj->sg_count = 0;
+-              if ((flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL) && cache)
+-                      goto out_return1;
+-              else
+-                      goto out_fail_free_sg_entries;
++      alloc_order = 0;
++      while (1) {
++              obj->sg_count = scst_alloc_sg_entries(obj->sg_entries,
++                      pages_to_alloc, gfp_mask, pool->clustered,
++                      obj->trans_tbl, &pool->alloc_fns, priv, alloc_order);
++              if (unlikely(obj->sg_count <= 0)) {
++                      obj->sg_count = 0;
++                      if ((flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL) && cache)
++                              goto out_return1;
++                      else
++                              goto out_fail_free_sg_entries;
++              }
++              obj->alloc_order = alloc_order;
++
++              if (max_sg_count >= obj->sg_count)
++                      break;
++
++              obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries,
++                      obj->sg_count, obj->allocator_priv,
++                      obj->alloc_order);
++              alloc_order++;
+       }
+       if (cache) {
+@@ -815,7 +858,7 @@ void sgv_pool_free(struct sgv_pool_obj *
+               sgv_pool_cached_put(sgv);
+       } else {
+               sgv->owner_pool->alloc_fns.free_pages_fn(sgv->sg_entries,
+-                      sgv->sg_count, sgv->allocator_priv);
++                      sgv->sg_count, sgv->allocator_priv, sgv->alloc_order);
+               pages = (sgv->sg_count != 0) ? -sgv->order_or_pages : 0;
+               kfree(sgv);
+               sgv_pool_hiwmk_uncheck(pages);
+@@ -861,7 +904,7 @@ struct scatterlist *scst_alloc(int size,
+        * So, always don't use clustering.
+        */
+       *count = scst_alloc_sg_entries(res, pages, gfp_mask, 0, NULL,
+-                      &sys_alloc_fns, NULL);
++                      &sys_alloc_fns, NULL, 0);
+       if (*count <= 0)
+               goto out_free;
+@@ -888,7 +931,7 @@ void scst_free(struct scatterlist *sg, i
+       sgv_pool_hiwmk_uncheck(count);
+-      scst_free_sys_sg_entries(sg, count, NULL);
++      scst_free_sys_sg_entries(sg, count, NULL, 0);
+       kfree(sg);
+       return;
+ }
+@@ -1060,8 +1103,8 @@ void sgv_pool_deinit(struct sgv_pool *po
+ }
+ void sgv_pool_set_allocator(struct sgv_pool *pool,
+-      struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *),
+-      void (*free_pages_fn)(struct scatterlist *, int, void *))
++      struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *, int),
++      void (*free_pages_fn)(struct scatterlist *, int, void *, int))
+ {
+       pool->alloc_fns.alloc_pages_fn = alloc_pages_fn;
+       pool->alloc_fns.free_pages_fn = free_pages_fn;
+Index: scst/src/dev_handlers/scst_user.c
+===================================================================
+--- scst/src/dev_handlers/scst_user.c  (revision 559)
++++ scst/src/dev_handlers/scst_user.c  (working copy)
+@@ -168,9 +168,9 @@ static int dev_user_disk_done(struct scs
+ static int dev_user_tape_done(struct scst_cmd *cmd);
+ static struct page *dev_user_alloc_pages(struct scatterlist *sg,
+-      gfp_t gfp_mask, void *priv);
++      gfp_t gfp_mask, void *priv, int alloc_order);
+ static void dev_user_free_sg_entries(struct scatterlist *sg, int sg_count,
+-                                   void *priv);
++                                   void *priv, int alloc_order);
+ static void dev_user_add_to_ready(struct scst_user_cmd *ucmd);
+@@ -368,7 +368,7 @@ static void dev_user_free_ucmd(struct sc
+ }
+ static struct page *dev_user_alloc_pages(struct scatterlist *sg,
+-      gfp_t gfp_mask, void *priv)
++      gfp_t gfp_mask, void *priv, int alloc_order)
+ {
+       struct scst_user_cmd *ucmd = (struct scst_user_cmd *)priv;
+       int offset = 0;
+@@ -377,8 +377,11 @@ static struct page *dev_user_alloc_pages
+       /* *sg supposed to be zeroed */
+-      TRACE_MEM("ucmd %p, ubuff %lx, ucmd->cur_data_page %d", ucmd,
+-              ucmd->ubuff, ucmd->cur_data_page);
++      TRACE_MEM("ucmd %p, ubuff %lx, ucmd->cur_data_page %d, alloc_order %d",
++              ucmd, ucmd->ubuff, ucmd->cur_data_page, alloc_order);
++
++      if (unlikely(alloc_order != 0))
++              goto out;
+       if (ucmd->cur_data_page == 0) {
+               TRACE_MEM("ucmd->first_page_offset %d",
+@@ -468,7 +471,7 @@ static void __dev_user_free_sg_entries(s
+ }
+ static void dev_user_free_sg_entries(struct scatterlist *sg, int sg_count,
+-      void *priv)
++      void *priv, int alloc_order)
+ {
+       struct scst_user_cmd *ucmd = (struct scst_user_cmd *)priv;
+@@ -537,7 +540,8 @@ static int dev_user_alloc_sg(struct scst
+       ucmd->buff_cached = cached_buff;
+       cmd->sg = sgv_pool_alloc(dev->pool, bufflen, gfp_mask, flags,
+-                      &cmd->sg_cnt, &ucmd->sgv, &dev->udev_mem_lim, ucmd);
++                      &cmd->sg_cnt, &ucmd->sgv, &dev->udev_mem_lim, ucmd,
++                      cmd->tgt_dev->max_sg_cnt);
+       if (cmd->sg != NULL) {
+               struct scst_user_cmd *buf_ucmd =
+                       (struct scst_user_cmd *)sgv_get_priv(ucmd->sgv);
+@@ -559,21 +563,7 @@ static int dev_user_alloc_sg(struct scst
+                       "last_len %d, l %d)", ucmd, cached_buff, ucmd->ubuff,
+                       last_len, cmd->sg[cmd->sg_cnt-1].length);
+-              if (unlikely(cmd->sg_cnt > cmd->tgt_dev->max_sg_cnt)) {
+-                      static int ll;
+-                      if (ll < 10) {
+-                              PRINT_INFO("Unable to complete command due to "
+-                                      "SG IO count limitation (requested %d, "
+-                                      "available %d, tgt lim %d)",
+-                                      cmd->sg_cnt,
+-                                      cmd->tgt_dev->max_sg_cnt,
+-                                      cmd->tgt->sg_tablesize);
+-                              ll++;
+-                      }
+-                      cmd->sg = NULL;
+-                      /* sgv will be freed in dev_user_free_sgv() */
+-                      res = -1;
+-              }
++              EXTRACHECKS_BUG_ON(cmd->sg_cnt > cmd->tgt_dev->max_sg_cnt);
+       } else {
+               TRACE_MEM("Buf not alloced (ucmd %p, h %d, buff_cached, %d, "
+                       "sg_cnt %d, ubuff %lx, sgv %p", ucmd, ucmd->h,