[MLX4] Fixing WQE Prefetch bug for LSO less than 60 bytes (Cache line-DWORD)
author leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 26 Jan 2009 10:37:32 +0000 (10:37 +0000)
committer leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 26 Jan 2009 10:37:32 +0000 (10:37 +0000)
Signed-off-by: Alex Naslednikov
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1883 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

hw/mlx4/kernel/bus/ib/qp.c

index 16527d2..d7528c4 100644 (file)
@@ -1507,10 +1507,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
                                }\r
 #define I64_CACHE_LINE                 64\r
 #define OPCODE_INVALID_BIT     6\r
-                               // WQE bug treatment for LSO case, when LSO header is large enough\r
-                               if (unlikely (seglen > I64_CACHE_LINE)) {\r
+                               // WQE bug treatment for LSO case\r
+                               // If LSO segment is large enough (exceeds one cache block in size)\r
+                               // or if it is small enough such that S/G element will be placed within the same cache block,\r
+                               // OPCODE_INVALID_BIT should be on in order to reread this WQE \r
+                               // More correct solution is \r
+                               //      (unlikely (seglen % I64_CACHE_LINE || seglen % (I64_CACHE_LINE-2) )) \r
+                               // but it will not be used in order to reduce calculations within Datapath\r
+                               // If LSO segment consists of 15 DWORDS, S/G elements block will nevertheless start from \r
+                               // the next cache block\r
+                               if (unlikely (seglen < I64_CACHE_LINE-4  || seglen > I64_CACHE_LINE ))\r
                                        ctrl->owner_opcode |= cpu_to_be32 ( 1 << OPCODE_INVALID_BIT);\r
-                               }\r
                                wqe  += seglen;\r
                                size += seglen / 16;\r
                                j=1;\r