[HW] Remove FUNC_PTR64.
[mirror/winof/.git] / hw / mlx4 / kernel_patches / mlx4_0050_lso.patch
1 From 33c5e1a802583cd84b55a4c5270e9d7753ac29bf Mon Sep 17 00:00:00 2001
2 From: Eli Cohen <eli@mellanox.co.il>
3 Date: Tue, 15 Jan 2008 18:57:09 +0200
4 Subject: [PATCH] Add LSO support to mlx4
5
6 mlx4: Add LSO support.
7
8 Changes:
9 Adjusted the setting of the "reserve" value in set_kernel_sq_size to fit
10 the changes made in the qp_flags patch.
11
12 Signed-off-by: Eli Cohen <eli@mellanox.co.il>
13 Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
14
15 ---
16  drivers/infiniband/hw/mlx4/cq.c   |    3 ++
17  drivers/infiniband/hw/mlx4/main.c |    4 +++
18  drivers/infiniband/hw/mlx4/qp.c   |   52 +++++++++++++++++++++++++++++++++---
19  drivers/net/mlx4/fw.c             |    9 ++++++
20  drivers/net/mlx4/fw.h             |    1 +
21  drivers/net/mlx4/main.c           |    1 +
22  include/linux/mlx4/device.h       |    1 +
23  include/linux/mlx4/qp.h           |    5 +++
24  8 files changed, 71 insertions(+), 5 deletions(-)
25
26 Index: ofed_kernel/drivers/infiniband/hw/mlx4/cq.c
27 ===================================================================
28 --- ofed_kernel.orig/drivers/infiniband/hw/mlx4/cq.c    2008-01-23 16:01:48.392614000 +0200
29 +++ ofed_kernel/drivers/infiniband/hw/mlx4/cq.c 2008-01-23 16:05:20.076983000 +0200
30 @@ -408,6 +408,9 @@ static int mlx4_ib_poll_one(struct mlx4_
31                 case MLX4_OPCODE_BIND_MW:
32                         wc->opcode    = IB_WC_BIND_MW;
33                         break;
34 +               case MLX4_OPCODE_LSO:
35 +                       wc->opcode    = IB_WC_LSO;
36 +                       break;
37                 }
38         } else {
39                 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
40 Index: ofed_kernel/drivers/infiniband/hw/mlx4/main.c
41 ===================================================================
42 --- ofed_kernel.orig/drivers/infiniband/hw/mlx4/main.c  2008-01-23 16:01:48.398613000 +0200
43 +++ ofed_kernel/drivers/infiniband/hw/mlx4/main.c       2008-01-23 16:05:20.081982000 +0200
44 @@ -102,6 +102,8 @@ static int mlx4_ib_query_device(struct i
45                 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
46         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
47                 props->device_cap_flags |= IB_DEVICE_IP_CSUM;
48 +       if (dev->dev->caps.max_gso_sz)
49 +               props->device_cap_flags |= IB_DEVICE_TCP_TSO;
50  
51         props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
52                 0xffffff;
53 @@ -617,6 +619,8 @@ static void *mlx4_ib_add(struct mlx4_dev
54  
55         if (ibdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
56                 ibdev->ib_dev.flags |= IB_DEVICE_IP_CSUM;
57 +       if (ibdev->dev->caps.max_gso_sz)
58 +               ibdev->ib_dev.flags |= IB_DEVICE_TCP_TSO;
59  
60         if (init_node_data(ibdev))
61                 goto err_map;
62 Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c
63 ===================================================================
64 --- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c    2008-01-23 16:01:51.101506000 +0200
65 +++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2008-01-23 16:08:04.078114000 +0200
66 @@ -69,6 +69,7 @@ enum {
67  
68  static const __be32 mlx4_ib_opcode[] = {
69         [IB_WR_SEND]                    = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
70 +       [IB_WR_LSO]                     = __constant_cpu_to_be32(MLX4_OPCODE_LSO),
71         [IB_WR_SEND_WITH_IMM]           = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
72         [IB_WR_RDMA_WRITE]              = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
73         [IB_WR_RDMA_WRITE_WITH_IMM]     = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
74 @@ -244,6 +245,7 @@ static int set_kernel_sq_size(struct mlx
75  {
76         struct ib_qp_cap *cap = &init_attr->cap;
77         enum ib_qp_type type = init_attr->qp_type;
78 +       int reserve = 0;
79  
80         /* Sanity check SQ size before proceeding */
81         if (cap->max_send_wr     > dev->dev->caps.max_wqes  ||
82 @@ -260,12 +262,16 @@ static int set_kernel_sq_size(struct mlx
83             cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
84                 return -EINVAL;
85  
86 +       if (qp->flags & MLX4_QP_LSO)
87 +               reserve = 64;
88 +
89         qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
90 -                                                       sizeof (struct mlx4_wqe_data_seg),
91 +                                                       sizeof (struct mlx4_wqe_data_seg) +
92 +                                                               reserve,
93                                                         cap->max_inline_data +
94                                                         sizeof (struct mlx4_wqe_inline_seg)) +
95                                                     send_wqe_overhead(type)));
96 -       qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
97 +       qp->sq.max_gs    = ((1 << qp->sq.wqe_shift) -reserve - send_wqe_overhead(type)) /
98                 sizeof (struct mlx4_wqe_data_seg);
99  
100         /*
101 @@ -756,9 +764,11 @@ static int __mlx4_ib_modify_qp(struct ib
102                 }
103         }
104  
105 -       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
106 -           ibqp->qp_type == IB_QPT_UD)
107 +       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
108                 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
109 +       else if (ibqp->qp_type == IB_QPT_UD)
110 +               context->mtu_msgmax = (IB_MTU_4096 << 5) |
111 +                       ilog2(dev->dev->caps.max_gso_sz);
112         else if (attr_mask & IB_QP_PATH_MTU) {
113                 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
114                         printk(KERN_ERR "path MTU (%u) is invalid\n",
115 @@ -1276,6 +1286,28 @@ static void __set_data_seg(struct mlx4_w
116         dseg->addr       = cpu_to_be64(sg->addr);
117  }
118  
119 +static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
120 +                        struct mlx4_ib_qp *qp, int *lso_seg_len)
121 +{
122 +       int halign;
123 +
124 +       halign = ALIGN(wr->wr.ud.hlen, 16);
125 +       if (unlikely(!(qp->flags & MLX4_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4)))
126 +                return -EINVAL;
127 +
128 +       memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
129 +
130 +       /* make sure LSO header is written before
131 +          overwriting stamping */
132 +       wmb();
133 +
134 +       wqe->mss_hdr_size = cpu_to_be32(((wr->wr.ud.mss - wr->wr.ud.hlen)
135 +                                        << 16) | wr->wr.ud.hlen);
136 +
137 +       *lso_seg_len = halign;
138 +       return 0;
139 +}
140 +
141  int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
142                       struct ib_send_wr **bad_wr)
143  {
144 @@ -1366,6 +1398,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
145                         set_datagram_seg(wqe, wr);
146                         wqe  += sizeof (struct mlx4_wqe_datagram_seg);
147                         size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
148 +
149 +                       if (wr->opcode == IB_WR_LSO) {
150 +                               int hlen;
151 +
152 +                               err = build_lso_seg(wqe, wr, qp, &hlen);
153 +                               if (err) {
154 +                                       *bad_wr = wr;
155 +                                       goto out;
156 +                               }
157 +                               wqe += hlen;
158 +                               size += hlen >> 4;
159 +                       }
160 +
161                         break;
162  
163                 case IB_QPT_SMI:
164 Index: ofed_kernel/drivers/net/mlx4/fw.c
165 ===================================================================
166 --- ofed_kernel.orig/drivers/net/mlx4/fw.c      2008-01-23 16:01:48.430615000 +0200
167 +++ ofed_kernel/drivers/net/mlx4/fw.c   2008-01-23 16:05:20.106981000 +0200
168 @@ -133,6 +133,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
169  #define QUERY_DEV_CAP_MAX_AV_OFFSET            0x27
170  #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET                0x29
171  #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET                0x2b
172 +#define QUERY_DEV_CAP_MAX_GSO_OFFSET           0x2d
173  #define QUERY_DEV_CAP_MAX_RDMA_OFFSET          0x2f
174  #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET           0x33
175  #define QUERY_DEV_CAP_ACK_DELAY_OFFSET         0x35
176 @@ -215,6 +216,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
177         dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
178         MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
179         dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
180 +       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET);
181 +       field &= 0x1f;
182 +       if (!field)
183 +               dev_cap->max_gso_sz = 0;
184 +       else
185 +               dev_cap->max_gso_sz = 1 << field;
186 +
187         MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
188         dev_cap->max_rdma_global = 1 << (field & 0x3f);
189         MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
190 @@ -377,6 +385,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
191                  dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
192         mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
193                  dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
194 +       mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
195  
196         dump_dev_cap_flags(dev, dev_cap->flags);
197  
198 Index: ofed_kernel/drivers/net/mlx4/fw.h
199 ===================================================================
200 --- ofed_kernel.orig/drivers/net/mlx4/fw.h      2008-01-23 15:58:48.837059000 +0200
201 +++ ofed_kernel/drivers/net/mlx4/fw.h   2008-01-23 16:05:20.109984000 +0200
202 @@ -96,6 +96,7 @@ struct mlx4_dev_cap {
203         u8  bmme_flags;
204         u32 reserved_lkey;
205         u64 max_icm_sz;
206 +       int max_gso_sz;
207  };
208  
209  struct mlx4_adapter {
210 Index: ofed_kernel/drivers/net/mlx4/main.c
211 ===================================================================
212 --- ofed_kernel.orig/drivers/net/mlx4/main.c    2008-01-23 15:58:48.841058000 +0200
213 +++ ofed_kernel/drivers/net/mlx4/main.c 2008-01-23 16:05:20.115981000 +0200
214 @@ -159,6 +159,7 @@ static int mlx4_dev_cap(struct mlx4_dev 
215         dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
216         dev->caps.flags              = dev_cap->flags;
217         dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
218 +       dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
219  
220         return 0;
221  }
222 Index: ofed_kernel/include/linux/mlx4/device.h
223 ===================================================================
224 --- ofed_kernel.orig/include/linux/mlx4/device.h        2008-01-23 15:58:48.844060000 +0200
225 +++ ofed_kernel/include/linux/mlx4/device.h     2008-01-23 16:05:20.138984000 +0200
226 @@ -181,6 +181,7 @@ struct mlx4_caps {
227         u32                     flags;
228         u16                     stat_rate_support;
229         u8                      port_width_cap[MLX4_MAX_PORTS + 1];
230 +       int                     max_gso_sz;
231  };
232  
233  struct mlx4_buf_list {
234 Index: ofed_kernel/include/linux/mlx4/qp.h
235 ===================================================================
236 --- ofed_kernel.orig/include/linux/mlx4/qp.h    2008-01-23 16:01:48.448613000 +0200
237 +++ ofed_kernel/include/linux/mlx4/qp.h 2008-01-23 16:05:20.142981000 +0200
238 @@ -215,6 +215,11 @@ struct mlx4_wqe_datagram_seg {
239         __be32                  reservd[2];
240  };
241  
242 +struct mlx4_lso_seg {
243 +       __be32                  mss_hdr_size;
244 +       __be32                  header[0];
245 +};
246 +
247  struct mlx4_wqe_bind_seg {
248         __be32                  flags1;
249         __be32                  flags2;