[mlx4_bus] Add support for RoCEE to the low level driver.
authortzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 4 Jan 2010 17:34:19 +0000 (17:34 +0000)
committertzachid <tzachid@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 4 Jan 2010 17:34:19 +0000 (17:34 +0000)
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@2647 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

21 files changed:
hw/mlx4/kernel/bus/core/ud_header.c
hw/mlx4/kernel/bus/core/verbs.c
hw/mlx4/kernel/bus/drv/drv.c
hw/mlx4/kernel/bus/drv/stat.c
hw/mlx4/kernel/bus/ib/ah.c
hw/mlx4/kernel/bus/ib/main.c
hw/mlx4/kernel/bus/ib/mlx4_ib.h
hw/mlx4/kernel/bus/ib/qp.c
hw/mlx4/kernel/bus/inc/cmd.h
hw/mlx4/kernel/bus/inc/device.h
hw/mlx4/kernel/bus/inc/ib_pack.h
hw/mlx4/kernel/bus/inc/ib_verbs.h
hw/mlx4/kernel/bus/inc/qp.h
hw/mlx4/kernel/bus/net/SOURCES
hw/mlx4/kernel/bus/net/main.c
hw/mlx4/kernel/bus/net/port.c
hw/mlx4/kernel/hca/av.c
hw/mlx4/kernel/hca/data.c
hw/mlx4/kernel/inc/l2w.h
inc/iba/ib_types.h
ulp/opensm/user/include/iba/ib_types.h

index 4be128e..3585fef 100644 (file)
@@ -62,6 +62,15 @@ static const struct ib_field lrh_table[]  = {
        { STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) }
 };
 
+static const struct ib_field eth_table[]  = {
+       { STRUCT_FIELD_INIT(eth, dmac_h, 0, 0, 32) },
+       { STRUCT_FIELD_INIT(eth, dmac_l, 1, 0, 16) },
+       { STRUCT_FIELD_INIT(eth, smac_h, 1, 16,16) },
+       { STRUCT_FIELD_INIT(eth, smac_l, 2, 0 ,32) },
+       { STRUCT_FIELD_INIT(eth, type, 3, 0, 16)}
+};
+
+
 static const struct ib_field grh_table[]  = {
        { STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) },
        { STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) },
@@ -279,3 +288,93 @@ int ib_ud_header_unpack(u8                *buf,
        return 0;
 }
 EXPORT_SYMBOL(ib_ud_header_unpack);
+
+/**
+ * ib_rdmaoe_ud_header_init - Initialize UD header structure
+ * @payload_bytes:Length of packet payload
+ * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @header:Structure to initialize
+ *
+ * ib_rdmaoe_ud_header_init() initializes the grh.ip_version, grh.payload_length,
+ * grh.next_header, bth.opcode, bth.pad_count and
+ * bth.transport_header_version fields of a &struct eth_ud_header given
+ * the payload length and whether a GRH will be included.
+ */
+void ib_rdmaoe_ud_header_init(int                  payload_bytes,
+                          int                      grh_present,
+                          struct eth_ud_header    *header)
+{
+       int header_len;
+
+       memset(header, 0, sizeof *header);
+
+       header_len =
+               sizeof header->eth  +
+               IB_BTH_BYTES  +
+               IB_DETH_BYTES;
+       if (grh_present)
+               header_len += IB_GRH_BYTES;
+
+       header->grh_present          = grh_present;
+       if (grh_present) {
+               header->grh.ip_version      = 6;
+               header->grh.payload_length  =
+                       cpu_to_be16((IB_BTH_BYTES     +
+                                    IB_DETH_BYTES    +
+                                    payload_bytes    +
+                                    4                + /* ICRC     */
+                                    3) & ~3);          /* round up */
+               header->grh.next_header     = 0x1b;
+       }
+
+       if (header->immediate_present)
+               header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+       else
+               header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY;
+       header->bth.pad_count                =(u8) ((4 - payload_bytes) & 3);
+       header->bth.transport_header_version = 0;
+}
+
+
+
+/**
+ * rdmaoe_ud_header_pack - Pack UD header struct into eth wire format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * rdmaoe_ud_header_pack() packs the UD header structure @header into wire
+ * format in the buffer @buf.
+ */
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+                      void                 *buf)
+{
+       int len = 0;
+
+       ib_pack(eth_table, ARRAY_SIZE(eth_table),
+               &header->eth, buf);
+       len += IB_ETH_BYTES;
+
+       if (header->grh_present) {
+               ib_pack(grh_table, ARRAY_SIZE(grh_table),
+                       &header->grh, (u8*)buf + len);
+               len += IB_GRH_BYTES;
+       }
+
+       ib_pack(bth_table, ARRAY_SIZE(bth_table),
+               &header->bth, (u8*)buf + len);
+       len += IB_BTH_BYTES;
+
+       ib_pack(deth_table, ARRAY_SIZE(deth_table),
+               &header->deth, (u8*)buf + len);
+       len += IB_DETH_BYTES;
+
+       if (header->immediate_present) {
+               memcpy((u8*)buf + len, &header->immediate_data,
+                      sizeof header->immediate_data);
+               len += sizeof header->immediate_data;
+       }
+
+       return len;
+}
+
+
index 33e08f5..1f7845b 100644 (file)
@@ -336,3 +336,28 @@ int ib_destroy_ah(struct ib_ah *ah)
 }
 EXPORT_SYMBOL(ib_destroy_ah);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+       switch (node_type) {
+       case RDMA_NODE_IB_CA:
+       case RDMA_NODE_IB_SWITCH:
+       case RDMA_NODE_IB_ROUTER:
+               return RDMA_TRANSPORT_IB;
+       case RDMA_NODE_RNIC:
+               return RDMA_TRANSPORT_IWARP;
+       default:
+               ASSERT(FALSE);
+               return 0;
+       }
+}
+
+enum rdma_transport_type rdma_port_get_transport(struct ib_device *device,
+                                                u8 port_num)
+{
+       return device->get_port_transport ?
+               device->get_port_transport(device, port_num) :
+               rdma_node_get_transport(device->node_type);
+}
+EXPORT_SYMBOL(rdma_port_get_transport);
+
index 7a2f615..eebf3cc 100644 (file)
@@ -95,7 +95,6 @@ EvtInterruptIsr(
 \r
 #endif\r
 \r
-static \r
 NTSTATUS\r
 __create_child(\r
        __in WDFDEVICE  Device,\r
@@ -228,44 +227,54 @@ Routine Description:
 \r
        if ( p_fdo->children_created )\r
                goto end;\r
-       \r
+\r
        // eventually we'll have all information about children in Registry\r
        // DriverEntry will read it into a Global storage and\r
        // this routine will create all the children on base on this info\r
        number_of_ib_ports = mlx4_count_ib_ports(mdev);\r
        ASSERT(number_of_ib_ports >=0 && number_of_ib_ports <=2);\r
 \r
+#if 0\r
+       //For now it's either IB or ETH, and we always create LLE if it's ETH\r
+       if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] == MLX4_PORT_TYPE_IB) ) {\r
+               status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );\r
+               if (!NT_SUCCESS(status)) {\r
+                        MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status));\r
+               }\r
+       }\r
+#endif\r
+\r
        for (i = 1; i <= mdev->caps.num_ports; i++) {\r
-        if (mlx4_is_enabled_port(mdev, i)) {\r
-            if(mlx4_is_eth_port(mdev, i)) {\r
-                status = __create_child(Device, ETH_HARDWARE_IDS, ETH_HARDWARE_DESCRIPTION, i);\r
-                if (!NT_SUCCESS(status)) {\r
-                     MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (eth) failed with 0x%x\n", status));\r
-                     break;\r
-                }\r
-                eth_created = TRUE;\r
-            } else {\r
-                if (eth_created){\r
-                    //\r
-                    // Illegal configuration the IB should be the first port\r
-                    //\r
-                    status = STATUS_INVALID_PARAMETER;\r
-                    MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (IB) failed. Invalid configuration, IB should be the first port."));\r
-                    break;                    \r
-                }\r
-                \r
-                if (ib_created){\r
-                    continue;\r
-                }\r
-\r
-                status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );\r
-                if (!NT_SUCCESS(status)) {\r
-                     MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status));\r
-                     break;\r
-                }\r
-                ib_created = TRUE;\r
-            }\r
-        }\r
+               if (mlx4_is_enabled_port(mdev, i)) {\r
+                       if(mlx4_is_eth_port(mdev, i)) {\r
+                               status = __create_child(Device, ETH_HARDWARE_IDS, ETH_HARDWARE_DESCRIPTION, i);\r
+                               if (!NT_SUCCESS(status)) {\r
+                                       MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (eth) failed with 0x%x\n", status));\r
+                                       break;\r
+                               }\r
+                               eth_created = TRUE;\r
+                       } else {\r
+                               if (eth_created){\r
+                                       //\r
+                                       // Illegal configuration: the IB should be the first port\r
+                                       //\r
+                                       status = STATUS_INVALID_PARAMETER;\r
+                                       MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (IB) failed. Invalid configuration, IB should be the first port."));\r
+                                       break;                    \r
+                               }\r
+\r
+                               if (ib_created){\r
+                                       continue;\r
+                               }\r
+\r
+                               status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );\r
+                               if (!NT_SUCCESS(status)) {\r
+                                       MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status));\r
+                                       break;\r
+                               }\r
+                               ib_created = TRUE;\r
+                       }\r
+               }\r
        }\r
 \r
        p_fdo->children_created = TRUE;\r
@@ -869,6 +878,9 @@ EvtPrepareHardware(
                goto err;\r
        }\r
 \r
+       pdev->p_wdf_device = Device;\r
+       pdev->ib_hca_created = 0;\r
+\r
        // start the card\r
        status = __start_card(Device, p_fdo );\r
        if( !NT_SUCCESS( status ) ) \r
index 10c3b11..727bdd2 100644 (file)
@@ -113,7 +113,7 @@ static void __print_mlx( struct mlx4_dev *mdev, struct mlx4_wqe_mlx_seg *p)
 void st_print_mlx_header( struct mlx4_dev *mdev, struct mlx4_ib_sqp *sqp, struct mlx4_wqe_mlx_seg *mlx )
 {
        if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_UDH )
-       __print_ud_header( mdev, &sqp->ud_header );
+               __print_ud_header( mdev, &sqp->hdr.ib );
        if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_WQE )
                __print_mlx( mdev, mlx );
 }
index 85a0da1..cffd92c 100644 (file)
 
 #include "mlx4_ib.h"
 
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static inline int rdma_link_local_addr(struct in6_addr *addr)
+{
+       if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+           addr->s6_addr32[1] == 0)
+               return 1;
+       else
+               return 0;
+}
+
+inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+       memcpy(mac, &addr->s6_addr[8], 3);
+       memcpy(mac + 3, &addr->s6_addr[13], 3);
+       mac[0] ^= 2;   
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+       return addr->s6_addr[0] == 0xff ? 1 : 0;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+       int i;
+
+       mac[0] = 0x33;
+       mac[1] = 0x33;
+       for (i = 2; i < 6; ++i)
+               mac[i] = addr->s6_addr[i + 10];
+
+}
+
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
+                       u8 *mac, int *is_mcast)
+{
+       int err = 0;
+       struct sockaddr_in6 dst;
+
+       UNREFERENCED_PARAMETER(dev);
+
+       *is_mcast = 0;
+       memcpy(dst.sin6_addr.s6_addr, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw));
+
+       if (rdma_link_local_addr(&dst.sin6_addr))
+               rdma_get_ll_mac(&dst.sin6_addr, mac);
+       else if (rdma_is_multicast_addr(&dst.sin6_addr)) {
+               rdma_get_mcast_mac(&dst.sin6_addr, mac);
+               *is_mcast = 1;
+       } else {
+               err = -EINVAL; //jyang:todo
+               ASSERT(FALSE);
+       }
+       return err;
+}
+
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                 struct mlx4_ib_ah *ah)
 {
        struct mlx4_dev *dev = to_mdev(pd->device)->dev;
-       struct mlx4_ib_ah *ah;
 
        if (mlx4_is_barred(pd->device->dma_device))
                return ERR_PTR(-EFAULT);
 
-       ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-       if (!ah)
-               return ERR_PTR(-ENOMEM);
 
-       memset(&ah->av, 0, sizeof ah->av);
-
-       ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-       ah->av.g_slid  = ah_attr->src_path_bits;
-       ah->av.dlid    = cpu_to_be16(ah_attr->dlid);
-       if (ah_attr->static_rate) {
-               ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
-               while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
-                      !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
-                       --ah->av.stat_rate;
-       }
-       ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+       ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+       ah->av.ib.g_slid  = ah_attr->src_path_bits;
        if (ah_attr->ah_flags & IB_AH_GRH) {
-               ah->av.g_slid   |= 0x80;
-               ah->av.gid_index = ah_attr->grh.sgid_index;
-               ah->av.hop_limit = ah_attr->grh.hop_limit;
-               ah->av.sl_tclass_flowlabel |=
+               ah->av.ib.g_slid   |= 0x80;
+               ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+               ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+               ah->av.ib.sl_tclass_flowlabel |=
                        cpu_to_be32((ah_attr->grh.traffic_class << 20) |
                                    ah_attr->grh.flow_label);
-               memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+               memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
        }
 
+       ah->av.ib.dlid    = cpu_to_be16(ah_attr->dlid);
+       if (ah_attr->static_rate) {
+               ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+               while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+                      !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+                       --ah->av.ib.stat_rate;
+       }
+       ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
+       return &ah->ibah;
+}
+
+struct ib_ah *create_rdmaoe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+                                  struct mlx4_ib_ah *ah)
+{
+       struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+       struct mlx4_dev *dev = ibdev->dev;
+       u8 mac[6];
+       int err;
+       int is_mcast;
+
+       if (mlx4_is_barred(pd->device->dma_device))
+               return ERR_PTR(-EFAULT);
+
+       err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast);
+       if (err)
+               return ERR_PTR(err);
+
+       memcpy(ah->av.eth.mac_0_1, mac, 2);
+       memcpy(ah->av.eth.mac_2_5, mac + 2, 4);
+       ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+       ah->av.ib.g_slid = 0x80;
+       if (ah_attr->static_rate) {
+               ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+               while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+                      !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+                       --ah->av.ib.stat_rate;
+       }
+
+       /*
+        * HW requires multicast LID so we just choose one.
+        */
+       if (is_mcast)
+               ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+       memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+       ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
        return &ah->ibah;
 }
 
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+       struct mlx4_ib_ah *ah;
+       enum rdma_transport_type transport;
+
+       struct ib_ah *ret;
+
+       ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+       if (!ah)
+               return ERR_PTR(-ENOMEM);
+
+       transport = rdma_port_get_transport(pd->device, ah_attr->port_num);
+       if (transport == RDMA_TRANSPORT_RDMAOE) {
+               if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+                       ret = ERR_PTR(-EINVAL);
+                       goto out;
+               } else {
+                       /* TBD: handle the case where we are called in an
+                       atomic context, in which we must not sleep. We
+                       don't expect this currently since we're working with
+                       link-local addresses, which we can translate without
+                       going to sleep */
+                       ret = create_rdmaoe_ah(pd, ah_attr, ah);
+                       if (IS_ERR(ret))
+                               goto out;
+                       else
+                               return ret;
+               }
+       } else
+               return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+out:
+       kfree(ah);
+       return ret;
+}
+
+
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 {
        struct mlx4_ib_ah *ah = to_mah(ibah);
+       enum rdma_transport_type transport;
+
+       transport = rdma_port_get_transport(ibah->device, ah_attr->port_num);
 
        if (mlx4_is_barred(ibah->device->dma_device))
                return -EFAULT;
 
        memset(ah_attr, 0, sizeof *ah_attr);
-       ah_attr->dlid          = be16_to_cpu(ah->av.dlid);
-       ah_attr->sl            = (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
-       ah_attr->port_num      = (u8)(be32_to_cpu(ah->av.port_pd) >> 24);
-       if (ah->av.stat_rate)
-               ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
-       ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+       ah_attr->dlid          = transport == RDMA_TRANSPORT_IB ? be16_to_cpu(ah->av.ib.dlid) : 0;
+       ah_attr->sl            = (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);
+       ah_attr->port_num      = (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24);
+       if (ah->av.ib.stat_rate)
+               ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
+       ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
 
        if (mlx4_ib_ah_grh_present(ah)) {
                ah_attr->ah_flags = IB_AH_GRH;
 
                ah_attr->grh.traffic_class =
-                       (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20);
+                       (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20);
                ah_attr->grh.flow_label =
-                       be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
-               ah_attr->grh.hop_limit  = ah->av.hop_limit;
-               ah_attr->grh.sgid_index = ah->av.gid_index;
-               memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+                       be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+               ah_attr->grh.hop_limit  = ah->av.ib.hop_limit;
+               ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+               memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
        }
 
        return 0;
@@ -108,7 +239,7 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah)
 // Leo: temporary 
 int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr )
 {
-       struct mlx4_av *av       = &to_mah(ibah)->av;
+       struct mlx4_av *av       = &to_mah(ibah)->av.ib;
        struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev;
 
        if (mlx4_is_barred(dev))
index 79375d9..98d38e4 100644 (file)
@@ -133,31 +133,21 @@ out:
        return err;
 }
 
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
-                             struct ib_port_attr *props)
-{
-       struct ib_smp *in_mad  = NULL;
-       struct ib_smp *out_mad = NULL;
-       int err = -ENOMEM;
 
-       if (mlx4_is_barred(ibdev->dma_device))
-               return -EFAULT;
-       
-       in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-       out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-       if (!in_mad || !out_mad)
-               goto out;
-
-       memset(props, 0, sizeof *props);
+static enum rdma_transport_type
+mlx4_ib_port_get_transport(struct ib_device *device, u8 port_num)
+{
+       struct mlx4_dev *dev = to_mdev(device)->dev;
 
-       init_query_mad(in_mad);
-       in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
-       in_mad->attr_mod = cpu_to_be32(port);
+       return dev->caps.port_mask & (1 << (port_num - 1)) ?
+               RDMA_TRANSPORT_IB : RDMA_TRANSPORT_RDMAOE;
+}
 
-       err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
-       if (err)
-               goto out;
 
+static void ib_link_query_port(struct ib_device *ibdev, u8 port,
+                              struct ib_port_attr *props,
+                              struct ib_smp *out_mad)
+{
        props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc              = out_mad->data[34] & 0x7;
        props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -177,6 +167,63 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
        props->subnet_timeout   = out_mad->data[51] & 0x1f;
        props->max_vl_num       = out_mad->data[37] >> 4;
        props->init_type_reply  = out_mad->data[41] >> 4;
+       props->transport= RDMA_TRANSPORT_IB;
+}
+
+static void eth_link_query_port(struct ib_device *ibdev, u8 port,
+                               struct ib_port_attr *props,
+                               struct ib_smp *out_mad)
+{
+
+       props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
+       props->gid_tbl_len      = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+       props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
+       props->pkey_tbl_len     = (u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port];
+       props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
+       props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
+       props->active_width     = out_mad->data[31] & 0xf;
+       props->active_speed     = out_mad->data[35] >> 4;
+       props->max_mtu          = out_mad->data[41] & 0xf;
+       //props->active_mtu     = rdmaoe->mtu[port - 1];
+       props->active_mtu       = 1500; //jyang:hardcoded
+       props->subnet_timeout   = out_mad->data[51] & 0x1f;
+       props->max_vl_num       = out_mad->data[37] >> 4;
+       props->init_type_reply  = out_mad->data[41] >> 4;
+       props->transport= RDMA_TRANSPORT_RDMAOE;
+
+       //props->state          = netif_running(ndev) &&  netif_oper_up(ndev) ?
+       //                              IB_PORT_ACTIVE : IB_PORT_DOWN;
+       props->state            = IB_PORT_ACTIVE; //jyang: just hardcoded it now
+       props->phys_state       = props->state;
+}
+
+
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+                             struct ib_port_attr *props)
+{
+       struct ib_smp *in_mad  = NULL;
+       struct ib_smp *out_mad = NULL;
+       int err = -ENOMEM;
+
+       in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
+       out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+       if (!in_mad || !out_mad)
+               goto out;
+
+       memset(props, 0, sizeof *props);
+
+       init_query_mad(in_mad);
+       in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+       in_mad->attr_mod = cpu_to_be32(port);
+
+       err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+       if (err)
+               goto out;
+
+       mlx4_ib_port_get_transport(ibdev, port) == RDMA_TRANSPORT_IB ?
+               ib_link_query_port(ibdev, port, props, out_mad) :
+               eth_link_query_port(ibdev, port, props, out_mad);
 
 out:
        kfree(in_mad);
@@ -522,6 +569,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.uverbs_abi_ver    = MLX4_IB_UVERBS_ABI_VERSION;
        ibdev->ib_dev.query_device      = mlx4_ib_query_device;
        ibdev->ib_dev.query_port        = mlx4_ib_query_port;
+       ibdev->ib_dev.get_port_transport = mlx4_ib_port_get_transport;
        ibdev->ib_dev.query_gid_chunk   = mlx4_ib_query_gid_chunk;
        ibdev->ib_dev.query_pkey_chunk  = mlx4_ib_query_pkey_chunk;
        ibdev->ib_dev.modify_device     = mlx4_ib_modify_device;
index 92255af..c2a2cc8 100644 (file)
@@ -165,14 +165,15 @@ struct mlx4_ib_srq {
 
 struct mlx4_ib_ah {
        struct ib_ah            ibah;
-       struct mlx4_av          av;
+       union mlx4_ext_av   av;
 };
 
+
 enum {
        /*
         * Largest possible UD header: send with GRH and immediate data.
         */
-       MLX4_IB_UD_HEADER_SIZE          = 72
+       MLX4_IB_UD_HEADER_SIZE          = 76
 };
 
 struct mlx4_ib_sqp {
@@ -180,7 +181,10 @@ struct mlx4_ib_sqp {
        int                     pkey_index;
        u32                     qkey;
        u32                     send_psn;
-       struct ib_ud_header     ud_header;
+       union {
+               struct ib_ud_header     ib;
+               struct eth_ud_header    eth;
+       } hdr;
        u8                      header_buf[MLX4_IB_UD_HEADER_SIZE];
 };
 
@@ -340,9 +344,14 @@ void mlx4_ib_qp_init();
 int __init mlx4_ib_init(void);
 void __exit mlx4_ib_cleanup(void);
 
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
+                       u8 *mac, int *is_mcast);
+
+
 static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
-       return !!(ah->av.g_slid & 0x80);
+       return !!(ah->av.ib.g_slid & 0x80);
+
 }
 
 #endif /* MLX4_IB_H */
index 8ffca0f..263a47a 100644 (file)
@@ -46,7 +46,13 @@ enum {
 \r
 enum {\r
        MLX4_IB_DEFAULT_SCHED_QUEUE     = 0x83,\r
-       MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f\r
+       MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,\r
+       MLX4_IB_LINK_TYPE_IB            = 0,\r
+       MLX4_IB_LINK_TYPE_ETH           = 1\r
+};\r
+\r
+enum {\r
+       MLX4_RDMAOE_ETHERTYPE = 0x8915\r
 };\r
 \r
 enum {\r
@@ -62,9 +68,23 @@ static const __be32 mlx4_ib_opcode[] = {
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),          /*      [IB_WR_ATOMIC_CMP_AND_SWP]      */\r
        __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),          /*      [IB_WR_ATOMIC_FETCH_AND_ADD]*/\r
        __constant_cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)),     /*      [IB_WR_LSO]                                     */\r
+\r
+\r
+       __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),         /*      [IB_WR_SEND_WITH_INV]   */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),          /*      [IB_WR_RDMA_READ_WITH_INV]      */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),        /*      [IB_WR_LOCAL_INV]       */\r
+       __constant_cpu_to_be32(MLX4_OPCODE_FMR),                        /*      [IB_WR_FAST_REG_MR]     */\r
+\r
+\r
+\r
        __constant_cpu_to_be32(MLX4_OPCODE_NOP)                         /*      [IB_WR_NOP]                                     */\r
 };\r
 \r
+\r
+// TODO: confirm whether IB_WR_RDMA_READ_WITH_INV needs an entry in mlx4_ib_opcode[]\r
+\r
+extern inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac);\r
+\r
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)\r
 {\r
        return container_of(mqp, struct mlx4_ib_sqp, qp);\r
@@ -724,6 +744,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,\r
                         struct mlx4_qp_path *path, u8 port)\r
 {\r
+       int err;\r
+       int is_eth = rdma_port_get_transport(&dev->ib_dev, port) ==\r
+               RDMA_TRANSPORT_RDMAOE ? 1 : 0;\r
+       u8 mac[6];\r
+       int is_mcast;\r
+\r
        path->grh_mylmc     = ah->src_path_bits & 0x7f;\r
        path->rlid          = cpu_to_be16(ah->dlid);\r
        if (ah->static_rate) {\r
@@ -754,7 +780,21 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
        path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |\r
                ((port - 1) << 6) | ((ah->sl & 0xf) << 2);\r
 \r
-       return 0;\r
+       if (is_eth) {\r
+               if (!(ah->ah_flags & IB_AH_GRH))\r
+                       return -1;\r
+\r
+               err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast);\r
+               if (err)\r
+                       return err;\r
+\r
+               memcpy(path->dmac, mac, 6);\r
+               path->ackto = MLX4_IB_LINK_TYPE_ETH;\r
+               /* use index 0 into MAC table for RDMAoE */\r
+               path->grh_mylmc &= 0x80;\r
+       }\r
+\r
+    return 0;\r
 }\r
 \r
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,\r
@@ -1146,79 +1186,132 @@ static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr)
        return opcode;\r
 }\r
 \r
+\r
+\r
+\r
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,\r
-                           void *wqe)\r
+                           void *wqe, unsigned *mlx_seg_len)\r
 {\r
        enum ib_wr_opcode opcode = to_wr_opcode(wr);\r
        struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;\r
        struct mlx4_wqe_mlx_seg *mlx = wqe;\r
        struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);\r
        struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);\r
-       __be16 pkey;\r
+       u16 pkey;\r
        int send_size;\r
        int header_size;\r
        int spc;\r
-       u32 i;\r
+       u16 i;\r
+       struct ib_ud_header *ib = NULL;\r
+       struct eth_ud_header *eth = NULL;\r
+       struct ib_unpacked_grh *grh;\r
+       struct ib_unpacked_bth  *bth;\r
+       struct ib_unpacked_deth *deth;\r
+       u8 *tmp;\r
+       u8 mac[6];\r
 \r
        send_size = 0;\r
        for (i = 0; i < wr->num_ds; ++i)\r
                send_size += wr->ds_array[i].length;\r
 \r
-       ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);\r
+       if (rdma_port_get_transport(sqp->qp.ibqp.device, sqp->qp.port) == RDMA_TRANSPORT_IB) {\r
+\r
+               ib = &sqp->hdr.ib;\r
+               grh = &ib->grh;\r
+               bth = &ib->bth;\r
+               deth = &ib->deth;\r
+               ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), ib);\r
+               ib->lrh.service_level   =\r
+                       (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);\r
+               ib->lrh.destination_lid = ah->av.ib.dlid;\r
+               ib->lrh.source_lid      = cpu_to_be16(ah->av.ib.g_slid & 0x7f);\r
+       } else {\r
+               eth = &sqp->hdr.eth;\r
+               grh = &eth->grh;\r
+               bth = &eth->bth;\r
+               deth = &eth->deth;\r
+               ib_rdmaoe_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), eth);\r
+       }\r
 \r
-       sqp->ud_header.lrh.service_level   =\r
-               (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);\r
-       sqp->ud_header.lrh.destination_lid = ah->av.dlid;\r
-       sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid & 0x7f);\r
+       \r
        if (mlx4_ib_ah_grh_present(ah)) {\r
-               sqp->ud_header.grh.traffic_class =\r
-                       (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);\r
-               sqp->ud_header.grh.flow_label    =\r
-                       ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);\r
-               sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;\r
-               ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),\r
-                                 ah->av.gid_index, &sqp->ud_header.grh.source_gid);\r
-               memcpy(sqp->ud_header.grh.destination_gid.raw,\r
-                      ah->av.dgid, 16);\r
+               grh->traffic_class =\r
+                       (u8)((be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff);\r
+               grh->flow_label    =\r
+                       ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);\r
+               grh->hop_limit     = ah->av.ib.hop_limit;\r
+               ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24),\r
+                                 ah->av.ib.gid_index, &grh->source_gid);\r
+               memcpy(grh->destination_gid.raw,\r
+                          ah->av.ib.dgid, 16);\r
        }\r
 \r
        mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);\r
-       mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |\r
-                                 (sqp->ud_header.lrh.destination_lid ==\r
-                                  XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |\r
-                                 (sqp->ud_header.lrh.service_level << 8));\r
-       mlx->rlid   = sqp->ud_header.lrh.destination_lid;\r
+\r
+       if (ib) {\r
+               mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |\r
+                                         (ib->lrh.destination_lid ==\r
+                                          IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |\r
+                                         (ib->lrh.service_level << 8));\r
+               mlx->rlid   = ib->lrh.destination_lid;\r
+\r
+       }\r
 \r
        switch (opcode) {\r
        case IB_WR_SEND:\r
-               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;\r
-               sqp->ud_header.immediate_present = 0;\r
+               bth->opcode      = IB_OPCODE_UD_SEND_ONLY;\r
+               if (ib)\r
+                       ib->immediate_present = 0;\r
+               else\r
+                       eth->immediate_present = 0;\r
                break;\r
        case IB_WR_SEND_WITH_IMM:\r
-               sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;\r
-               sqp->ud_header.immediate_present = 1;\r
-               sqp->ud_header.immediate_data    = wr->immediate_data;\r
+               bth->opcode      = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;\r
+               if (ib) {\r
+                       ib->immediate_present = 1;\r
+                       ib->immediate_data    = wr->immediate_data;\r
+               } else {\r
+                       eth->immediate_present = 1;\r
+                       eth->immediate_data    = wr->immediate_data;\r
+               }\r
                break;\r
        default:\r
                return -EINVAL;\r
        }\r
 \r
-       sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;\r
-       if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)\r
-               sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;\r
-       sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));\r
+       if (ib) {\r
+               ib->lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;\r
+               if (ib->lrh.destination_lid == IB_LID_PERMISSIVE)\r
+                       ib->lrh.source_lid = IB_LID_PERMISSIVE;\r
+       } else {\r
+               memcpy(eth->eth.dmac_h, ah->av.eth.mac_0_1, 2);\r
+               memcpy(eth->eth.dmac_h + 2, ah->av.eth.mac_2_5, 2);\r
+               memcpy(eth->eth.dmac_l, ah->av.eth.mac_2_5 + 2, 2);\r
+               rdma_get_ll_mac((struct in6_addr *)&grh->source_gid, mac);\r
+\r
+               tmp = mac;\r
+               memcpy(eth->eth.smac_h, tmp, 2);\r
+               memcpy(eth->eth.smac_l, tmp + 2, 4);\r
+               eth->eth.type = cpu_to_be16(MLX4_RDMAOE_ETHERTYPE);\r
+       }\r
+\r
+       bth->solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));\r
+\r
        if (!sqp->qp.ibqp.qp_num)\r
                ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);\r
        else\r
                ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey);\r
-       sqp->ud_header.bth.pkey = pkey;\r
-       sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;\r
-       sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));\r
-       sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?\r
-               cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;\r
-       sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);\r
-\r
-       header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);\r
+       bth->pkey = pkey;\r
+       bth->destination_qpn = wr->dgrm.ud.remote_qp;\r
+       bth->psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));\r
+       deth->qkey = wr->dgrm.ud.remote_qkey & 0x80000000 ?\r
+                                                  cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;\r
+       deth->source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);\r
+\r
+       if (ib)\r
+               header_size = ib_ud_header_pack(ib, sqp->header_buf);\r
+       else\r
+               header_size = rdmaoe_ud_header_pack(eth, sqp->header_buf);\r
 \r
 #if 0\r
        {\r
@@ -1271,7 +1364,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,
                i = 2;\r
        }\r
 \r
-       return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);\r
+       *mlx_seg_len =\r
+               ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);\r
+       return 0;\r
+\r
 }\r
 \r
 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)\r
@@ -1314,9 +1410,13 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr)
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,\r
                             ib_send_wr_t *wr)\r
 {\r
+\r
        memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));\r
        dseg->dqpn = wr->dgrm.ud.remote_qp;\r
        dseg->qkey = wr->dgrm.ud.remote_qkey;\r
+       dseg->vlan = to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.vlan;\r
+       memcpy(dseg->mac_0_1, to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.mac_0_1, 6);\r
+\r
 }\r
 \r
 static void set_mlx_icrc_seg(void *dseg)\r
@@ -1398,7 +1498,7 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, ib_send_wr_t *wr,
 int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,\r
                      ib_send_wr_t **bad_wr)\r
 {\r
-       enum ib_wr_opcode opcode;\r
+       enum ib_wr_opcode opcode;\r
        struct mlx4_ib_qp *qp = to_mqp(ibqp);\r
        struct mlx4_dev *dev = to_mdev(ibqp->device)->dev;\r
        u8 *wqe /*, *wqe_start*/;\r
@@ -1525,16 +1625,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
 \r
                case IB_QPT_SMI:\r
                case IB_QPT_GSI:\r
-                       err = build_mlx_header(to_msqp(qp), wr, ctrl);\r
+                       err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);\r
                        if (err < 0) {\r
                                if (bad_wr)\r
                                        *bad_wr = wr;\r
                                goto out;\r
                        }\r
-                       \r
-                       wqe  += err;\r
-                       size += err / 16;\r
-\r
+                       wqe  += seglen;\r
+                       size += seglen / 16;\r
                        err = 0;\r
                        break;\r
 \r
index 94f01a4..56e2be5 100644 (file)
@@ -138,6 +138,7 @@ enum {
        MLX4_SET_PORT_MAC_TABLE = 0x2,
        MLX4_SET_PORT_VLAN_TABLE = 0x3,
        MLX4_SET_PORT_PRIO_MAP  = 0x4,
+       MLX4_SET_PORT_GID_TABLE = 0x5,
 };
 
 struct mlx4_dev;
index 5f6f134..daf2bee 100644 (file)
@@ -208,8 +208,9 @@ struct mlx4_caps {
        int                     log_num_prios;
        int                     num_fc_exch;
        enum mlx4_port_type     port_type[MLX4_MAX_PORTS + 1];
-    enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
-    int                        reserved_fexch_mpts_base;   
+       u32                     port_mask;
+       enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
+       int                     reserved_fexch_mpts_base;   
        int                     total_reserved_qps;
 };
 
@@ -343,6 +344,28 @@ struct mlx4_av {
        u8                      dgid[16];
 };
 
+struct mlx4_eth_av {
+       __be32          port_pd;
+       u8              reserved1;
+       u8              smac_idx;
+       u16             reserved2;
+       u8              reserved3;
+       u8              gid_index;
+       u8              stat_rate;
+       u8              hop_limit;
+       __be32          sl_tclass_flowlabel;
+       u8              dgid[16];
+       u32             reserved4[2];
+       __be16          vlan;
+       u8              mac_0_1[2];
+       u8              mac_2_5[4];
+};
+
+union mlx4_ext_av {
+       struct mlx4_av          ib;
+       struct mlx4_eth_av      eth;
+};
+
 #define MLX4_DEV_SIGNATURE     0xf1b34a6e
 
 struct mlx4_dev_params {
index ac7283d..6c50e11 100644 (file)
@@ -39,6 +39,7 @@
 
 enum {
        IB_LRH_BYTES  = 8,
+       IB_ETH_BYTES  = 14,
        IB_GRH_BYTES  = 40,
        IB_BTH_BYTES  = 12,
        IB_DETH_BYTES = 8
@@ -212,6 +213,15 @@ struct ib_unpacked_deth {
        __be32       source_qpn;
 };
 
+struct ib_unpacked_eth {
+       u8      dmac_h[4];
+       u8      dmac_l[2];
+       u8      smac_h[2];
+       u8      smac_l[4];
+       __be16  type;
+};
+
+
 struct ib_ud_header {
        struct ib_unpacked_lrh  lrh;
        int                     grh_present;
@@ -222,6 +232,19 @@ struct ib_ud_header {
        __be32                  immediate_data;
 };
 
+
+
+struct eth_ud_header {
+       struct ib_unpacked_eth  eth;
+       int                     grh_present;
+       struct ib_unpacked_grh  grh;
+       struct ib_unpacked_bth  bth;
+       struct ib_unpacked_deth deth;
+       int                     immediate_present;
+       __be32                  immediate_data;
+};
+
+
 void ib_pack(const struct ib_field        *desc,
             int                           desc_len,
             void                         *structure,
@@ -236,10 +259,18 @@ void ib_ud_header_init(int                   payload_bytes,
                       int                 grh_present,
                       struct ib_ud_header *header);
 
+void ib_rdmaoe_ud_header_init(int                 payload_bytes,
+                          int                     grh_present,
+                          struct eth_ud_header   *header);
+
 int ib_ud_header_pack(struct ib_ud_header *header,
                      void                *buf);
 
 int ib_ud_header_unpack(void                *buf,
                        struct ib_ud_header *header);
 
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+                      void                 *buf);
+
+
 #endif /* IB_PACK_H */
index a832ae8..c8bd01b 100644 (file)
@@ -53,6 +53,34 @@ union ib_gid {
 \r
 #include "ib_verbs_ex.h"\r
 \r
+/*\r
+ *     IPv6 address structure\r
+ */\r
+\r
+struct in6_addr\r
+{\r
+       union \r
+       {\r
+               __u8            u6_addr8[16];\r
+               __be16          u6_addr16[8];\r
+               __be32          u6_addr32[4];\r
+       } in6_u;\r
+#define s6_addr                        in6_u.u6_addr8\r
+#define s6_addr16              in6_u.u6_addr16\r
+#define s6_addr32              in6_u.u6_addr32\r
+};\r
+\r
+\r
+struct sockaddr_in6 {\r
+       unsigned short int      sin6_family;    /* AF_INET6 */\r
+       __be16                  sin6_port;      /* Transport layer port # */\r
+       __be32                  sin6_flowinfo;  /* IPv6 flow information */\r
+       struct in6_addr         sin6_addr;      /* IPv6 address */\r
+       __u32                   sin6_scope_id;  /* scope id (new in RFC2553) */\r
+};\r
+\r
+#define AF_INET6       10      /* IP version 6                 */\r
+\r
 enum rdma_node_type {\r
        /* IB values map to NodeInfo:NodeType. */\r
        RDMA_NODE_IB_CA         = 1,\r
@@ -63,7 +91,8 @@ enum rdma_node_type {
 \r
 enum rdma_transport_type {\r
        RDMA_TRANSPORT_IB,\r
-       RDMA_TRANSPORT_IWARP\r
+       RDMA_TRANSPORT_IWARP,\r
+       RDMA_TRANSPORT_RDMAOE\r
 };\r
 \r
 enum rdma_transport_type\r
@@ -231,6 +260,7 @@ struct ib_port_attr {
        u8                      active_width;\r
        u8                      active_speed;\r
        u8                      phys_state;\r
+       enum rdma_transport_type        transport;\r
 };\r
 \r
 enum ib_device_modify_flags {\r
@@ -633,6 +663,10 @@ enum ib_wr_opcode {
        IB_WR_ATOMIC_CMP_AND_SWP,\r
        IB_WR_ATOMIC_FETCH_AND_ADD,\r
        IB_WR_LSO,\r
+       IB_WR_SEND_WITH_INV,\r
+       IB_WR_RDMA_READ_WITH_INV,\r
+       IB_WR_LOCAL_INV,\r
+       IB_WR_FAST_REG_MR,\r
        IB_WR_NOP\r
 };\r
 \r
@@ -920,6 +954,9 @@ struct ib_device {
        int                        (*query_port)(struct ib_device *device,\r
                                                 u8 port_num,\r
                                                 struct ib_port_attr *port_attr);\r
+       enum rdma_transport_type   (*get_port_transport)(struct ib_device *device,\r
+                                                        u8 port_num);\r
+\r
        int                        (*query_gid_chunk)(struct ib_device *device,\r
                                                u8 port_num, int index,\r
                                                union ib_gid gid[8], int size);\r
@@ -1127,6 +1164,11 @@ int ib_query_device(struct ib_device *device,
 int ib_query_port(struct ib_device *device,\r
                  u8 port_num, struct ib_port_attr *port_attr);\r
 \r
+enum rdma_transport_type rdma_port_get_transport(struct ib_device *device,\r
+                                                u8 port_num);\r
+int rdma_is_transport_supported(struct ib_device *device,\r
+                               enum rdma_transport_type transport);\r
+\r
 int ib_query_gid_chunk(struct ib_device *device,\r
                 u8 port_num, int index, union ib_gid gid[8], int size);\r
 \r
index 89e3295..a6ba237 100644 (file)
@@ -113,7 +113,9 @@ struct mlx4_qp_path {
        u8                      snooper_flags;
        u8                      reserved3[2];
        u8                      counter_index;
-       u8                      reserved4[7];
+       u8                      reserved4;
+       u8                      dmac[6];
+
 };
 
 struct mlx4_qp_context {
@@ -213,7 +215,9 @@ struct mlx4_wqe_datagram_seg {
        __be32                  av[8];
        __be32                  dqpn;
        __be32                  qkey;
-       __be32                  reservd[2];
+       __be16                  vlan;
+       u8                      mac_0_1[2];
+       u8                      mac_2_5[4];
 };
 
 #pragma warning( disable : 4200)
index 37441cf..e06a125 100644 (file)
@@ -31,7 +31,7 @@ SOURCES= net.rc               \
        srq.c                   \\r
         port.c                  \\r
 \r
-INCLUDES=..;..\inc;..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\..\inc\kernel;\r
+INCLUDES=..;..\inc;..\..\inc;..\..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\..\inc\kernel;\r
 \r
 C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS \r
 #-DFORCE_LIVEFISH\r
index c9fdc1d..54fd42c 100644 (file)
@@ -170,6 +170,16 @@ BOOLEAN mlx4_is_enabled_port(struct mlx4_dev *dev, int port_number)
        return FALSE;
 }
 
+static void mlx4_set_port_mask(struct mlx4_dev *dev)
+{
+       int i;
+
+       dev->caps.port_mask = 0;
+       for (i = 1; i <= dev->caps.num_ports; ++i)
+               if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
+                       dev->caps.port_mask |= 1 << (i - 1);
+}
+
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        int err;
@@ -309,6 +319,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                        ++num_eth_ports;
        }
 
+       mlx4_set_port_mask(dev);
+
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
index 0381400..3bc3b80 100644 (file)
@@ -33,7 +33,9 @@
 \r
 #include "mlx4.h"\r
 #include "cmd.h"\r
+#include "public.h"\r
 \r
+extern NTSTATUS __create_child();\r
 \r
 void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port)\r
 {\r
@@ -88,6 +90,52 @@ static int mlx4_SET_PORT_mac_table(struct mlx4_dev *dev, u8 port,
        return err;\r
 }\r
 \r
+static void mlx4_addrconf_ifid_eui48_win(u8 *eui, u64 mac)\r
+{\r
+    u8 *p = (u8*)&mac+2; /* MAC occupies the low 6 bytes of the big-endian u64; skip the 2 pad bytes */\r
+       memcpy(eui, p, 3);\r
+       memcpy(eui + 5, p + 3, 3);\r
+       eui[3] = 0xFF;\r
+       eui[4] = 0xFE;\r
+       eui[0] ^= 2;\r
+}\r
+\r
+\r
+static int update_ipv6_gids_win(struct mlx4_dev *dev, int port, int clear, u64 mac)\r
+{\r
+       struct mlx4_cmd_mailbox *mailbox;\r
+       union ib_gid *gids, *tmpgids;\r
+       int err;\r
+\r
+       tmpgids = kzalloc(128 * sizeof *gids, GFP_ATOMIC);\r
+       if (!tmpgids)\r
+               return -ENOMEM;\r
+\r
+       if (!clear) {\r
+               mlx4_addrconf_ifid_eui48_win(&tmpgids[0].raw[8], cpu_to_be64(mac));\r
+               tmpgids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);\r
+       }\r
+\r
+       mailbox = mlx4_alloc_cmd_mailbox(dev);\r
+       if (IS_ERR(mailbox)) {\r
+               err = PTR_ERR(mailbox);\r
+               goto out;\r
+       }\r
+\r
+       gids = mailbox->buf;\r
+       memcpy(gids, tmpgids, 128 * sizeof *gids);\r
+\r
+       err = mlx4_cmd(dev, mailbox->dma.da, MLX4_SET_PORT_GID_TABLE << 8 | port,\r
+                      1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);\r
+\r
+       mlx4_free_cmd_mailbox(dev, mailbox);\r
+\r
+out:\r
+       kfree(tmpgids);\r
+       return err;\r
+}\r
+\r
+\r
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)\r
 {\r
        struct mlx4_mac_table *table =\r
@@ -136,6 +184,26 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
 \r
        *index = free;\r
        ++table->total;\r
+\r
+       //update port guid with mac address\r
+       update_ipv6_gids_win(dev, port, 0, mac);\r
+\r
+#if 0\r
+\r
+// TODO: Tzachid 9/12/2009 Need to think of a better way of how to create the LLE\r
+// interface\r
+\r
+   \r
+       if(!InterlockedExchange(&dev->pdev->ib_hca_created, 1))\r
+       {\r
+       NTSTATUS status = STATUS_SUCCESS;\r
+               status = __create_child(dev->pdev->p_wdf_device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );\r
+               if (!NT_SUCCESS(status)) {\r
+                        mlx4_err(dev, "__create_child (ib)failed with 0x%x\n", status);\r
+                        dev->pdev->ib_hca_created = FALSE;\r
+               }\r
+       }\r
+#endif\r
 out:\r
        up(&table->mac_sem);\r
        return err;\r
index ccd8393..3184874 100644 (file)
@@ -74,6 +74,7 @@ mlnx_create_av (
        p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr);
        if (IS_ERR(p_ib_ah)) {
                err = PTR_ERR(p_ib_ah);
+               status = errno_to_iberr(err);
                HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n", err));
                goto err_create_ah;
        }
index 5962a14..52630b7 100644 (file)
@@ -339,6 +339,7 @@ from_hca_cap(
                        ibal_port_p->max_vls    = mthca_port_p->max_vl_num;\r
                        ibal_port_p->sm_lid     = cl_ntoh16(mthca_port_p->sm_lid);\r
                        ibal_port_p->sm_sl      = mthca_port_p->sm_sl;\r
+                       ibal_port_p->transport  = mthca_port_p->transport;\r
                        ibal_port_p->link_state = (mthca_port_p->state != 0) ? (uint8_t)mthca_port_p->state : IB_LINK_DOWN;\r
                        ibal_port_p->num_gids   = (uint16_t)mthca_port_p->gid_tbl_len;\r
                        ibal_port_p->num_pkeys  = mthca_port_p->pkey_tbl_len;\r
index 819cb42..a46ea43 100644 (file)
@@ -185,6 +185,8 @@ struct pci_dev
        DMA_ADAPTER             *                               p_dma_adapter;  /* HCA adapter object */
        DEVICE_OBJECT   *                               p_self_do;              /* mlx4_bus's FDO */
        DEVICE_OBJECT   *                               pdo;                    /* mlx4_bus's PDO */
+       PVOID                           p_wdf_device;   /* wdf_device */
+       LONG                                                    ib_hca_created;
        // mlx4_ib: various objects and info    
        struct ib_device *                              ib_dev;
        // mlx4_net: various objects and info   
index c7cdf38..4f942ff 100644 (file)
@@ -9419,6 +9419,8 @@ typedef struct _ib_port_attr
        TO_LONG_PTR(ib_gid_t*,  p_gid_table);\r
        TO_LONG_PTR(ib_net16_t*,p_pkey_table);\r
 \r
+       enum rdma_transport_type        transport;\r
+\r
 }      ib_port_attr_t;\r
 /*\r
 * SEE ALSO\r
index 1b0f1f4..7465a92 100644 (file)
@@ -8861,7 +8861,11 @@ typedef struct _ib_port_attr {
         */\r
        ib_gid_t *p_gid_table;\r
        ib_net16_t *p_pkey_table;\r
+\r
+       enum rdma_transport_type        transport;\r
+\r
 } ib_port_attr_t;\r
+\r
 /*\r
 * SEE ALSO\r
 *      uint8_t, ib_port_cap_t, ib_link_states_t\r