[HW] improved memory allocation mechanism. (posix_memalign was implemented by Virtual...
author leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 1 Jul 2008 10:33:46 +0000 (10:33 +0000)
committer leonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Tue, 1 Jul 2008 10:33:46 +0000 (10:33 +0000)
git-svn-id: svn://openib.tc.cornell.edu/gen1/trunk@1311 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

21 files changed:
hw/mlx4/kernel/bus/core/l2w_umem.c
hw/mlx4/kernel/bus/ib/cq.c
hw/mlx4/kernel/bus/ib/doorbell.c
hw/mlx4/kernel/bus/ib/mr.c
hw/mlx4/kernel/bus/ib/qp.c
hw/mlx4/kernel/bus/ib/srq.c
hw/mlx4/kernel/hca/mr.c
hw/mlx4/kernel/inc/l2w_umem.h
hw/mlx4/user/hca/buf.c
hw/mlx4/user/hca/l2w.h
hw/mlx4/user/hca/qp.c
hw/mlx4/user/hca/srq.c
hw/mlx4/user/hca/verbs.c
hw/mthca/kernel/hca_memory.c
hw/mthca/kernel/ib_verbs.h
hw/mthca/kernel/mt_verbs.c
hw/mthca/kernel/mthca_provider.c
hw/mthca/user/mlnx_ual_srq.c
hw/mthca/user/mlnx_uvp_memfree.c
hw/mthca/user/mlnx_uvp_verbs.c
hw/mthca/user/mt_l2w.h

index ebb5061..e2906ef 100644 (file)
@@ -9,8 +9,20 @@
 void ib_umem_release(struct ib_umem *p_ib_umem)
 {
        MLX4_ENTER(MLX4_DBG_MEMORY);
-       if (p_ib_umem->secure_handle)
-               MmUnsecureVirtualMemory( p_ib_umem->secure_handle );
+       if (p_ib_umem->secure_handle) {
+               __try {
+                       MmUnsecureVirtualMemory( p_ib_umem->secure_handle );
+                       p_ib_umem->secure_handle = NULL;
+               }
+               __except (EXCEPTION_EXECUTE_HANDLER) {
+                       NTSTATUS Status = GetExceptionCode();
+                       UNUSED_PARAM_WOWPP(Status);
+                       MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,
+                               ("Exception 0x%x on MmUnsecureVirtualMemory(), addr %I64x, size %I64x, seg_num %d, nr_pages %d\n", 
+                               Status, p_ib_umem->iobuf.va, (u64)p_ib_umem->iobuf.size, 
+                               p_ib_umem->iobuf.seg_num, p_ib_umem->iobuf.nr_pages ));
+               }
+       }
        if (p_ib_umem->iobuf_used)
                iobuf_deregister_with_cash(&p_ib_umem->iobuf);
        kfree(p_ib_umem);
@@ -26,7 +38,7 @@ void ib_umem_release(struct ib_umem *p_ib_umem)
  * @access: IB_ACCESS_xxx flags for memory being pinned
  */
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr,
-                           size_t size, enum ib_access_flags access)
+                           size_t size, enum ib_access_flags access, boolean_t secure)
 {
        int err;
        struct ib_umem *p_ib_umem;
@@ -52,7 +64,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr,
        // TODO: map the memory for DMA
        
        // secure memory
-       if (!context)
+       if (!context || !secure)
                goto done;
        __try {
                p_ib_umem->secure_handle = MmSecureVirtualMemory ( 
index 9bfcd91..c24a166 100644 (file)
@@ -142,7 +142,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
                }
 
                cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
-                                      IB_ACCESS_LOCAL_WRITE);
+                                      IB_ACCESS_LOCAL_WRITE, FALSE);
                if (IS_ERR(cq->umem)) {
                        err = PTR_ERR(cq->umem);
                        goto err_cq;
index 0d9a6f3..43f2621 100644 (file)
@@ -182,7 +182,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, u64 virt,
        page->user_virt = virt & (u64)PAGE_MASK;
        page->refcnt    = 0;
        page->umem      = ib_umem_get(&context->ibucontext, virt & (u64)PAGE_MASK,
-                                     PAGE_SIZE, 0);
+                                     PAGE_SIZE, 0, FALSE);
        if (IS_ERR(page->umem)) {
                err = PTR_ERR(page->umem);
                kfree(page);
index 4608e58..1475d9e 100644 (file)
@@ -129,7 +129,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       mr->umem = ib_umem_get(pd->p_uctx, start, (size_t)length, access_flags);
+       mr->umem = ib_umem_get(pd->p_uctx, start, (size_t)length, access_flags, TRUE);
        if (IS_ERR(mr->umem)) {
                // there can be also second reason of failue - insufficient memory,
                // but we can't get awared of that without changing ib_umem_get prototype
index 720bf17..7cb3244 100644 (file)
@@ -360,7 +360,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err;
 
                qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-                                      qp->buf_size, 0);
+                                      qp->buf_size, 0, FALSE);
                if (IS_ERR(qp->umem)) {
                        err = PTR_ERR(qp->umem);
                        goto err;
index 445265b..2148584 100644 (file)
@@ -116,7 +116,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                }
 
                srq->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-                                       buf_size, 0);
+                                       buf_size, 0, FALSE);
                if (IS_ERR(srq->umem)) {
                        err = PTR_ERR(srq->umem);
                        goto err_srq;
index 28afbbe..2ec55dc 100644 (file)
@@ -170,7 +170,7 @@ mlnx_register_pmr (
        if (IS_ERR(p_ib_mr)) {\r
                err = PTR_ERR(p_ib_mr);\r
                HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,\r
-                       ("mthca_reg_phys_mr failed (%d)\n", err));\r
+                       ("ib_reg_phys_mr failed (%d)\n", err));\r
                status = errno_to_iberr(err);\r
                goto err_reg_phys_mr;\r
        }\r
index e367eb4..682dcfd 100644 (file)
@@ -15,7 +15,7 @@ struct ib_umem {
 void ib_umem_release(struct ib_umem *p_ib_umem);
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr,
-                           size_t size, enum ib_access_flags access);
+                           size_t size, enum ib_access_flags access, boolean_t secure);
 
 int ib_umem_page_count(struct ib_umem *p_ib_umem);
 
index a54b38d..0f01bd9 100644 (file)
 int mlx4_alloc_buf(struct mlx4_buf *buf, int size, int page_size)
 {
        int ret;
-
        ret = posix_memalign(&buf->buf, page_size, align(size, page_size));
-       if (ret)
-               return ret;
-
-       buf->length = size;
-
-       return 0;
+       if (!ret)
+               buf->length = size;
+       return ret;
 }
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-       VirtualFree(buf->buf, 0, MEM_RELEASE);
+       posix_memfree(buf->buf);
 }
index 2c9cbfe..b2fff31 100644 (file)
@@ -74,15 +74,47 @@ typedef int32_t     __s32;
 // FUNCTIONS
 // ===========================================
 
+static inline BOOLEAN is_power_of_2(uint32_t n)
+{
+       return (!!n & !(n & (n-1))) ? TRUE : FALSE;
+}
+
+// Allocated memory is zeroed !
 static inline int posix_memalign(void **memptr, int alignment, int size)
 {
-       UNREFERENCED_PARAMETER(alignment);
+       int aligned_size, desc_size = sizeof(int);
+       char *real_addr, *aligned_addr;
+
+       // sanity check: alignment must be a power of 2 and at least sizeof(int)
+       if ( alignment < desc_size || !is_power_of_2((uint32_t)alignment) )
+               return -EINVAL;
+
+       // calculate size, needed for aligned allocation
+       aligned_size = size + alignment + desc_size;
+
+       // allocate
+       real_addr = cl_zalloc(aligned_size);
+       if ( real_addr == NULL )
+               return -ENOMEM;
+
+       // calculate aligned address
+       aligned_addr = (char *)(((ULONG_PTR)(real_addr + alignment-1)) & ~(alignment - 1));
+       if ( aligned_addr < real_addr + desc_size )
+               aligned_addr += alignment;
+
+       // store the descriptor
+       *(int*)(aligned_addr - desc_size) = (int)(aligned_addr - real_addr);
+       
+       *memptr = aligned_addr;
+       return 0;
+}
 
-       *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE,  PAGE_READWRITE );
-       if (*memptr) 
-               return 0;
-       else    
-               return ENOMEM;
+// Not a POSIX function; named to mirror posix_memalign(), whose allocations it frees.
+static inline void posix_memfree(void *memptr)
+{
+       int *desc_addr = (int*)((char*)memptr - sizeof(int));
+       char *real_addr = (char*)memptr - *desc_addr;
+       cl_free(real_addr);
 }
 
 static inline int ffsl(uint32_t x)
index 96879e9..1e2a027 100644 (file)
@@ -685,7 +685,6 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
                return -1;
        }
 
-       memset(qp->buf.buf, 0, qp->buf_size);
        mlx4_qp_init_sq_ownership(qp);
 
        return 0;
index 656d191..e0c2f4e 100644 (file)
@@ -146,8 +146,6 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
                return -1;
        }
 
-       // srq->buf.buf is zeroed in posix_memalign - memset(srq->buf.buf, 0, buf_size);
-
        /*
         * Now initialize the SRQ buffer so that all of the WQEs are
         * linked into the list of free WQEs.
index 89ca274..9a16c63 100644 (file)
@@ -373,8 +373,6 @@ mlx4_pre_create_cq (
                                                context->page_size))
                goto err_alloc_buf;
 
-       // cq->buf.buf is zeroed in posix_memalign - memset(cq->buf.buf, 0, buf_size);
-
        cq->ibv_cq.context = context;
        cq->cons_index = 0;
                
@@ -718,7 +716,7 @@ mlx4_pre_create_qp (
        attr.cap.max_recv_wr            = p_create_attr->rq_depth;
        attr.cap.max_send_sge           = p_create_attr->sq_sge;
        attr.cap.max_recv_sge           = p_create_attr->rq_sge;
-       attr.cap.max_inline_data        = p_create_attr->sq_max_inline;         /* absent in IBAL */
+       attr.cap.max_inline_data        = p_create_attr->sq_max_inline;
        attr.qp_type                            = __to_qp_type(p_create_attr->qp_type);
        attr.sq_sig_all                         = p_create_attr->sq_signaled;
 
index 474c62f..f73aee5 100644 (file)
@@ -88,7 +88,7 @@ mlnx_register_mr (
        // register mr \r
        mr_p = ibv_reg_mr(ib_pd_p, map_qp_ibal_acl(p_mr_create->access_ctrl), \r
                p_mr_create->vaddr, p_mr_create->length, \r
-               (uint64_t)p_mr_create->vaddr, um_call );\r
+               (uint64_t)p_mr_create->vaddr, um_call, TRUE );\r
        if (IS_ERR(mr_p)) {\r
                err = PTR_ERR(mr_p);\r
                HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,\r
index 7d09116..20d5d70 100644 (file)
@@ -729,7 +729,7 @@ struct ib_device {
                                                  u64 *iova_start);
        struct ib_mr *                     (*reg_virt_mr)(struct ib_pd *pd, 
                                                void* FUNC_PTR64        vaddr, uint64_t length, uint64_t hca_va,
-                                               mthca_qp_access_t acc, boolean_t um_call);
+                                               mthca_qp_access_t acc, boolean_t um_call, boolean_t secure);
        int                        (*query_mr)(struct ib_mr *mr,
                                               struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
@@ -1140,13 +1140,15 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
  * @hca_va: virtual address in HCA
  * @mr_access_flags: Specifies the memory access rights.
  * @um_call: call from user, when TRUE.
+ * @secure: when TRUE, secure the memory against being released (only for um_call == TRUE)
  */
 struct ib_mr *ibv_reg_mr(struct ib_pd *pd, 
        mthca_qp_access_t mr_access_flags,
        void* FUNC_PTR64                        vaddr,
        uint64_t                                length,
        uint64_t                                hca_va,
-       boolean_t                       um_call
+       boolean_t                               um_call,
+       boolean_t                               secure
        );
 
 /**
index 43c6cfa..5233226 100644 (file)
@@ -171,7 +171,7 @@ struct ib_ah *ibv_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
                        pd, 
                        create_ah->mr.access_flags, 
                        (void*)(ULONG_PTR)create_ah->mr.start,
-                       create_ah->mr.length, create_ah->mr.hca_va, TRUE );
+                       create_ah->mr.length, create_ah->mr.hca_va, TRUE, FALSE );
                if (IS_ERR(ib_mr)) {
                        err = PTR_ERR(ib_mr);
                        HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV  ,("ibv_reg_mr failed (%d)\n", err));
@@ -331,7 +331,7 @@ struct ib_srq *ibv_create_srq(struct ib_pd *pd,
                        (struct ib_pd *)(ULONG_PTR)create_srp->mr.pd_handle, 
                        create_srp->mr.access_flags, 
                        (void*)(ULONG_PTR)create_srp->mr.start,
-                       create_srp->mr.length, create_srp->mr.hca_va, TRUE );
+                       create_srp->mr.length, create_srp->mr.hca_va, TRUE, FALSE );
                if (IS_ERR(ib_mr)) {
                        err = PTR_ERR(ib_mr);
                        HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed (%d)\n", err));
@@ -453,7 +453,7 @@ struct ib_qp *ibv_create_qp(struct ib_pd *pd,
                        (struct ib_pd *)(ULONG_PTR)create_qp->mr.pd_handle, 
                        create_qp->mr.access_flags, 
                        (void*)(ULONG_PTR)create_qp->mr.start,
-                       create_qp->mr.length, create_qp->mr.hca_va, TRUE );
+                       create_qp->mr.length, create_qp->mr.hca_va, TRUE, FALSE );
                if (IS_ERR(ib_mr)) {
                        err = PTR_ERR(ib_mr);
                        HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed (%d)\n", err));
@@ -598,7 +598,7 @@ struct ib_cq *ibv_create_cq(struct ib_device *device,
                        (struct ib_pd *)(ULONG_PTR)create_cq->mr.pd_handle, 
                        create_cq->mr.access_flags, 
                        (void*)(ULONG_PTR)create_cq->mr.start,
-                       create_cq->mr.length, create_cq->mr.hca_va, TRUE );
+                       create_cq->mr.length, create_cq->mr.hca_va, TRUE, FALSE );
                if (IS_ERR(ib_mr)) {
                        err = PTR_ERR(ib_mr);
                        HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("ibv_reg_mr failed (%d)\n", err));
@@ -688,14 +688,15 @@ struct ib_mr *ibv_reg_mr(struct ib_pd *pd,
        void* FUNC_PTR64                        vaddr,
        uint64_t                                length,
        uint64_t                                hca_va,
-       boolean_t                       um_call
+       boolean_t                               um_call,
+       boolean_t                               secure
        )
 {
        struct ib_mr *ib_mr;
        int                          err;
        HCA_ENTER(HCA_DBG_MEMORY);
 
-       ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va, mr_access_flags, um_call);
+       ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va, mr_access_flags, um_call, secure);
        if (IS_ERR(ib_mr)) {
                err = PTR_ERR(ib_mr);
                HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr failed (%d)\n", err));
index d65328b..be4ce53 100644 (file)
@@ -996,7 +996,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
 
 static struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd, 
        void* FUNC_PTR64        vaddr, uint64_t length, uint64_t hca_va,
-       mthca_qp_access_t acc, boolean_t um_call)
+       mthca_qp_access_t acc, boolean_t um_call, boolean_t secure)
 {
        struct mthca_dev *dev = to_mdev(pd->device);
        struct mthca_mr *mr;
@@ -1082,7 +1082,7 @@ static struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd,
                goto err_mt_alloc;
 
        // secure memory
-       if (!pd->ucontext)
+       if (!pd->ucontext || !secure)
                goto done;
        __try {
                mr->secure_handle = MmSecureVirtualMemory ( vaddr, (SIZE_T)length,
@@ -1129,8 +1129,19 @@ int mthca_dereg_mr(struct ib_mr *mr)
        struct mthca_mr *mmr = to_mmr(mr);
        struct mthca_dev* dev = to_mdev(mr->device);
 
-       if (mmr->secure_handle)
-               MmUnsecureVirtualMemory ( mmr->secure_handle );
+       if (mmr->secure_handle) {
+               __try {
+                       MmUnsecureVirtualMemory( mmr->secure_handle );
+                       mmr->secure_handle = NULL;
+               }
+               __except (EXCEPTION_EXECUTE_HANDLER) {
+                       NTSTATUS Status = GetExceptionCode();
+                       UNUSED_PARAM_WOWPP(Status);
+                       HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,
+                               ("Exception 0x%x on MmUnsecureVirtualMemory(), addr %I64x, size %I64x, seg_num %d, nr_pages %d\n", 
+                               Status, mmr->iobuf.va, (u64)mmr->iobuf.size, mmr->iobuf.seg_num, mmr->iobuf.nr_pages ));
+               }
+       }
        mthca_free_mr(dev, mmr);
        if (mmr->iobuf_used)
                iobuf_deregister_with_cash(&mmr->iobuf);
index a855085..e943a68 100644 (file)
@@ -54,11 +54,7 @@ static void __free_srq(struct mthca_srq *srq)
        }
 
        if (srq->buf) {
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-               cl_free(srq->buf);
-#else
-               VirtualFree( srq->buf, 0, MEM_RELEASE);
-#endif
+               posix_memfree(srq->buf);
        }
 
        if (srq->wrid) 
@@ -158,11 +154,7 @@ __pre_create_srq (
        goto end;
 
 err_alloc_db:
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-               cl_free(srq->buf);
-#else
-               VirtualFree( srq->buf, 0, MEM_RELEASE);
-#endif
+       posix_memfree(srq->buf);
        cl_free(srq->wrid);
 err_alloc_buf:
        cl_spinlock_destroy(&srq->lock);
index f08d5e4..c0b4a92 100644 (file)
@@ -201,11 +201,7 @@ void mthca_free_db_tab(struct mthca_db_table *db_tab)
 
        for (i = 0; i < db_tab->npages; ++i)
                if (db_tab->page[i].db_rec)
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-                       cl_free(db_tab->page[i].db_rec);
-#else
-                       VirtualFree( db_tab->page[i].db_rec, 0, MEM_RELEASE);
-#endif
+                       posix_memfree( db_tab->page[i].db_rec);
 
        cl_free(db_tab);
 }
index c9468ee..a817d1f 100644 (file)
@@ -80,11 +80,7 @@ int mthca_free_pd(struct ibv_pd *ibv_pd)
                WaitForSingleObject( pd->ah_mutex, INFINITE );
                for (page = pd->ah_list; page; page = next_page) {
                        next_page = page->next;
-                       #ifdef NOT_USE_VIRTUAL_ALLOC    
-                               cl_free(page->buf);
-                       #else
-                               VirtualFree( page->buf, 0, MEM_RELEASE);
-                       #endif
+                       posix_memfree(page->buf);
                        cl_free(page);
                }
                ReleaseMutex( pd->ah_mutex );
@@ -103,7 +99,7 @@ struct ibv_cq *mthca_create_cq_pre(struct ibv_context *context, int *p_cqe,
        int                         ret;
 
        /* Sanity check CQ size before proceeding */
-       if (*p_cqe > 131072)
+       if ((unsigned)*p_cqe > 131072)
                goto exit;
 
        cq = cl_zalloc(sizeof *cq);
@@ -181,7 +177,7 @@ err_set_db:
                        cq->set_ci_db_index);
 
 err_unreg:
-       cl_free(cq->buf);
+       posix_memfree(cq->buf);
 
 err_memalign:
        cl_spinlock_destroy(&cq->lock);
@@ -233,12 +229,7 @@ int mthca_destroy_cq(struct ibv_cq *cq)
                              to_mcq(cq)->arm_db_index);
        }
 
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-       cl_free(to_mcq(cq)->buf);
-#else
-       VirtualFree( to_mcq(cq)->buf, 0, MEM_RELEASE);
-#endif
-
+       posix_memfree(to_mcq(cq)->buf);
        
        cl_spinlock_destroy(&((struct mthca_cq *)cq)->lock);
        cl_free(to_mcq(cq));
@@ -380,11 +371,7 @@ err_spinlock_rq:
        
 err_spinlock_sq:
        cl_free(qp->wrid);
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-       cl_free(qp->buf);
-#else
-       VirtualFree( qp->buf, 0, MEM_RELEASE);
-#endif
+       posix_memfree(qp->buf);
 
 err_nomem:
        cl_free(qp);
@@ -501,11 +488,7 @@ void mthca_destroy_qp_post(struct ibv_qp *qp, int ret)
                cl_spinlock_destroy(&((struct mthca_qp *)qp)->sq.lock);
                cl_spinlock_destroy(&((struct mthca_qp *)qp)->rq.lock);
 
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-               cl_free(to_mqp(qp)->buf);
-#else
-               VirtualFree( to_mqp(qp)->buf, 0, MEM_RELEASE);
-#endif
+               posix_memfree(to_mqp(qp)->buf);
                cl_free(to_mqp(qp)->wrid);
                cl_free(to_mqp(qp));
        }
index 9f204d2..e7bb2ab 100644 (file)
 
 extern size_t g_page_size;
 
-static inline int posix_memalign(void **memptr, size_t alignment, size_t size)
+static inline BOOLEAN is_power_of_2(uint32_t n)
 {
-#ifdef NOT_USE_VIRTUAL_ALLOC   
-       // sanity checks
-       if (alignment % sizeof(void*))
-               return EINVAL;
-       if (alignment < g_page_size) {
-               fprintf(stderr, "mthca: Fatal (posix_memalign): alignment too small - %d \n",  alignment );
-               return EINVAL;
-       }
-
-       // allocation
-       *memptr = cl_malloc(size);
-       if (*memptr) 
-               return 0;
-       else    
-               return ENOMEM;
-#else
-       *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE,  PAGE_READWRITE );
-       if (*memptr) 
-               return 0;
-       else    
-               return ENOMEM;
-#endif
+       return (!!n & !(n & (n-1))) ? TRUE : FALSE;
+}
+
+// Allocated memory is zeroed !
+static inline int posix_memalign(void **memptr, int alignment, int size)
+{
+       int aligned_size, desc_size = sizeof(int);
+       char *real_addr, *aligned_addr;
+
+       // sanity check: alignment must be a power of 2 and at least sizeof(int)
+       if ( alignment < desc_size || !is_power_of_2((uint32_t)alignment) )
+               return -EINVAL;
+
+       // calculate size, needed for aligned allocation
+       aligned_size = size + alignment + desc_size;
+
+       // allocate
+       real_addr = cl_zalloc(aligned_size);
+       if ( real_addr == NULL )
+               return -ENOMEM;
+
+       // calculate aligned address
+       aligned_addr = (char *)(((ULONG_PTR)(real_addr + alignment-1)) & ~(alignment - 1));
+       if ( aligned_addr < real_addr + desc_size )
+               aligned_addr += alignment;
+
+       // store the descriptor
+       *(int*)(aligned_addr - desc_size) = (int)(aligned_addr - real_addr);
+       
+       *memptr = aligned_addr;
+       return 0;
+}
+
+// Not a POSIX function; named to mirror posix_memalign(), whose allocations it frees.
+static inline void posix_memfree(void *memptr)
+{
+       int *desc_addr = (int*)((char*)memptr - sizeof(int));
+       char *real_addr = (char*)memptr - *desc_addr;
+       cl_free(real_addr);
 }
 
 // ===========================================