Update to work on 2.6.20 + minor fix
/*
 *  scst_mem.c
 *
 *  Copyright (C) 2006 Vladislav Bolkhovitin <vst@vlnb.net>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation, version 2
 *  of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <asm/unistd.h>
#include <asm/string.h>

#ifdef SCST_HIGHMEM
#include <linux/highmem.h>
#endif

#include "scsi_tgt.h"
#include "scst_priv.h"
#include "scst_mem.h"

/*
 * This implementation of sgv_pool is not the best, because the SLABs could
 * get fragmented, too much undesirable memory could be kept, and under
 * memory pressure the cached objects could be purged too quickly. On the
 * other hand, it's simple, works well, and doesn't require any
 * modifications of the existing SLAB code.
 */

atomic_t sgv_big_total_alloc;
atomic_t sgv_other_total_alloc;

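/*
 * Try to merge sg[cur] with an already filled entry: either head merge
 * (sg[cur] physically precedes entry i) or tail merge (entry i physically
 * precedes sg[cur]). "hint" is the entry checked first, normally the result
 * of the previous call. Returns the index of the entry sg[cur] was merged
 * into, or -1 if no merge was possible.
 */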
static int scst_check_clustering(struct scatterlist *sg, int cur, int hint)
{
	int res = -1;
	int i = hint;
	unsigned long pfn_cur = page_to_pfn(sg[cur].page);
	int len_cur = sg[cur].length;
	unsigned long pfn_cur_next = pfn_cur + (len_cur >> PAGE_SHIFT);
	int full_page_cur = (len_cur & (PAGE_SIZE - 1)) == 0;
	unsigned long pfn, pfn_next, full_page;

#ifdef SCST_HIGHMEM
	if (PageHighMem(sg[cur].page)) {
		TRACE_MEM("%s", "HIGHMEM page allocated, no clustering");
		goto out;
	}
#endif

#if 0
	TRACE_MEM("pfn_cur %ld, pfn_cur_next %ld, len_cur %d, full_page_cur %d",
		pfn_cur, pfn_cur_next, len_cur, full_page_cur);
#endif

	/* check the hint first */
	if (i >= 0) {
		pfn = page_to_pfn(sg[i].page);
		pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
		full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;

		if ((pfn == pfn_cur_next) && full_page_cur)
			goto out_head;

		if ((pfn_next == pfn_cur) && full_page)
			goto out_tail;
	}

	/* ToDo: implement more intelligent search */
	for (i = cur - 1; i >= 0; i--) {
		pfn = page_to_pfn(sg[i].page);
		pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
		full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;

		if ((pfn == pfn_cur_next) && full_page_cur)
			goto out_head;

		if ((pfn_next == pfn_cur) && full_page)
			goto out_tail;
	}

out:
	return res;

out_tail:
	TRACE_MEM("SG segment %d will be tail merged with segment %d", cur, i);
	sg[i].length += len_cur;
	memset(&sg[cur], 0, sizeof(sg[cur]));
	res = i;
	goto out;

out_head:
	TRACE_MEM("SG segment %d will be head merged with segment %d", cur, i);
	sg[i].page = sg[cur].page;
	sg[i].length += len_cur;
	memset(&sg[cur], 0, sizeof(sg[cur]));
	res = i;
	goto out;
}

static void scst_free_sg_entries(struct scatterlist *sg, int sg_count)
{
	int i;

	TRACE_MEM("sg=%p, sg_count=%d", sg, sg_count);

	for (i = 0; i < sg_count; i++) {
		struct page *p = sg[i].page;
		int len = sg[i].length;
		int pages =
			(len >> PAGE_SHIFT) + ((len & ~PAGE_MASK) != 0);

		TRACE_MEM("page %lx, len %d, pages %d",
			(unsigned long)p, len, pages);

		while (pages > 0) {
			int order = 0;

/*
 * __free_pages() must be called with the same order that was used for the
 * allocation, so this small optimization is disabled.
 */
#if 0
			if (len > 0) {
				while (((1 << order) << PAGE_SHIFT) < len)
					order++;
				len = 0;
			}
#endif
			TRACE_MEM("free_pages(): order %d, page %lx",
				order, (unsigned long)p);

			__free_pages(p, order);

			pages -= 1 << order;
			p += 1 << order;
		}
	}
}

static int scst_alloc_sg_entries(struct scatterlist *sg, int pages,
	unsigned long gfp_mask, int clustered, struct trans_tbl_ent *trans_tbl)
{
	int sg_count = 0;
	int pg, i, j;
	int merged = -1;

	TRACE_MEM("pages=%d, clustered=%d", pages, clustered);

#if 0
	gfp_mask |= __GFP_COLD;
#endif
#ifdef SCST_STRICT_SECURITY
	gfp_mask |= __GFP_ZERO;
#endif

	for (pg = 0; pg < pages; pg++) {
#ifdef DEBUG_OOM
		if ((scst_random() % 10000) == 55)
			sg[sg_count].page = NULL;
		else
#endif
			sg[sg_count].page = alloc_pages(gfp_mask, 0);
		if (sg[sg_count].page == NULL) {
			TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of "
				"sg page failed");
			goto out_no_mem;
		}
		sg[sg_count].length = PAGE_SIZE;
		if (clustered) {
			merged = scst_check_clustering(sg, sg_count, merged);
			if (merged == -1)
				sg_count++;
		} else
			sg_count++;
		TRACE_MEM("pg=%d, merged=%d, sg_count=%d", pg, merged,
			sg_count);
	}

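	/*
	 * Build the translation table: trans_tbl[i].pg_count is the index
	 * of the first page covered by SG entry i, and trans_tbl[pg].sg_num
	 * is the 1-based number of the SG entry covering page pg. Together
	 * they let sgv_pool_alloc() map a page count back to an SG entry.
	 */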
	if (clustered && trans_tbl) {
		pg = 0;
		for (i = 0; i < pages; i++) {
			int n = sg[i].length >> PAGE_SHIFT;
			trans_tbl[i].pg_count = pg;
			for (j = 0; j < n; j++)
				trans_tbl[pg++].sg_num = i+1;
		}
	}

out:
	TRACE_MEM("sg_count=%d", sg_count);
	return sg_count;

out_no_mem:
	scst_free_sg_entries(sg, sg_count);
	sg_count = 0;
	goto out;
}

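/*
 * Allocate an SG vector of at least "size" bytes from "pool". Requests up
 * to the largest cache order are served from the per-order kmem caches;
 * bigger requests (refused when "atomic" is set) fall back to scst_alloc().
 * On success *count receives the number of valid SG entries and, on the
 * cache path, *sgv the object to hand back to the pool when freeing.
 */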
struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, int size,
	unsigned long gfp_mask, int atomic, int *count,
	struct sgv_pool_obj **sgv)
{
	struct sgv_pool_obj *obj;
	int order, pages, cnt, sg;
	struct scatterlist *res = NULL;

	if (unlikely(size == 0))
		return NULL;

	pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);
	order = get_order(size);

	TRACE_MEM("size=%d, pages=%d, order=%d", size, pages, order);

	if (order >= SGV_POOL_ELEMENTS) {
		obj = NULL;
		if (atomic)
			goto out;
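		/*
		 * scst_alloc() below increments sgv_other_total_alloc
		 * itself, so compensate here to account this request as a
		 * "big" allocation only.
		 */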
		atomic_inc(&sgv_big_total_alloc);
		atomic_dec(&sgv_other_total_alloc);
		res = scst_alloc(size, gfp_mask, pool->clustered, count);
		goto out;
	}

	obj = kmem_cache_alloc(pool->caches[order],
			gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
	if (obj == NULL) {
		if (!atomic) {
			TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
				"failed (size %d)", size);
		}
		goto out;
	}

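	/*
	 * sgv_ctor() zeroes freshly constructed objects, so owner_cache !=
	 * caches[order] means this object has just been created and its SG
	 * vector still has to be allocated; otherwise it comes from the
	 * cache fully set up.
	 */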
	if (obj->owner_cache != pool->caches[order]) {
		int esz, epg, eorder;

		if (atomic)
			goto out_free;

		esz = (1 << order) * sizeof(obj->entries[0]);
		epg = (esz >> PAGE_SHIFT) + ((esz & ~PAGE_MASK) != 0);
		eorder = get_order(esz);
		TRACE_MEM("Brand new sgv_obj %p (esz=%d, epg=%d, eorder=%d)",
			obj, esz, epg, eorder);

		obj->eorder = eorder;
		obj->entries = (struct scatterlist *)__get_free_pages(
					gfp_mask|__GFP_ZERO, eorder);
		if (obj->entries == NULL) {
			TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
				"SG vector order %d failed", eorder);
			goto out_free;
		}

		obj->sg_count = scst_alloc_sg_entries(obj->entries, (1 << order),
			gfp_mask, pool->clustered, obj->trans_tbl);
		if (obj->sg_count <= 0)
			goto out_free_entries;

		obj->owner_cache = pool->caches[order];
	} else {
		TRACE_MEM("Cached sgv_obj %p", obj);
		atomic_inc(&pool->acc.hit_alloc);
		atomic_inc(&pool->cache_acc[order].hit_alloc);
	}
	atomic_inc(&pool->acc.total_alloc);
	atomic_inc(&pool->cache_acc[order].total_alloc);
	if (pool->clustered)
		cnt = obj->trans_tbl[pages-1].sg_num;
	else
		cnt = pages;
	sg = cnt-1;
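	/*
	 * Save the index and the original length of the last SG entry: it
	 * may be trimmed below to match the requested size, and the saved
	 * values allow the entry to be restored when the object is freed
	 * back to the cache.
	 */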
	obj->orig_sg = sg;
	obj->orig_length = obj->entries[sg].length;
	if (pool->clustered) {
		obj->entries[sg].length =
			(pages - obj->trans_tbl[sg].pg_count) << PAGE_SHIFT;
	}
	if (size & ~PAGE_MASK) {
		obj->entries[sg].length -= PAGE_SIZE - (size & ~PAGE_MASK);
	}
	*count = cnt;

	TRACE_MEM("sgv_obj=%p (size=%d, pages=%d, "
		"sg_count=%d, count=%d, last_len=%d)", obj, size, pages,
		obj->sg_count, *count, obj->entries[obj->orig_sg].length);

	res = obj->entries;
	*sgv = obj;

out:
	return res;

out_free_entries:
	free_pages((unsigned long)obj->entries, obj->eorder);
	obj->entries = NULL;

out_free:
	kmem_cache_free(pool->caches[order], obj);
	obj = NULL;
	goto out;
}

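/*
 * SLAB constructor: initialize the object only on a real construction
 * pass, i.e. when SLAB_CTOR_CONSTRUCTOR is set and SLAB_CTOR_VERIFY is
 * not.
 */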
static void sgv_ctor(void *data, struct kmem_cache *c, unsigned long flags)
{
	struct sgv_pool_obj *obj = data;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
	     SLAB_CTOR_CONSTRUCTOR)
		return;

	TRACE_MEM("Constructor for sgv_obj %p", obj);
	memset(obj, 0, sizeof(*obj));
}

static void __sgv_dtor(void *data, int pages)
{
	struct sgv_pool_obj *obj = data;
	TRACE_MEM("Destructor for sgv_obj %p", obj);
	if (obj->entries) {
		scst_free_sg_entries(obj->entries, obj->sg_count);
		free_pages((unsigned long)obj->entries, obj->eorder);
	}
}

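/*
 * kmem_cache_create() takes a single destructor pointer, so generate one
 * small stub per supported order and collect them in cache_dtors[] below.
 * Note that __sgv_dtor() does not currently use its "pages" argument.
 */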
#define SGV_DTOR_NAME(order) sgv_dtor##order
#define SGV_DTOR(order) static void sgv_dtor##order(void *d, struct kmem_cache *k, \
		unsigned long f) { __sgv_dtor(d, 1 << order); }

SGV_DTOR(0);
SGV_DTOR(1);
SGV_DTOR(2);
SGV_DTOR(3);
SGV_DTOR(4);
SGV_DTOR(5);
SGV_DTOR(6);
SGV_DTOR(7);
SGV_DTOR(8);
SGV_DTOR(9);
SGV_DTOR(10);

typedef void (*dtor_t)(void *, struct kmem_cache *, unsigned long);

dtor_t cache_dtors[SGV_POOL_ELEMENTS] =
	{ SGV_DTOR_NAME(0), SGV_DTOR_NAME(1), SGV_DTOR_NAME(2), SGV_DTOR_NAME(3),
	  SGV_DTOR_NAME(4), SGV_DTOR_NAME(5), SGV_DTOR_NAME(6), SGV_DTOR_NAME(7),
	  SGV_DTOR_NAME(8), SGV_DTOR_NAME(9), SGV_DTOR_NAME(10) };

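/*
 * Fallback allocator used by sgv_pool_alloc() for requests bigger than the
 * largest pool cache: allocates a bare scatterlist with kzalloc() that must
 * be freed with scst_free() instead of being returned to a pool.
 */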
struct scatterlist *scst_alloc(int size, unsigned long gfp_mask,
	int use_clustering, int *count)
{
	struct scatterlist *res;
	int pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);

	TRACE_ENTRY();

	atomic_inc(&sgv_other_total_alloc);

	res = kzalloc(pages*sizeof(*res), gfp_mask);
	if (res == NULL) {
		*count = 0;
		goto out;
	}

	*count = scst_alloc_sg_entries(res, pages, gfp_mask, use_clustering,
			NULL);
	if (*count <= 0)
		goto out_free;

out:
	TRACE_MEM("Allocated sg %p (count %d)", res, *count);

	TRACE_EXIT_HRES((int)res);
	return res;

out_free:
	kfree(res);
	res = NULL;
	goto out;
}

void scst_free(struct scatterlist *sg, int count)
{
	TRACE_MEM("Freeing sg=%p", sg);
	scst_free_sg_entries(sg, count);
	kfree(sg);
}

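/*
 * Initialize a pool: one kmem cache per supported allocation order, each
 * cached object holding the sgv_pool_obj header plus, for clustered pools,
 * one trans_tbl entry per page of that order.
 */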
int sgv_pool_init(struct sgv_pool *pool, const char *name, int clustered)
{
	int res = -ENOMEM;
	int i;
	struct sgv_pool_obj *obj;

	TRACE_ENTRY();

	memset(pool, 0, sizeof(*pool));
	pool->clustered = clustered;

	TRACE_MEM("sizeof(*obj)=%zd, clustered=%d, sizeof(obj->trans_tbl[0])=%zd",
		sizeof(*obj), clustered, sizeof(obj->trans_tbl[0]));

	for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
		int size, pages;

		atomic_set(&pool->cache_acc[i].total_alloc, 0);
		atomic_set(&pool->cache_acc[i].hit_alloc, 0);

		pages = 1 << i;
		size = sizeof(*obj) + pages *
			(clustered ? sizeof(obj->trans_tbl[0]) : 0);
		TRACE_MEM("pages=%d, size=%d", pages, size);

		scnprintf(pool->cache_names[i], sizeof(pool->cache_names[i]),
			"%s-%luK", name, (PAGE_SIZE >> 10) << i);
		pool->caches[i] = kmem_cache_create(pool->cache_names[i],
			size, 0, SCST_SLAB_FLAGS, sgv_ctor, cache_dtors[i]);
		if (pool->caches[i] == NULL) {
			TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool cache "
				"%s(%d) failed", name, i);
			goto out_free;
		}
	}

	res = 0;

out:
	TRACE_EXIT_RES(res);
	return res;

out_free:
	for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
		if (pool->caches[i]) {
			kmem_cache_destroy(pool->caches[i]);
			pool->caches[i] = NULL;
		} else
			break;
	}
	goto out;
}

void sgv_pool_deinit(struct sgv_pool *pool)
{
	int i;

	TRACE_ENTRY();

	for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
		if (pool->caches[i])
			kmem_cache_destroy(pool->caches[i]);
		pool->caches[i] = NULL;
	}

	TRACE_EXIT();
}

struct sgv_pool *sgv_pool_create(const char *name, int clustered)
{
	struct sgv_pool *pool;
	int rc;

	TRACE_ENTRY();

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (pool == NULL) {
		TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of sgv_pool failed");
		goto out;
	}

	rc = sgv_pool_init(pool, name, clustered);
	if (rc != 0)
		goto out_free;

out:
	TRACE_EXIT_RES(pool != NULL);
	return pool;

out_free:
	kfree(pool);
	pool = NULL;
	goto out;
}

void sgv_pool_destroy(struct sgv_pool *pool)
{
	TRACE_ENTRY();

	sgv_pool_deinit(pool);
	kfree(pool);

	TRACE_EXIT();
}

int scst_sgv_pools_init(struct scst_sgv_pools *pools)
{
	int res;

	TRACE_ENTRY();

	atomic_set(&sgv_big_total_alloc, 0);
	atomic_set(&sgv_other_total_alloc, 0);

	res = sgv_pool_init(&pools->norm, "sgv", 0);
	if (res != 0)
		goto out;

	res = sgv_pool_init(&pools->norm_clust, "sgv-clust", 1);
	if (res != 0)
		goto out_free_norm;

	res = sgv_pool_init(&pools->dma, "sgv-dma", 0);
	if (res != 0)
		goto out_free_clust;

#ifdef SCST_HIGHMEM
	res = sgv_pool_init(&pools->highmem, "sgv-high", 0);
	if (res != 0)
		goto out_free_dma;
#endif

out:
	TRACE_EXIT_RES(res);
	return res;

#ifdef SCST_HIGHMEM
out_free_dma:
	sgv_pool_deinit(&pools->dma);
#endif

out_free_clust:
	sgv_pool_deinit(&pools->norm_clust);

out_free_norm:
	sgv_pool_deinit(&pools->norm);
	goto out;
}

void scst_sgv_pools_deinit(struct scst_sgv_pools *pools)
{
	TRACE_ENTRY();

#ifdef SCST_HIGHMEM
	sgv_pool_deinit(&pools->highmem);
#endif
	sgv_pool_deinit(&pools->dma);
	sgv_pool_deinit(&pools->norm);
	sgv_pool_deinit(&pools->norm_clust);

	TRACE_EXIT();
	return;
}