/* scst/src/scst_mem.c — from the mirror/scst git repository */
1 /*
 *  scst_mem.c
3  *  
4  *  Copyright (C) 2006 Vladislav Bolkhovitin <vst@vlnb.net>
5  *  
6  *  This program is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU General Public License
8  *  as published by the Free Software Foundation, version 2
9  *  of the License.
10  * 
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  *  GNU General Public License for more details.
15  */
16
17 #include <linux/init.h>
18 #include <linux/kernel.h>
19 #include <linux/errno.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/slab.h>
23 #include <linux/sched.h>
24 #include <linux/mm.h>
25 #include <asm/unistd.h>
26 #include <asm/string.h>
27
28 #ifdef SCST_HIGHMEM
29 #include <linux/highmem.h>
30 #endif
31
32 #include "scsi_tgt.h"
33 #include "scst_debug.h"
34 #include "scst_priv.h"
35 #include "scst_mem.h"
36
/*
 * This implementation of sgv_pool is not ideal: the SLABs can become
 * fragmented, too much unwanted memory may be retained, and under memory
 * pressure the cached objects may be purged too quickly.  On the other
 * hand, it is simple, works well, and requires no modifications to the
 * existing SLAB code.
 */
44
/* Allocation counters: "big" counts requests too large for any pool cache
 * (see sgv_pool_alloc()); "other" counts direct scst_alloc() allocations. */
atomic_t sgv_big_total_alloc;
atomic_t sgv_other_total_alloc;
47
48 static int scst_check_clustering(struct scatterlist *sg, int cur, int hint)
49 {
50         int res = -1;
51         int i = hint;
52         unsigned long pfn_cur = page_to_pfn(sg[cur].page);
53         int len_cur = sg[cur].length;
54         unsigned long pfn_cur_next = pfn_cur + (len_cur >> PAGE_SHIFT);
55         int full_page_cur = (len_cur & (PAGE_SIZE - 1)) == 0;
56         unsigned long pfn, pfn_next, full_page;
57
58 #ifdef SCST_HIGHMEM
59         if (page >= highmem_start_page) {
60                 TRACE_MEM("%s", "HIGHMEM page allocated, no clustering")
61                 goto out;
62         }
63 #endif
64
65 #if 0
66         TRACE_MEM("pfn_cur %ld, pfn_cur_next %ld, len_cur %d, full_page_cur %d",
67                 pfn_cur, pfn_cur_next, len_cur, full_page_cur);
68 #endif
69
70         /* check the hint first */
71         if (i >= 0) {
72                 pfn = page_to_pfn(sg[i].page);
73                 pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
74                 full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
75                 
76                 if ((pfn == pfn_cur_next) && full_page_cur)
77                         goto out_head;
78
79                 if ((pfn_next == pfn_cur) && full_page)
80                         goto out_tail;
81         }
82
83         /* ToDo: implement more intelligent search */
84         for (i = cur - 1; i >= 0; i--) {
85                 pfn = page_to_pfn(sg[i].page);
86                 pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
87                 full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
88                 
89                 if ((pfn == pfn_cur_next) && full_page_cur)
90                         goto out_head;
91
92                 if ((pfn_next == pfn_cur) && full_page)
93                         goto out_tail;
94         }
95
96 out:
97         return res;
98
99 out_tail:
100         TRACE_MEM("SG segment %d will be tail merged with segment %d", cur, i);
101         sg[i].length += len_cur;
102         memset(&sg[cur], 0, sizeof(sg[cur]));
103         res = i;
104         goto out;
105
106 out_head:
107         TRACE_MEM("SG segment %d will be head merged with segment %d", cur, i);
108         sg[i].page = sg[cur].page;
109         sg[i].length += len_cur;
110         memset(&sg[cur], 0, sizeof(sg[cur]));
111         res = i;
112         goto out;
113 }
114
115 static void scst_free_sg_entries(struct scatterlist *sg, int sg_count)
116 {
117         int i;
118
119         TRACE_MEM("sg=%p, sg_count=%d", sg, sg_count);
120
121         for (i = 0; i < sg_count; i++) {
122                 struct page *p = sg[i].page;
123                 int len = sg[i].length;
124                 int pages =
125                         (len >> PAGE_SHIFT) + ((len & ~PAGE_MASK) != 0);
126
127                 TRACE_MEM("page %lx, len %d, pages %d", 
128                         (unsigned long)p, len, pages);
129
130                 while (pages > 0) {
131                         int order = 0;
132
133 /* 
134  * __free_pages() doesn't like freeing pages with not that order with
135  * which they were allocated, so disable this small optimization.
136  */
137 #if 0
138                         if (len > 0) {
139                                 while(((1 << order) << PAGE_SHIFT) < len)
140                                         order++;
141                                 len = 0;
142                         }
143 #endif
144                         TRACE_MEM("free_pages(): order %d, page %lx",
145                                 order, (unsigned long)p);
146
147                         __free_pages(p, order);
148
149                         pages -= 1 << order;
150                         p += 1 << order;
151                 }
152         }
153 }
154
155 static int scst_alloc_sg_entries(struct scatterlist *sg, int pages,
156         unsigned long gfp_mask, int clustered, struct trans_tbl_ent *trans_tbl)
157 {
158         int sg_count = 0;
159         int pg, i, j;
160         int merged = -1;
161
162         TRACE_MEM("pages=%d, clustered=%d", pages, clustered);
163
164 #if 0
165         mask |= __GFP_COLD;
166 #endif
167 #ifdef SCST_STRICT_SECURITY
168         mask |= __GFP_ZERO;
169 #endif
170
171         for (pg = 0; pg < pages; pg++) {
172 #ifdef DEBUG_OOM
173                 if ((scst_random() % 10000) == 55)
174                         sg[sg_count].page = NULL;
175                 else
176 #endif
177                         sg[sg_count].page = alloc_pages(gfp_mask, 0);
178                 if (sg[sg_count].page == NULL) {
179                         TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of "
180                                 "sg page failed");
181                         goto out_no_mem;
182                 }
183                 sg[sg_count].length = PAGE_SIZE;
184                 if (clustered) {
185                         merged = scst_check_clustering(sg, sg_count, merged);
186                         if (merged == -1)
187                                 sg_count++;
188                 } else
189                         sg_count++;
190                 TRACE_MEM("pg=%d, merged=%d, sg_count=%d", pg, merged,
191                         sg_count);
192         }
193
194         if (clustered && trans_tbl) {
195                 pg = 0;
196                 for (i = 0; i < pages; i++) {
197                         int n = sg[i].length >> PAGE_SHIFT;
198                         trans_tbl[i].pg_count = pg;
199                         for (j = 0; j < n; j++)
200                                 trans_tbl[pg++].sg_num = i+1;
201                 }
202         }
203
204 out:
205         TRACE_MEM("sg_count=%d", sg_count);
206         return sg_count;
207
208 out_no_mem:
209         scst_free_sg_entries(sg, sg_count);
210         sg_count = 0;
211         goto out;
212 }
213
/*
 * Allocate an SG vector covering at least "size" bytes from "pool".
 *
 * Requests up to the largest cached order come from the per-order kmem
 * cache; cached objects keep their page allocations between uses, so a
 * cache hit is cheap.  Larger requests fall back to scst_alloc().
 *
 * On success returns the vector, sets *count to the number of valid
 * entries and *sgv to the owning pool object.  Returns NULL on failure.
 * NOTE(review): *sgv is NOT set on the scst_alloc() fallback path —
 * presumably callers distinguish that case by size; confirm.
 */
struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, int size,
	unsigned long gfp_mask, int *count, struct sgv_pool_obj **sgv)
{
	struct sgv_pool_obj *obj;
	int order, pages, cnt, sg;
	struct scatterlist *res = NULL;

	if (unlikely(size == 0))
		return NULL;

	pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);
	order = get_order(size);

	TRACE_MEM("size=%d, pages=%d, order=%d", size, pages, order);

	if (order >= SGV_POOL_ELEMENTS) {
		/* Too big for any cache.  scst_alloc() increments
		 * sgv_other_total_alloc, so decrement it here to count this
		 * allocation only in sgv_big_total_alloc.
		 * NOTE(review): "gfp_mask & GFP_ATOMIC" tests the __GFP_HIGH
		 * bit, presumably to reject atomic-context callers from this
		 * potentially slow path — confirm */
		obj = NULL;
		if (gfp_mask & GFP_ATOMIC)
			goto out;
		atomic_inc(&sgv_big_total_alloc);
		atomic_dec(&sgv_other_total_alloc);
		res = scst_alloc(size, gfp_mask, pool->clustered, count);
		goto out;
	}

	obj = kmem_cache_alloc(pool->caches[order], 
			gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
	if (obj == NULL) {
		if (!(gfp_mask & GFP_ATOMIC)) {
			TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
				"failed (size %d)", size);
		}
		goto out;
	}

	/* owner_cache doubles as an "SG entries already allocated" flag:
	 * the SLAB ctor zeroes brand-new objects, while recycled objects
	 * still carry the entries from their previous use */
	if (obj->owner_cache != pool->caches[order]) {
		int esz, epg, eorder;

		/* Populating a fresh object needs several allocations —
		 * don't attempt it in atomic context */
		if (gfp_mask & GFP_ATOMIC)
			goto out_free;

		/* First allocate the scatterlist array itself... */
		esz = (1 << order) * sizeof(obj->entries[0]);
		epg = (esz >> PAGE_SHIFT) + ((esz & ~PAGE_MASK) != 0);
		eorder = get_order(esz);
		TRACE_MEM("Brand new sgv_obj %p (esz=%d, epg=%d, eorder=%d)",
			obj, esz, epg, eorder);

		obj->eorder = eorder;
		obj->entries = (struct scatterlist*)__get_free_pages(
					gfp_mask|__GFP_ZERO, eorder);
		if (obj->entries == NULL) {
			TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
				"SG vector order %d failed", eorder);
			goto out_free;
		}

		/* ...then fill it with 2^order freshly allocated pages */
		obj->sg_count = scst_alloc_sg_entries(obj->entries, (1 << order),
			gfp_mask, pool->clustered, obj->trans_tbl);
		if (obj->sg_count <= 0)
			goto out_free_entries;

		obj->owner_cache = pool->caches[order];
	} else {
		TRACE_MEM("Cached sgv_obj %p", obj);
		atomic_inc(&pool->acc.hit_alloc);
		atomic_inc(&pool->cache_acc[order].hit_alloc);
	}
	atomic_inc(&pool->acc.total_alloc);
	atomic_inc(&pool->cache_acc[order].total_alloc);
	/* The object covers 2^order pages but the caller asked for "pages";
	 * locate the last SG entry actually needed */
	if (pool->clustered)
		cnt = obj->trans_tbl[pages-1].sg_num;
	else
		cnt = pages;
	sg = cnt-1;
	/* Trim the last entry to the requested size, remembering the
	 * original length — presumably restored on free; confirm */
	obj->orig_sg = sg;
	obj->orig_length = obj->entries[sg].length;
	if (pool->clustered) {
		obj->entries[sg].length = 
			(pages - obj->trans_tbl[sg].pg_count) << PAGE_SHIFT;
	}
	if (size & ~PAGE_MASK) {
		obj->entries[sg].length -= PAGE_SIZE - (size & ~PAGE_MASK);
	}
	*count = cnt;

	TRACE_MEM("sgv_obj=%p (size=%d, pages=%d, "
		"sg_count=%d, count=%d, last_len=%d)", obj, size, pages, 
		obj->sg_count, *count, obj->entries[obj->orig_sg].length);

	res = obj->entries;
	*sgv = obj;

out:
	return res;

out_free_entries:
	free_pages((unsigned long)obj->entries, obj->eorder);
	obj->entries = NULL;

out_free:
	kmem_cache_free(pool->caches[order], obj);
	obj = NULL;
	goto out;
}
318
319 static void sgv_ctor(void *data,  kmem_cache_t *c, unsigned long flags)
320 {
321         struct sgv_pool_obj *obj = data;
322
323         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
324              SLAB_CTOR_CONSTRUCTOR)
325                 return;
326
327         TRACE_MEM("Constructor for sgv_obj %p", obj);
328         memset(obj, 0, sizeof(*obj));
329 }
330
331 static void __sgv_dtor(void *data, int pages)
332 {
333         struct sgv_pool_obj *obj = data;
334         TRACE_MEM("Destructor for sgv_obj %p", obj);
335         if (obj->entries) {
336                 scst_free_sg_entries(obj->entries, obj->sg_count);
337                 free_pages((unsigned long)obj->entries, obj->eorder);
338         }
339 }
340
/*
 * The SLAB destructor callback has a fixed signature, so one trampoline
 * per allocation order is generated here to forward the order's page
 * count (1 << order) to __sgv_dtor().  cache_dtors[] maps each of the
 * SGV_POOL_ELEMENTS orders to its trampoline for kmem_cache_create().
 */
#define SGV_DTOR_NAME(order) sgv_dtor##order
#define SGV_DTOR(order) static void sgv_dtor##order(void *d, kmem_cache_t *k, \
		unsigned long f) { __sgv_dtor(d, 1 << order); }

SGV_DTOR(0);
SGV_DTOR(1);
SGV_DTOR(2);
SGV_DTOR(3);
SGV_DTOR(4);
SGV_DTOR(5);
SGV_DTOR(6);
SGV_DTOR(7);
SGV_DTOR(8);
SGV_DTOR(9);
SGV_DTOR(10);

typedef void (*dtor_t)(void *, kmem_cache_t *, unsigned long);

dtor_t cache_dtors[SGV_POOL_ELEMENTS] =
	{ SGV_DTOR_NAME(0), SGV_DTOR_NAME(1), SGV_DTOR_NAME(2), SGV_DTOR_NAME(3),
	  SGV_DTOR_NAME(4), SGV_DTOR_NAME(5), SGV_DTOR_NAME(6), SGV_DTOR_NAME(7), 
	  SGV_DTOR_NAME(8), SGV_DTOR_NAME(9), SGV_DTOR_NAME(10) }; 
363
364 struct scatterlist *scst_alloc(int size, unsigned long gfp_mask,
365         int use_clustering, int *count)
366 {
367         struct scatterlist *res;
368         int pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);
369
370         TRACE_ENTRY();
371
372         atomic_inc(&sgv_other_total_alloc);
373
374         res = kzalloc(pages*sizeof(*res), gfp_mask);
375         if (res == NULL)
376                 goto out;
377
378         *count = scst_alloc_sg_entries(res, pages, gfp_mask, use_clustering,
379                         NULL);
380         if (*count <= 0)
381                 goto out_free;
382
383 out:
384         TRACE_MEM("Alloced sg %p (count %d)", res, *count);
385
386         TRACE_EXIT_HRES((int)res);
387         return res;
388
389 out_free:
390         kfree(res);
391         res = NULL;
392         goto out;
393 }
394
/*
 * Free an SG vector allocated by scst_alloc(): release the pages behind
 * each of the "count" entries, then the vector array itself.
 */
void scst_free(struct scatterlist *sg, int count)
{
	TRACE_MEM("Freeing sg=%p", sg);
	scst_free_sg_entries(sg, count);
	kfree(sg);
}
401
402 int sgv_pool_init(struct sgv_pool *pool, const char *name, int clustered)
403 {
404         int res = -ENOMEM;
405         int i;
406         struct sgv_pool_obj *obj;
407
408         TRACE_ENTRY();
409
410         memset(pool, 0, sizeof(*pool));
411         pool->clustered = clustered;
412
413         TRACE_MEM("sizeof(*obj)=%d, clustered=%d, sizeof(obj->trans_tbl[0])=%d",
414                 sizeof(*obj), clustered, sizeof(obj->trans_tbl[0]));
415
416         for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
417                 int size, pages;
418
419                 atomic_set(&pool->cache_acc[i].total_alloc, 0);
420                 atomic_set(&pool->cache_acc[i].hit_alloc, 0);
421
422                 pages = 1 << i;
423                 size = sizeof(*obj) + pages *
424                         (clustered ? sizeof(obj->trans_tbl[0]) : 0);
425                 TRACE_MEM("pages=%d, size=%d", pages, size);
426
427                 scnprintf(pool->cache_names[i], sizeof(pool->cache_names[i]),
428                         "%s-%luK", name, (PAGE_SIZE >> 10) << i);
429                 pool->caches[i] = kmem_cache_create(pool->cache_names[i], 
430                         size, 0, SCST_SLAB_FLAGS, sgv_ctor, cache_dtors[i]);
431                 if (pool->caches[i] == NULL) {
432                         TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool cache "
433                                 "%s(%d) failed", name, i);
434                         goto out_free;
435                 }
436         }
437
438         res = 0;
439
440 out:
441         TRACE_EXIT_RES(res);
442         return res;
443
444 out_free:
445         for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
446                 if (pool->caches[i]) {
447                         kmem_cache_destroy(pool->caches[i]);
448                         pool->caches[i] = NULL;
449                 } else
450                         break;
451         }
452         goto out;
453 }
454
455 void sgv_pool_deinit(struct sgv_pool *pool)
456 {
457         int i;
458
459         TRACE_ENTRY();
460
461         for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
462                 if (pool->caches[i])
463                         kmem_cache_destroy(pool->caches[i]);
464                 pool->caches[i] = NULL;
465         }
466
467         TRACE_EXIT();
468 }
469
470 struct sgv_pool *sgv_pool_create(const char *name, int clustered)
471 {
472         struct sgv_pool *pool;
473         int rc;
474
475         TRACE_ENTRY();
476
477         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
478         if (pool == NULL) {
479                 TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of sgv_pool failed");
480                 goto out;
481         }
482
483         rc = sgv_pool_init(pool, name, clustered);
484         if (rc != 0)
485                 goto out_free;
486
487 out:
488         TRACE_EXIT_RES(pool != NULL);
489         return pool;
490
491 out_free:
492         kfree(pool);
493         pool = NULL;
494         goto out;
495 }
496
/* Deinitialize and free a pool created by sgv_pool_create() */
void sgv_pool_destroy(struct sgv_pool *pool)
{
	TRACE_ENTRY();
	sgv_pool_deinit(pool);
	kfree(pool);
	TRACE_EXIT();
}
506
507 int scst_sgv_pools_init(struct scst_sgv_pools *pools)
508 {
509         int res;
510
511         TRACE_ENTRY();
512
513         atomic_set(&sgv_big_total_alloc, 0);
514         atomic_set(&sgv_other_total_alloc, 0);
515
516         res = sgv_pool_init(&pools->norm, "sgv", 0);
517         if (res != 0)
518                 goto out_free_clust;
519
520         res = sgv_pool_init(&pools->norm_clust, "sgv-clust", 1);
521         if (res != 0)
522                 goto out;
523
524         res = sgv_pool_init(&pools->dma, "sgv-dma", 0);
525         if (res != 0)
526                 goto out_free_norm;
527
528 #ifdef SCST_HIGHMEM
529         res = sgv_pool_init(&pools->highmem, "sgv-high", 0);
530         if (res != 0)
531                 goto out_free_dma;
532 #endif
533
534 out:
535         TRACE_EXIT_RES(res);
536         return res;
537
538 #ifdef SCST_HIGHMEM
539 out_free_dma:
540         sgv_pool_deinit(&pools->dma);
541 #endif
542
543 out_free_norm:
544         sgv_pool_deinit(&pools->norm);
545
546 out_free_clust:
547         sgv_pool_deinit(&pools->norm_clust);
548         goto out;
549 }
550
551 void scst_sgv_pools_deinit(struct scst_sgv_pools *pools)
552 {
553         TRACE_ENTRY();
554
555 #ifdef SCST_HIGHMEM
556         sgv_pool_deinit(&pools->highmem);
557 #endif
558         sgv_pool_deinit(&pools->dma);
559         sgv_pool_deinit(&pools->norm);
560         sgv_pool_deinit(&pools->norm_clust);
561
562         TRACE_EXIT();
563         return;
564 }