1e686186a2091c5fe97daa1085e0c896eb80a59e
[mirror/scst/.git] / scst / src / scst_mem.c
1 /*
 *  scst_mem.c
3  *  
4  *  Copyright (C) 2006 Vladislav Bolkhovitin <vst@vlnb.net>
5  *  
6  *  This program is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU General Public License
8  *  as published by the Free Software Foundation, version 2
9  *  of the License.
10  * 
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  *  GNU General Public License for more details.
15  */
16
17 #include <linux/init.h>
18 #include <linux/kernel.h>
19 #include <linux/errno.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/slab.h>
23 #include <linux/sched.h>
24 #include <linux/mm.h>
25 #include <asm/unistd.h>
26 #include <asm/string.h>
27
28 #ifdef SCST_HIGHMEM
29 #include <linux/highmem.h>
30 #endif
31
32 #include "scsi_tgt.h"
33 #include "scst_debug.h"
34 #include "scst_priv.h"
35 #include "scst_mem.h"
36
37 /*
38  * This implementation of sgv_pool is not the best, because the SLABs could get
39  * fragmented and too much undesirable memory could be kept, plus
40  * under memory pressure the cached objects could be purged too quickly.
 *  On the other hand, it's simple, works well, and doesn't require any
 *  modifications of the existing SLAB code.
42  * of the existing SLAB code.
43  */
44
/* total number of "big" allocations (order >= SGV_POOL_ELEMENTS) ever made */
atomic_t sgv_big_total_alloc;
46
/*
 * Try to merge SG entry sg[cur] (always a freshly allocated single page
 * when called from sgv_alloc_sg()) into an existing entry whose pages
 * are physically contiguous with it.  Returns the index of the entry it
 * was merged into, or -1 if no merge was possible (the caller then keeps
 * sg[cur] as a separate entry).  "hint" is the index of the previous
 * successful merge target and is checked first as a cheap heuristic
 * before the linear backward scan.
 */
static int scst_check_clustering(struct scatterlist *sg, int cur, int hint)
{
        int res = -1;
        int i = hint;
        unsigned long pfn_cur = page_to_pfn(sg[cur].page);
        int len_cur = sg[cur].length;
        /* first page frame just past the end of the current entry */
        unsigned long pfn_cur_next = pfn_cur + (len_cur >> PAGE_SHIFT);
        /* merging is only legal across whole pages */
        int full_page_cur = (len_cur & (PAGE_SIZE - 1)) == 0;
        unsigned long pfn, pfn_next, full_page;

#ifdef SCST_HIGHMEM
        /* NOTE(review): "page" is not declared in this scope, so this
         * branch looks like it cannot compile with SCST_HIGHMEM defined;
         * presumably it should test sg[cur].page -- verify. */
        if (page >= highmem_start_page) {
                TRACE_MEM("%s", "HIGHMEM page allocated, no clustering")
                goto out;
        }
#endif

#if 0
        TRACE_MEM("pfn_cur %ld, pfn_cur_next %ld, len_cur %d, full_page_cur %d",
                pfn_cur, pfn_cur_next, len_cur, full_page_cur);
#endif

        /* check the hint first */
        if (i >= 0) {
                pfn = page_to_pfn(sg[i].page);
                pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
                full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
                
                /* sg[i] starts right after sg[cur]: prepend cur to i */
                if ((pfn == pfn_cur_next) && full_page_cur)
                        goto out_head;

                /* sg[i] ends right before sg[cur]: append cur to i */
                if ((pfn_next == pfn_cur) && full_page)
                        goto out_tail;
        }

        /* ToDo: implement more intelligent search */
        for (i = cur - 1; i >= 0; i--) {
                pfn = page_to_pfn(sg[i].page);
                pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
                full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
                
                if ((pfn == pfn_cur_next) && full_page_cur)
                        goto out_head;

                if ((pfn_next == pfn_cur) && full_page)
                        goto out_tail;
        }

out:
        return res;

out_tail:
        TRACE_MEM("SG segment %d will be tail merged with segment %d", cur, i);
        sg[i].length += len_cur;
        /* merged-away entry is zeroed in place, not compacted away */
        memset(&sg[cur], 0, sizeof(sg[cur]));
        res = i;
        goto out;

out_head:
        TRACE_MEM("SG segment %d will be head merged with segment %d", cur, i);
        sg[i].page = sg[cur].page;
        sg[i].length += len_cur;
        memset(&sg[cur], 0, sizeof(sg[cur]));
        res = i;
        goto out;
}
113
/*
 * Free every page referenced by obj's SG vector and reset sg_count to 0.
 * An entry may span several pages when clustering merged contiguous
 * allocations, so each entry is walked and freed page by page.
 */
static void sgv_free_sg(struct sgv_pool_obj *obj)
{
        int i;

        TRACE_MEM("obj=%p, sg_count=%d", obj, obj->sg_count);

        for (i = 0; i < obj->sg_count; i++) {
                struct page *p = obj->entries[i].page;
                int len = obj->entries[i].length;
                /* pages covered by this entry, partial tail page rounded up */
                int pages =
                        (len >> PAGE_SHIFT) + ((len & ~PAGE_MASK) != 0);

                TRACE_MEM("page %lx, len %d, pages %d", 
                        (unsigned long)p, len, pages);

                while (pages > 0) {
                        /* always order 0: sgv_alloc_sg() allocates the pages
                         * one at a time, and __free_pages() must be given
                         * the same order they were allocated with */
                        int order = 0;

/* 
 * __free_pages() doesn't like freeing pages with not that order with
 * which they were allocated, so disable this small optimization.
 */
#if 0
                        if (len > 0) {
                                while(((1 << order) << PAGE_SHIFT) < len)
                                        order++;
                                len = 0;
                        }
#endif
                        TRACE_MEM("free_pages(): order %d, page %lx",
                                order, (unsigned long)p);

                        __free_pages(p, order);

                        pages -= 1 << order;
                        /* NOTE(review): stepping struct page pointers assumes
                         * a contiguous mem_map over the merged range -- TODO
                         * confirm for sparse/discontig memory configs */
                        p += 1 << order;
                }
        }
        obj->sg_count = 0;
}
154
/*
 * Allocate "pages" single pages into obj->entries, optionally merging
 * physically contiguous pages into fewer SG entries (clustering), and,
 * when clustering, build obj->trans_tbl mapping page index -> SG entry.
 * Returns 0 on success or -ENOMEM with all pages freed on failure.
 */
static int sgv_alloc_sg(struct sgv_pool_obj *obj, int pages,
        unsigned long mask, int clustered)
{
        int res = 0;
        int pg, i, j;
        int merged = -1;        /* index of the last merge target (hint) */

        TRACE_MEM("pages=%d, clustered=%d", pages, clustered);

#if 0
        mask |= __GFP_COLD;
#endif
#ifdef SCST_STRICT_SECURITY
        /* zero the pages so no stale kernel data can leak out */
        mask |= __GFP_ZERO;
#endif

        obj->sg_count = 0;
        for (pg = 0; pg < pages; pg++) {
#ifdef DEBUG_OOM
                /* randomly simulate allocation failure to exercise the
                 * out-of-memory recovery path */
                if ((scst_random() % 10000) == 55)
                        obj->entries[obj->sg_count].page = NULL;
                else
#endif
                        obj->entries[obj->sg_count].page = alloc_pages(mask, 0);
                if (obj->entries[obj->sg_count].page == NULL) {
                        TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of "
                                "sgv_pool_obj page failed");
                        res = -ENOMEM;
                        goto out_free;
                }
                obj->entries[obj->sg_count].length = PAGE_SIZE;
                if (clustered) {
                        /* on a successful merge sg_count stays put: the new
                         * page was folded into entry "merged" */
                        merged = scst_check_clustering(obj->entries, 
                                obj->sg_count, merged);
                        if (merged == -1)
                                obj->sg_count++;
                } else
                        obj->sg_count++;
                TRACE_MEM("pg=%d, merged=%d, sg_count=%d", pg, merged,
                        obj->sg_count);
        }

        if (clustered) {
                /* trans_tbl[page].sg_num is the 1-based SG entry covering
                 * that page; trans_tbl[entry].pg_count is the first page
                 * index of that entry.  Entries zeroed by merging have
                 * length 0, so they contribute no inner iterations. */
                pg = 0;
                for (i = 0; i < pages; i++) {
                        int n = obj->entries[i].length >> PAGE_SHIFT;
                        obj->trans_tbl[i].pg_count = pg;
                        for (j = 0; j < n; j++)
                                obj->trans_tbl[pg++].sg_num = i+1;
                }
        }

out:
        TRACE_MEM("res=%d, sg_count=%d", res, obj->sg_count);
        return res;

out_free:
        sgv_free_sg(obj);
        goto out;
}
215
216 struct sgv_pool_obj *sgv_pool_alloc_big(int size, int pages,
217         unsigned long mask, int *count, int clustered)
218 {
219         struct sgv_pool_obj *obj;
220         int elen, cnt = 0;
221
222         elen = sizeof(*obj) + pages * (sizeof(obj->entries[0]) +
223                 clustered ? sizeof(obj->trans_tbl[0]) : 0);
224         obj = kzalloc(elen, mask & ~(__GFP_HIGHMEM|GFP_DMA));
225         if (obj == NULL) {
226                 TRACE(TRACE_OUT_OF_MEM, "Allocation big of sgv_pool_obj "
227                                 "failed (elen=%d, size=%d)", elen, size);
228                 goto out;
229         }
230         obj->entries = (struct scatterlist*)&obj->trans_tbl[pages];
231
232         atomic_inc(&sgv_big_total_alloc);
233
234         if (sgv_alloc_sg(obj, pages, mask, clustered) != 0)
235                 goto out_free;
236         cnt = obj->sg_count;
237         if (size & ~PAGE_MASK) {
238                 obj->entries[cnt-1].length -= 
239                         PAGE_SIZE - (size & ~PAGE_MASK);
240         }
241         *count = cnt;
242
243 out:
244         TRACE_MEM("obj=%p (count=%d)", obj, cnt);
245         return obj;
246
247 out_free:
248         kfree(obj);
249         obj = NULL;
250         goto out;
251 }
252
/*
 * Counterpart of sgv_pool_alloc_big(): free all the object's SG pages,
 * then the single kzalloc()'ed chunk holding obj + trans_tbl + entries.
 */
void __sgv_pool_free_big(struct sgv_pool_obj *obj)
{
        TRACE_MEM("obj=%p", obj);
        sgv_free_sg(obj);
        kfree(obj);
}
259
/*
 * Allocate an SG vector of "size" bytes from the pool.  Requests with
 * order < SGV_POOL_ELEMENTS come from the per-order kmem caches, where a
 * cache hit returns an object whose pages are still attached from its
 * previous use; bigger requests fall back to sgv_pool_alloc_big().
 * *count receives the number of SG entries.  Returns NULL on failure.
 */
struct sgv_pool_obj *sgv_pool_alloc(struct sgv_pool *pool, int size,
        unsigned long mask, int *count)
{
        struct sgv_pool_obj *obj;
        int order, pages, cnt, sg;

        if (unlikely(size == 0))
                return NULL;

        /* pages needed, partial tail page rounded up */
        pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);
        order = get_order(size);

        TRACE_MEM("size=%d, pages=%d, order=%d", size, pages, order);

        if (order >= SGV_POOL_ELEMENTS) {
                obj = NULL;
                /* NOTE(review): "mask & GFP_ATOMIC" tests a composite GFP
                 * value with "&"; presumably it means "caller cannot sleep,
                 * skip the big allocation path" -- verify */
                if (mask & GFP_ATOMIC)
                        goto out;
                obj = sgv_pool_alloc_big(size, pages, mask, count,
                                pool->clustered);
                goto out;
        }

        obj = kmem_cache_alloc(pool->caches[order], 
                        mask & ~(__GFP_HIGHMEM|GFP_DMA));
        if (obj == NULL) {
                if (!(mask & GFP_ATOMIC)) {
                        TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
                                "failed (size %d)", size);
                }
                goto out;
        }

        /* owner_cache != our cache means the object came fresh from the
         * SLAB constructor (zeroed, so owner_cache == NULL) and its
         * entries vector and pages must still be allocated; otherwise it
         * is a cache hit with everything attached */
        if (obj->owner_cache != pool->caches[order]) {
                int esz, epg, eorder;

                /* the allocations below may sleep */
                if (mask & GFP_ATOMIC)
                        goto out_free;

                esz = (1 << order) * sizeof(obj->entries[0]);
                epg = (esz >> PAGE_SHIFT) + ((esz & ~PAGE_MASK) != 0);
                eorder = get_order(esz);
                TRACE_MEM("Brand new sgv_obj %p (esz=%d, epg=%d, eorder=%d)",
                        obj, esz, epg, eorder);

                obj->eorder = eorder;
                obj->entries = (struct scatterlist*)__get_free_pages(
                                        mask|__GFP_ZERO, eorder);
                if (obj->entries == NULL) {
                        TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
                                "SG vector order %d failed", eorder);
                        goto out_free;
                }

                /* allocate the full 1 << order pages even if "size" needs
                 * fewer: the object is cached and reused for any request
                 * of this order */
                if (sgv_alloc_sg(obj, (1 << order), mask, 
                                        pool->clustered) != 0)
                        goto out_free_entries;

                obj->owner_cache = pool->caches[order];
        } else {
                TRACE_MEM("Cached sgv_obj %p", obj);
                atomic_inc(&pool->acc.hit_alloc);
                atomic_inc(&pool->cache_acc[order].hit_alloc);
        }
        atomic_inc(&pool->acc.total_alloc);
        atomic_inc(&pool->cache_acc[order].total_alloc);
        /* number of SG entries actually covering "pages" pages */
        if (pool->clustered)
                cnt = obj->trans_tbl[pages-1].sg_num;
        else
                cnt = pages;
        sg = cnt-1;
        /* remember the last entry's untrimmed length; presumably the free
         * path (not in this chunk) restores it before reuse -- verify */
        obj->orig_sg = sg;
        obj->orig_length = obj->entries[sg].length;
        if (pool->clustered) {
                /* clip the (possibly merged) last entry to the requested
                 * number of pages */
                obj->entries[sg].length = 
                        (pages - obj->trans_tbl[sg].pg_count) << PAGE_SHIFT;
        }
        if (size & ~PAGE_MASK) {
                /* trim the tail to the exact byte size */
                obj->entries[sg].length -= PAGE_SIZE - (size & ~PAGE_MASK);
        }
        *count = cnt;

        TRACE_MEM("sgv_obj=%p (size=%d, pages=%d, "
                "sg_count=%d, count=%d, last_len=%d)", obj, size, pages, 
                obj->sg_count, *count, obj->entries[obj->orig_sg].length);

out:
        return obj;

out_free_entries:
        free_pages((unsigned long)obj->entries, obj->eorder);
        obj->entries = NULL;

out_free:
        kmem_cache_free(pool->caches[order], obj);
        obj = NULL;
        goto out;
}
358
359 static void sgv_ctor(void *data,  kmem_cache_t *c, unsigned long flags)
360 {
361         struct sgv_pool_obj *obj = data;
362
363         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
364              SLAB_CTOR_CONSTRUCTOR)
365                 return;
366
367         TRACE_MEM("Constructor for sgv_obj %p", obj);
368         memset(obj, 0, sizeof(*obj));
369 }
370
371 static void __sgv_dtor(void *data, int pages)
372 {
373         struct sgv_pool_obj *obj = data;
374         TRACE_MEM("Destructor for sgv_obj %p", obj);
375         if (obj->entries) {
376                 sgv_free_sg(obj);
377                 free_pages((unsigned long)obj->entries, obj->eorder);
378         }
379 }
380
/*
 * Per-order SLAB destructor stubs.  kmem_cache_create() takes a dtor
 * with a fixed signature and no private argument, so one stub is stamped
 * out per order to bind the page count as "1 << order".  (Note that
 * __sgv_dtor() currently ignores its pages argument.)
 */
#define SGV_DTOR_NAME(order) sgv_dtor##order
#define SGV_DTOR(order) static void sgv_dtor##order(void *d, kmem_cache_t *k, \
                unsigned long f) { __sgv_dtor(d, 1 << order); }

SGV_DTOR(0);
SGV_DTOR(1);
SGV_DTOR(2);
SGV_DTOR(3);
SGV_DTOR(4);
SGV_DTOR(5);
SGV_DTOR(6);
SGV_DTOR(7);
SGV_DTOR(8);
SGV_DTOR(9);
SGV_DTOR(10);

typedef void (*dtor_t)(void *, kmem_cache_t *, unsigned long);

/* indexed by order; must contain exactly SGV_POOL_ELEMENTS entries */
dtor_t cache_dtors[SGV_POOL_ELEMENTS] =
        { SGV_DTOR_NAME(0), SGV_DTOR_NAME(1), SGV_DTOR_NAME(2), SGV_DTOR_NAME(3),
          SGV_DTOR_NAME(4), SGV_DTOR_NAME(5), SGV_DTOR_NAME(6), SGV_DTOR_NAME(7), 
          SGV_DTOR_NAME(8), SGV_DTOR_NAME(9), SGV_DTOR_NAME(10) }; 
403
/*
 * Initialize a pool: create one kmem cache per order in
 * [0, SGV_POOL_ELEMENTS).  The cached object size includes the per-page
 * trans_tbl only for clustered pools; the entries vector is allocated
 * lazily in sgv_pool_alloc().  Returns 0 or -ENOMEM; on failure any
 * caches already created are destroyed.
 */
int sgv_pool_init(struct sgv_pool *pool, const char *name, int clustered)
{
        int res = -ENOMEM;
        int i;
        struct sgv_pool_obj *obj;   /* used only inside sizeof(), never read */

        TRACE_ENTRY();

        memset(pool, 0, sizeof(*pool));
        pool->clustered = clustered;

        /* NOTE(review): sizeof() results printed with %d; %zd would be the
         * correct specifier */
        TRACE_MEM("sizeof(*obj)=%d, clustered=%d, sizeof(obj->trans_tbl[0])=%d",
                sizeof(*obj), clustered, sizeof(obj->trans_tbl[0]));

        for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
                int size, pages;

                atomic_set(&pool->cache_acc[i].total_alloc, 0);
                atomic_set(&pool->cache_acc[i].hit_alloc, 0);

                pages = 1 << i;
                size = sizeof(*obj) + pages *
                        (clustered ? sizeof(obj->trans_tbl[0]) : 0);
                TRACE_MEM("pages=%d, size=%d", pages, size);

                /* cache names like "sgv-4K", "sgv-8K", ... per order */
                scnprintf(pool->cache_names[i], sizeof(pool->cache_names[i]),
                        "%s-%luK", name, (PAGE_SIZE >> 10) << i);
                pool->caches[i] = kmem_cache_create(pool->cache_names[i], 
                        size, 0, SCST_SLAB_FLAGS, sgv_ctor, cache_dtors[i]);
                if (pool->caches[i] == NULL) {
                        TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool cache "
                                "%s(%d) failed", name, i);
                        goto out_free;
                }
        }

        res = 0;

out:
        TRACE_EXIT_RES(res);
        return res;

out_free:
        /* caches are created in order, so stop at the first NULL slot */
        for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
                if (pool->caches[i]) {
                        kmem_cache_destroy(pool->caches[i]);
                        pool->caches[i] = NULL;
                } else
                        break;
        }
        goto out;
}
456
457 void sgv_pool_deinit(struct sgv_pool *pool)
458 {
459         int i;
460
461         TRACE_ENTRY();
462
463         for(i = 0; i < SGV_POOL_ELEMENTS; i++) {
464                 if (pool->caches[i])
465                         kmem_cache_destroy(pool->caches[i]);
466                 pool->caches[i] = NULL;
467         }
468
469         TRACE_EXIT();
470 }
471
472 struct sgv_pool *sgv_pool_create(const char *name, int clustered)
473 {
474         struct sgv_pool *pool;
475         int rc;
476
477         TRACE_ENTRY();
478
479         pool = kmalloc(sizeof(*pool), GFP_KERNEL);
480         if (pool == NULL) {
481                 TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of sgv_pool failed");
482                 goto out;
483         }
484
485         rc = sgv_pool_init(pool, name, clustered);
486         if (rc != 0)
487                 goto out_free;
488
489 out:
490         TRACE_EXIT_RES(pool != NULL);
491         return pool;
492
493 out_free:
494         kfree(pool);
495         pool = NULL;
496         goto out;
497 }
498
/*
 * Counterpart of sgv_pool_create(): deinitialize the pool and free it.
 */
void sgv_pool_destroy(struct sgv_pool *pool)
{
        TRACE_ENTRY();

        sgv_pool_deinit(pool);
        kfree(pool);

        TRACE_EXIT();
}
508
509 int scst_sgv_pools_init(struct scst_sgv_pools *pools)
510 {
511         int res;
512
513         TRACE_ENTRY();
514
515         atomic_set(&sgv_big_total_alloc, 0);
516
517         res = sgv_pool_init(&pools->norm, "sgv", 0);
518         if (res != 0)
519                 goto out_free_clust;
520
521         res = sgv_pool_init(&pools->norm_clust, "sgv-clust", 1);
522         if (res != 0)
523                 goto out;
524
525         res = sgv_pool_init(&pools->dma, "sgv-dma", 0);
526         if (res != 0)
527                 goto out_free_norm;
528
529 #ifdef SCST_HIGHMEM
530         res = sgv_pool_init(&pools->highmem, "sgv-high", 0);
531         if (res != 0)
532                 goto out_free_dma;
533 #endif
534
535 out:
536         TRACE_EXIT_RES(res);
537         return res;
538
539 #ifdef SCST_HIGHMEM
540 out_free_dma:
541         sgv_pool_deinit(&pools->dma);
542 #endif
543
544 out_free_norm:
545         sgv_pool_deinit(&pools->norm);
546
547 out_free_clust:
548         sgv_pool_deinit(&pools->norm_clust);
549         goto out;
550 }
551
552 void scst_sgv_pools_deinit(struct scst_sgv_pools *pools)
553 {
554         TRACE_ENTRY();
555
556 #ifdef SCST_HIGHMEM
557         sgv_pool_deinit(&pools->highmem);
558 #endif
559         sgv_pool_deinit(&pools->dma);
560         sgv_pool_deinit(&pools->norm);
561         sgv_pool_deinit(&pools->norm_clust);
562
563         TRACE_EXIT();
564         return;
565 }