1 /*
2  *  scst_mem.c
3  *
4  *  Copyright (C) 2006 - 2009 Vladislav Bolkhovitin <vst@vlnb.net>
5  *  Copyright (C) 2007 - 2009 ID7 Ltd.
6  *
7  *  This program is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU General Public License
9  *  as published by the Free Software Foundation, version 2
10  *  of the License.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  */
17
18 #include <linux/init.h>
19 #include <linux/kernel.h>
20 #include <linux/errno.h>
21 #include <linux/list.h>
22 #include <linux/spinlock.h>
23 #include <linux/slab.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/unistd.h>
27 #include <linux/string.h>
28
29 #include "scst.h"
30 #include "scst_priv.h"
31 #include "scst_mem.h"
32
33 #define PURGE_INTERVAL          (60 * HZ)
34 #define PURGE_TIME_AFTER        PURGE_INTERVAL
35 #define SHRINK_TIME_AFTER       (1 * HZ)
36
37 /* Max pages freed from a pool per shrinking iteration */
38 #define MAX_PAGES_PER_POOL      50
39
40 static struct sgv_pool sgv_norm_clust_pool, sgv_norm_pool, sgv_dma_pool;
41
42 static atomic_t sgv_pages_total = ATOMIC_INIT(0);
43
44 /* Both read-only */
45 static int sgv_hi_wmk;
46 static int sgv_lo_wmk;
47
48 static int sgv_max_local_order, sgv_max_trans_order;
49
50 static DEFINE_SPINLOCK(sgv_pools_lock); /* inner lock: may be taken while sgv_pool_lock is held! */
51 static DEFINE_MUTEX(sgv_pools_mutex);
52
53 /* Both protected by sgv_pools_lock */
54 static struct sgv_pool *sgv_cur_purge_pool;
55 static LIST_HEAD(sgv_active_pools_list);
56
57 static atomic_t sgv_releases_on_hiwmk = ATOMIC_INIT(0);
58 static atomic_t sgv_releases_on_hiwmk_failed = ATOMIC_INIT(0);
59
60 static atomic_t sgv_other_total_alloc = ATOMIC_INIT(0);
61
62 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
63 static struct shrinker *sgv_shrinker;
64 #else
65 static struct shrinker sgv_shrinker;
66 #endif
67
68 /*
69  * Protected by sgv_pools_mutex AND sgv_pools_lock for writes,
70  * either one for reads.
71  */
72 static LIST_HEAD(sgv_pools_list);
73
74 static void sgv_pool_get(struct sgv_pool *pool);
75 static void sgv_pool_put(struct sgv_pool *pool);
76
77 static inline bool sgv_pool_clustered(const struct sgv_pool *pool)
78 {
79         return pool->clustering_type != sgv_no_clustering;
80 }
81
82 void scst_sgv_pool_use_norm(struct scst_tgt_dev *tgt_dev)
83 {
84         tgt_dev->gfp_mask = __GFP_NOWARN;
85         tgt_dev->pool = &sgv_norm_pool;
86         clear_bit(SCST_TGT_DEV_CLUST_POOL, &tgt_dev->tgt_dev_flags);
87 }
88
89 void scst_sgv_pool_use_norm_clust(struct scst_tgt_dev *tgt_dev)
90 {
91         TRACE_MEM("%s", "Use clustering");
92         tgt_dev->gfp_mask = __GFP_NOWARN;
93         tgt_dev->pool = &sgv_norm_clust_pool;
94         set_bit(SCST_TGT_DEV_CLUST_POOL, &tgt_dev->tgt_dev_flags);
95 }
96
97 void scst_sgv_pool_use_dma(struct scst_tgt_dev *tgt_dev)
98 {
99         TRACE_MEM("%s", "Use ISA DMA memory");
100         tgt_dev->gfp_mask = __GFP_NOWARN | GFP_DMA;
101         tgt_dev->pool = &sgv_dma_pool;
102         clear_bit(SCST_TGT_DEV_CLUST_POOL, &tgt_dev->tgt_dev_flags);
103 }
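/*
 * Illustrative sketch (not part of this file): a dev handler would normally
 * call one of the three helpers above while a tgt_dev is being set up, to
 * bind it to one of the global pools. The callback and flag names below are
 * hypothetical.
 *
 *	static int my_handler_attach_tgt(struct scst_tgt_dev *tgt_dev)
 *	{
 *		if (my_hw_needs_isa_dma)	/* driver-specific condition */
 *			scst_sgv_pool_use_dma(tgt_dev);
 *		else
 *			scst_sgv_pool_use_norm_clust(tgt_dev);
 *		return 0;
 *	}
 */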
104
105 /* Must be called with no locks held */
106 static void sgv_dtor_and_free(struct sgv_pool_obj *obj)
107 {
108         struct sgv_pool *pool = obj->owner_pool;
109
110         TRACE_MEM("Destroying sgv obj %p", obj);
111
112         if (obj->sg_count != 0) {
113                 pool->alloc_fns.free_pages_fn(obj->sg_entries,
114                         obj->sg_count, obj->allocator_priv);
115         }
116         if (obj->sg_entries != obj->sg_entries_data) {
117                 if (obj->trans_tbl !=
118                     (struct trans_tbl_ent *)obj->sg_entries_data) {
119                         /* kfree() handles NULL parameter */
120                         kfree(obj->trans_tbl);
121                         obj->trans_tbl = NULL;
122                 }
123                 kfree(obj->sg_entries);
124         }
125
126         kmem_cache_free(pool->caches[obj->order_or_pages], obj);
127         return;
128 }
129
130 /* Might be called under sgv_pool_lock */
131 static inline void sgv_del_from_active(struct sgv_pool *pool)
132 {
133         struct list_head *next;
134
135         TRACE_MEM("Deleting sgv pool %p from the active list", pool);
136
137         spin_lock_bh(&sgv_pools_lock);
138
139         next = pool->sgv_active_pools_list_entry.next;
140         list_del(&pool->sgv_active_pools_list_entry);
141
142         if (sgv_cur_purge_pool == pool) {
143                 TRACE_MEM("Sgv pool %p is sgv cur purge pool", pool);
144
145                 if (next == &sgv_active_pools_list)
146                         next = next->next;
147
148                 if (next == &sgv_active_pools_list) {
149                         sgv_cur_purge_pool = NULL;
150                         TRACE_MEM("%s", "Sgv active list now empty");
151                 } else {
152                         sgv_cur_purge_pool = list_entry(next, typeof(*pool),
153                                 sgv_active_pools_list_entry);
154                         TRACE_MEM("New sgv cur purge pool %p",
155                                 sgv_cur_purge_pool);
156                 }
157         }
158
159         spin_unlock_bh(&sgv_pools_lock);
160         return;
161 }
162
163 /* Must be called with sgv_pool_lock held */
164 static void sgv_dec_cached_entries(struct sgv_pool *pool, int pages)
165 {
166         pool->cached_entries--;
167         pool->cached_pages -= pages;
168
169         if (pool->cached_entries == 0)
170                 sgv_del_from_active(pool);
171
172         return;
173 }
174
175 /* Must be called with sgv_pool_lock held */
176 static void __sgv_purge_from_cache(struct sgv_pool_obj *obj)
177 {
178         int pages = 1 << obj->order_or_pages;
179         struct sgv_pool *pool = obj->owner_pool;
180
181         TRACE_MEM("Purging sgv obj %p from pool %p (new cached_entries %d)",
182                 obj, pool, pool->cached_entries-1);
183
184         list_del(&obj->sorted_recycling_list_entry);
185         list_del(&obj->recycling_list_entry);
186
187         pool->inactive_cached_pages -= pages;
188         sgv_dec_cached_entries(pool, pages);
189
190         atomic_sub(pages, &sgv_pages_total);
191
192         return;
193 }
194
195 /* Must be called with sgv_pool_lock held */
196 static bool sgv_purge_from_cache(struct sgv_pool_obj *obj, int after,
197         unsigned long cur_time)
198 {
199         EXTRACHECKS_BUG_ON(after < 0);
200
201         TRACE_MEM("Checking if sgv obj %p should be purged (cur time %ld, "
202                 "obj time %ld, time to purge %ld)", obj, cur_time,
203                 obj->time_stamp, obj->time_stamp + after);
204
205         if (time_after_eq(cur_time, (obj->time_stamp + after))) {
206                 __sgv_purge_from_cache(obj);
207                 return true;
208         }
209         return false;
210 }
211
212 /* No locks */
213 static int sgv_shrink_pool(struct sgv_pool *pool, int nr, int after,
214         unsigned long cur_time)
215 {
216         int freed = 0;
217
218         TRACE_ENTRY();
219
220         TRACE_MEM("Trying to shrink pool %p (nr %d, after %d)", pool, nr,
221                 after);
222
223         spin_lock_bh(&pool->sgv_pool_lock);
224
225         while (!list_empty(&pool->sorted_recycling_list) &&
226                         (atomic_read(&sgv_pages_total) > sgv_lo_wmk)) {
227                 struct sgv_pool_obj *obj = list_entry(
228                         pool->sorted_recycling_list.next,
229                         struct sgv_pool_obj, sorted_recycling_list_entry);
230
231                 if (sgv_purge_from_cache(obj, after, cur_time)) {
232                         int pages = 1 << obj->order_or_pages;
233
234                         freed += pages;
235                         nr -= pages;
236
237                         TRACE_MEM("%d pages purged from pool %p (nr left %d, "
238                                 "total freed %d)", pages, pool, nr, freed);
239
240                         spin_unlock_bh(&pool->sgv_pool_lock);
241                         sgv_dtor_and_free(obj);
242                         spin_lock_bh(&pool->sgv_pool_lock);
243                 } else
244                         break;
245
246                 if ((nr <= 0) || (freed >= MAX_PAGES_PER_POOL)) {
247                         if (freed >= MAX_PAGES_PER_POOL)
248                                 TRACE_MEM("%d pages purged from pool %p, "
249                                         "leaving", freed, pool);
250                         break;
251                 }
252         }
253
254         spin_unlock_bh(&pool->sgv_pool_lock);
255
256         TRACE_EXIT_RES(nr);
257         return nr;
258 }
259
260 /* No locks */
261 static int __sgv_shrink(int nr, int after)
262 {
263         struct sgv_pool *pool;
264         unsigned long cur_time = jiffies;
265         int prev_nr = nr;
266         bool circle = false;
267
268         TRACE_ENTRY();
269
270         TRACE_MEM("Trying to shrink %d pages from all sgv pools (after %d)",
271                 nr, after);
272
273         while (nr > 0) {
274                 struct list_head *next;
275
276                 spin_lock_bh(&sgv_pools_lock);
277
278                 pool = sgv_cur_purge_pool;
279                 if (pool == NULL) {
280                         if (list_empty(&sgv_active_pools_list)) {
281                                 TRACE_MEM("%s", "Active pools list is empty");
282                                 goto out_unlock;
283                         }
284
285                         pool = list_entry(sgv_active_pools_list.next,
286                                         typeof(*pool),
287                                         sgv_active_pools_list_entry);
288                 }
289                 sgv_pool_get(pool);
290
291                 next = pool->sgv_active_pools_list_entry.next;
292                 if (next == &sgv_active_pools_list) {
293                         if (circle && (prev_nr == nr)) {
294                                 TRACE_MEM("Full circle done, but no progress, "
295                                         "leaving (nr %d)", nr);
296                                 goto out_unlock_put;
297                         }
298                         circle = true;
299                         prev_nr = nr;
300
301                         next = next->next;
302                 }
303
304                 sgv_cur_purge_pool = list_entry(next, typeof(*pool),
305                         sgv_active_pools_list_entry);
306                 TRACE_MEM("New cur purge pool %p", sgv_cur_purge_pool);
307
308                 spin_unlock_bh(&sgv_pools_lock);
309
310                 nr = sgv_shrink_pool(pool, nr, after, cur_time);
311
312                 sgv_pool_put(pool);
313         }
314
315 out:
316         TRACE_EXIT_RES(nr);
317         return nr;
318
319 out_unlock:
320         spin_unlock_bh(&sgv_pools_lock);
321         goto out;
322
323 out_unlock_put:
324         spin_unlock_bh(&sgv_pools_lock);
325         sgv_pool_put(pool);
326         goto out;
327 }
328
329 static int sgv_shrink(int nr, gfp_t gfpm)
330 {
331         TRACE_ENTRY();
332
333         if (nr > 0)
334                 nr = __sgv_shrink(nr, SHRINK_TIME_AFTER);
335         else {
336                 struct sgv_pool *pool;
337                 int inactive_pages = 0;
338
339                 spin_lock_bh(&sgv_pools_lock);
340                 list_for_each_entry(pool, &sgv_active_pools_list,
341                                 sgv_active_pools_list_entry) {
342                         inactive_pages += pool->inactive_cached_pages;
343                 }
344                 spin_unlock_bh(&sgv_pools_lock);
345
346                 nr = max((int)0, inactive_pages - sgv_lo_wmk);
347         }
348
349         TRACE_MEM("Returning %d", nr);
350
351         TRACE_EXIT_RES(nr);
352         return nr;
353 }
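/*
 * Note on the shrinker contract used above (old-style shrinker callback, as
 * registered in scst_sgv_pools_init() for kernels of this vintage): the MM
 * core first calls the callback with nr == 0 to ask how much is reclaimable,
 * then, under memory pressure, calls it again with nr > 0 to request that
 * many pages be freed. Roughly, the MM core effectively does:
 *
 *	n = sgv_shrink(0, GFP_KERNEL);      // "how much could you free?"
 *	if (n > 0)
 *		sgv_shrink(n, GFP_KERNEL);  // "free up to that much"
 *
 * With nr == 0 the function reports the inactive cached pages above the low
 * watermark; with nr > 0 it returns how much of the request is still left.
 */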
354
355 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
356 static void sgv_purge_work_fn(void *p)
357 #else
358 static void sgv_purge_work_fn(struct delayed_work *work)
359 #endif
360 {
361         unsigned long cur_time = jiffies;
362 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
363         struct sgv_pool *pool = (struct sgv_pool *)p;
364 #else
365         struct sgv_pool *pool = container_of(work, struct sgv_pool,
366                                         sgv_purge_work);
367 #endif
368
369         TRACE_ENTRY();
370
371         TRACE_MEM("Purge work for pool %p", pool);
372
373         spin_lock_bh(&pool->sgv_pool_lock);
374
375         pool->purge_work_scheduled = false;
376
377         while (!list_empty(&pool->sorted_recycling_list)) {
378                 struct sgv_pool_obj *obj = list_entry(
379                         pool->sorted_recycling_list.next,
380                         struct sgv_pool_obj, sorted_recycling_list_entry);
381
382                 if (sgv_purge_from_cache(obj, PURGE_TIME_AFTER, cur_time)) {
383                         spin_unlock_bh(&pool->sgv_pool_lock);
384                         sgv_dtor_and_free(obj);
385                         spin_lock_bh(&pool->sgv_pool_lock);
386                 } else {
387                         /*
388                          * Reschedule it for a full period so that we don't
389                          * get here too often. In the worst case the shrinker
390                          * will reclaim the buffers quicker.
391                          */
392                         TRACE_MEM("Rescheduling purge work for pool %p (delay "
393                                 "%d HZ/%d sec)", pool, PURGE_INTERVAL,
394                                 PURGE_INTERVAL/HZ);
395                         schedule_delayed_work(&pool->sgv_purge_work,
396                                 PURGE_INTERVAL);
397                         pool->purge_work_scheduled = true;
398                         break;
399                 }
400         }
401
402         spin_unlock_bh(&pool->sgv_pool_lock);
403
404         TRACE_MEM("Leaving purge work for pool %p", pool);
405
406         TRACE_EXIT();
407         return;
408 }
409
410 static int sgv_check_full_clustering(struct scatterlist *sg, int cur, int hint)
411 {
412         int res = -1;
413         int i = hint;
414         unsigned long pfn_cur = page_to_pfn(sg_page(&sg[cur]));
415         int len_cur = sg[cur].length;
416         unsigned long pfn_cur_next = pfn_cur + (len_cur >> PAGE_SHIFT);
417         int full_page_cur = (len_cur & (PAGE_SIZE - 1)) == 0;
418         unsigned long pfn, pfn_next;
419         bool full_page;
420
421 #if 0
422         TRACE_MEM("pfn_cur %ld, pfn_cur_next %ld, len_cur %d, full_page_cur %d",
423                 pfn_cur, pfn_cur_next, len_cur, full_page_cur);
424 #endif
425
426         /* check the hint first */
427         if (i >= 0) {
428                 pfn = page_to_pfn(sg_page(&sg[i]));
429                 pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
430                 full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
431
432                 if ((pfn == pfn_cur_next) && full_page_cur)
433                         goto out_head;
434
435                 if ((pfn_next == pfn_cur) && full_page)
436                         goto out_tail;
437         }
438
439         /* ToDo: implement more intelligent search */
440         for (i = cur - 1; i >= 0; i--) {
441                 pfn = page_to_pfn(sg_page(&sg[i]));
442                 pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
443                 full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
444
445                 if ((pfn == pfn_cur_next) && full_page_cur)
446                         goto out_head;
447
448                 if ((pfn_next == pfn_cur) && full_page)
449                         goto out_tail;
450         }
451
452 out:
453         return res;
454
455 out_tail:
456         TRACE_MEM("SG segment %d will be tail merged with segment %d", cur, i);
457         sg[i].length += len_cur;
458         sg_clear(&sg[cur]);
459         res = i;
460         goto out;
461
462 out_head:
463         TRACE_MEM("SG segment %d will be head merged with segment %d", cur, i);
464         sg_assign_page(&sg[i], sg_page(&sg[cur]));
465         sg[i].length += len_cur;
466         sg_clear(&sg[cur]);
467         res = i;
468         goto out;
469 }
470
471 static int sgv_check_tail_clustering(struct scatterlist *sg, int cur, int hint)
472 {
473         int res = -1;
474         unsigned long pfn_cur = page_to_pfn(sg_page(&sg[cur]));
475         int len_cur = sg[cur].length;
476         int prev;
477         unsigned long pfn_prev;
478         bool full_page;
479
480 #ifdef SCST_HIGHMEM
481         if (sg_page(&sg[cur]) >= highmem_start_page) {
482                 TRACE_MEM("%s", "HIGHMEM page allocated, no clustering");
483                 goto out;
484         }
485 #endif
486
487 #if 0
488         TRACE_MEM("pfn_cur %ld, len_cur %d",
489                 pfn_cur, len_cur);
490 #endif
491
492         if (cur == 0)
493                 goto out;
494
495         prev = cur - 1;
496         pfn_prev = page_to_pfn(sg_page(&sg[prev])) +
497                         (sg[prev].length >> PAGE_SHIFT);
498         full_page = (sg[prev].length & (PAGE_SIZE - 1)) == 0;
499
500         if ((pfn_prev == pfn_cur) && full_page) {
501                 TRACE_MEM("SG segment %d will be tail merged with segment %d",
502                         cur, prev);
503                 sg[prev].length += len_cur;
504                 sg_clear(&sg[cur]);
505                 res = prev;
506         }
507
508 out:
509         return res;
510 }
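/*
 * Worked example of the tail merge above (illustrative numbers): suppose
 * sg[prev] describes one full page at PFN 100 (length == PAGE_SIZE) and the
 * newly allocated page in sg[cur] happens to be PFN 101. Then pfn_prev ==
 * 100 + 1 == 101 == pfn_cur and full_page is true, so sg[prev].length grows
 * to 2 * PAGE_SIZE, sg[cur] is cleared and 'prev' is returned. The caller,
 * sgv_alloc_sg_entries(), sees merged != -1 and does not advance sg_count,
 * so the two physically contiguous pages end up in a single SG entry.
 */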
511
512 static void sgv_free_sys_sg_entries(struct scatterlist *sg, int sg_count,
513         void *priv)
514 {
515         int i;
516
517         TRACE_MEM("sg=%p, sg_count=%d", sg, sg_count);
518
519         for (i = 0; i < sg_count; i++) {
520                 struct page *p = sg_page(&sg[i]);
521                 int len = sg[i].length;
522                 int pages =
523                         (len >> PAGE_SHIFT) + ((len & ~PAGE_MASK) != 0);
524
525                 TRACE_MEM("page %lx, len %d, pages %d",
526                         (unsigned long)p, len, pages);
527
528                 while (pages > 0) {
529                         int order = 0;
530
531 /*
532  * __free_pages() doesn't like freeing pages with an order other than the
533  * one they were allocated with, so disable this small optimization.
534  */
535 #if 0
536                         if (len > 0) {
537                                 while (((1 << order) << PAGE_SHIFT) < len)
538                                         order++;
539                                 len = 0;
540                         }
541 #endif
542                         TRACE_MEM("free_pages(): order %d, page %lx",
543                                 order, (unsigned long)p);
544
545                         __free_pages(p, order);
546
547                         pages -= 1 << order;
548                         p += 1 << order;
549                 }
550         }
551 }
552
553 static struct page *sgv_alloc_sys_pages(struct scatterlist *sg,
554         gfp_t gfp_mask, void *priv)
555 {
556         struct page *page = alloc_pages(gfp_mask, 0);
557
558         sg_set_page(sg, page, PAGE_SIZE, 0);
559         TRACE_MEM("page=%p, sg=%p, priv=%p", page, sg, priv);
560         if (page == NULL) {
561                 TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of "
562                         "sg page failed");
563         }
564         return page;
565 }
566
567 static int sgv_alloc_sg_entries(struct scatterlist *sg, int pages,
568         gfp_t gfp_mask, enum sgv_clustering_types clustering_type,
569         struct trans_tbl_ent *trans_tbl,
570         const struct sgv_pool_alloc_fns *alloc_fns, void *priv)
571 {
572         int sg_count = 0;
573         int pg, i, j;
574         int merged = -1;
575
576         TRACE_MEM("pages=%d, clustering_type=%d", pages, clustering_type);
577
578 #if 0
579         gfp_mask |= __GFP_COLD;
580 #endif
581 #ifdef CONFIG_SCST_STRICT_SECURITY
582         gfp_mask |= __GFP_ZERO;
583 #endif
584
585         for (pg = 0; pg < pages; pg++) {
586                 void *rc;
587 #ifdef CONFIG_SCST_DEBUG_OOM
588                 if (((gfp_mask & __GFP_NOFAIL) != __GFP_NOFAIL) &&
589                     ((scst_random() % 10000) == 55))
590                         rc = NULL;
591                 else
592 #endif
593                         rc = alloc_fns->alloc_pages_fn(&sg[sg_count], gfp_mask,
594                                 priv);
595                 if (rc == NULL)
596                         goto out_no_mem;
597
598                 /*
599                  * This code lets the compiler see the full bodies of the
600                  * clustering functions and gives it a chance to generate
601                  * better code. At least, the resulting code is smaller
602                  * than when calling them through a function pointer.
603                  */
604                 if (clustering_type == sgv_full_clustering)
605                         merged = sgv_check_full_clustering(sg, sg_count, merged);
606                 else if (clustering_type == sgv_tail_clustering)
607                         merged = sgv_check_tail_clustering(sg, sg_count, merged);
608                 else
609                         merged = -1;
610
611                 if (merged == -1)
612                         sg_count++;
613
614                 TRACE_MEM("pg=%d, merged=%d, sg_count=%d", pg, merged,
615                         sg_count);
616         }
617
618         if ((clustering_type != sgv_no_clustering) && (trans_tbl != NULL)) {
619                 pg = 0;
620                 for (i = 0; i < pages; i++) {
621                         int n = (sg[i].length >> PAGE_SHIFT) +
622                                 ((sg[i].length & ~PAGE_MASK) != 0);
623                         trans_tbl[i].pg_count = pg;
624                         for (j = 0; j < n; j++)
625                                 trans_tbl[pg++].sg_num = i+1;
626                         TRACE_MEM("i=%d, n=%d, pg_count=%d", i, n,
627                                 trans_tbl[i].pg_count);
628                 }
629         }
630
631 out:
632         TRACE_MEM("sg_count=%d", sg_count);
633         return sg_count;
634
635 out_no_mem:
636         alloc_fns->free_pages_fn(sg, sg_count, priv);
637         sg_count = 0;
638         goto out;
639 }
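/*
 * The translation table built above maps both ways between pages and SG
 * entries: trans_tbl[i].pg_count is the index of the first page covered by
 * used SG entry i, and trans_tbl[pg].sg_num is the 1-based number of the SG
 * entry that contains page pg. For example, if 4 pages were clustered into
 * 2 SG entries of 2 pages each, the loop above produces:
 *
 *	trans_tbl[0].pg_count = 0;  trans_tbl[1].pg_count = 2;
 *	trans_tbl[0].sg_num = 1;    trans_tbl[1].sg_num = 1;
 *	trans_tbl[2].sg_num = 2;    trans_tbl[3].sg_num = 2;
 *
 * sgv_pool_alloc() then reads trans_tbl[pages-1].sg_num to find how many SG
 * entries actually cover the requested pages.
 */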
640
641 static int sgv_alloc_arrays(struct sgv_pool_obj *obj,
642         int pages_to_alloc, int order, gfp_t gfp_mask)
643 {
644         int sz, tsz = 0;
645         int res = 0;
646
647         TRACE_ENTRY();
648
649         sz = pages_to_alloc * sizeof(obj->sg_entries[0]);
650
651         obj->sg_entries = kmalloc(sz, gfp_mask);
652         if (unlikely(obj->sg_entries == NULL)) {
653                 TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
654                         "SG vector failed (size %d)", sz);
655                 res = -ENOMEM;
656                 goto out;
657         }
658
659         sg_init_table(obj->sg_entries, pages_to_alloc);
660
661         if (sgv_pool_clustered(obj->owner_pool)) {
662                 if (order <= sgv_max_trans_order) {
663                         obj->trans_tbl =
664                                 (struct trans_tbl_ent *)obj->sg_entries_data;
665                         /*
666                          * No need to clear trans_tbl; if needed, it will be
667                          * fully rewritten in sgv_alloc_sg_entries().
668                          */
669                 } else {
670                         tsz = pages_to_alloc * sizeof(obj->trans_tbl[0]);
671                         obj->trans_tbl = kzalloc(tsz, gfp_mask);
672                         if (unlikely(obj->trans_tbl == NULL)) {
673                                 TRACE(TRACE_OUT_OF_MEM, "Allocation of "
674                                         "trans_tbl failed (size %d)", tsz);
675                                 res = -ENOMEM;
676                                 goto out_free;
677                         }
678                 }
679         }
680
681         TRACE_MEM("pages_to_alloc %d, order %d, sz %d, tsz %d, obj %p, "
682                 "sg_entries %p, trans_tbl %p", pages_to_alloc, order,
683                 sz, tsz, obj, obj->sg_entries, obj->trans_tbl);
684
685 out:
686         TRACE_EXIT_RES(res);
687         return res;
688
689 out_free:
690         kfree(obj->sg_entries);
691         obj->sg_entries = NULL;
692         goto out;
693 }
694
695 static struct sgv_pool_obj *sgv_get_obj(struct sgv_pool *pool, int order,
696         gfp_t gfp_mask)
697 {
698         struct sgv_pool_obj *obj;
699         int pages = 1 << order;
700
701         spin_lock_bh(&pool->sgv_pool_lock);
702         if (likely(!list_empty(&pool->recycling_lists[order]))) {
703                 obj = list_entry(pool->recycling_lists[order].next,
704                          struct sgv_pool_obj, recycling_list_entry);
705
706                 list_del(&obj->sorted_recycling_list_entry);
707                 list_del(&obj->recycling_list_entry);
708
709                 pool->inactive_cached_pages -= pages;
710
711                 spin_unlock_bh(&pool->sgv_pool_lock);
712
713                 EXTRACHECKS_BUG_ON(obj->order_or_pages != order);
714                 goto out;
715         }
716
717         if (pool->cached_entries == 0) {
718                 TRACE_MEM("Adding pool %p to the active list", pool);
719                 spin_lock_bh(&sgv_pools_lock);
720                 list_add_tail(&pool->sgv_active_pools_list_entry,
721                         &sgv_active_pools_list);
722                 spin_unlock_bh(&sgv_pools_lock);
723         }
724
725         pool->cached_entries++;
726         pool->cached_pages += pages;
727
728         spin_unlock_bh(&pool->sgv_pool_lock);
729
730         TRACE_MEM("New cached entries %d (pool %p)", pool->cached_entries,
731                 pool);
732
733         obj = kmem_cache_alloc(pool->caches[order],
734                 gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
735         if (likely(obj)) {
736                 memset(obj, 0, sizeof(*obj));
737                 obj->order_or_pages = order;
738                 obj->owner_pool = pool;
739         } else {
740                 spin_lock_bh(&pool->sgv_pool_lock);
741                 sgv_dec_cached_entries(pool, pages);
742                 spin_unlock_bh(&pool->sgv_pool_lock);
743         }
744
745 out:
746         return obj;
747 }
748
749 static void sgv_put_obj(struct sgv_pool_obj *obj)
750 {
751         struct sgv_pool *pool = obj->owner_pool;
752         struct list_head *entry;
753         struct list_head *list = &pool->recycling_lists[obj->order_or_pages];
754         int pages = 1 << obj->order_or_pages;
755
756         EXTRACHECKS_BUG_ON(obj->order_or_pages < 0);
757
758         spin_lock_bh(&pool->sgv_pool_lock);
759
760         TRACE_MEM("sgv %p, order %d, sg_count %d", obj, obj->order_or_pages,
761                 obj->sg_count);
762
763         if (sgv_pool_clustered(pool)) {
764                 /* Make objects with fewer entries more preferred */
765                 __list_for_each(entry, list) {
766                         struct sgv_pool_obj *tmp = list_entry(entry,
767                                 struct sgv_pool_obj, recycling_list_entry);
768
769                         TRACE_MEM("tmp %p, order %d, sg_count %d", tmp,
770                                 tmp->order_or_pages, tmp->sg_count);
771
772                         if (obj->sg_count <= tmp->sg_count)
773                                 break;
774                 }
775                 entry = entry->prev;
776         } else
777                 entry = list;
778
779         TRACE_MEM("Adding in %p (list %p)", entry, list);
780         list_add(&obj->recycling_list_entry, entry);
781
782         list_add_tail(&obj->sorted_recycling_list_entry,
783                 &pool->sorted_recycling_list);
784
785         obj->time_stamp = jiffies;
786
787         pool->inactive_cached_pages += pages;
788
789         if (!pool->purge_work_scheduled) {
790                 TRACE_MEM("Scheduling purge work for pool %p", pool);
791                 pool->purge_work_scheduled = true;
792                 schedule_delayed_work(&pool->sgv_purge_work, PURGE_INTERVAL);
793         }
794
795         spin_unlock_bh(&pool->sgv_pool_lock);
796         return;
797 }
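/*
 * Two accounting layers are enforced below: sgv_hiwmk_check() and
 * sgv_hiwmk_uncheck() track the global page counter (sgv_pages_total)
 * against the system-wide high watermark sgv_hi_wmk (cf. the
 * scst_max_cmd_mem message), shrinking the caches when it is crossed, while
 * sgv_check_allowed_mem() and sgv_uncheck_allowed_mem() enforce the
 * per-device limit carried in struct scst_mem_lim (scst_max_dev_cmd_mem).
 */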
798
799 /* No locks */
800 static int sgv_hiwmk_check(int pages_to_alloc)
801 {
802         int res = 0;
803         int pages = pages_to_alloc;
804
805         pages += atomic_read(&sgv_pages_total);
806
807         if (unlikely(pages > sgv_hi_wmk)) {
808                 pages -= sgv_hi_wmk;
809                 atomic_inc(&sgv_releases_on_hiwmk);
810
811                 pages = __sgv_shrink(pages, 0);
812                 if (pages > 0) {
813                         TRACE(TRACE_OUT_OF_MEM, "Requested amount of "
814                             "memory (%d pages) for the commands being "
815                             "executed, together with the already "
816                             "allocated memory, exceeds the allowed "
817                             "maximum %d. Consider increasing "
818                             "scst_max_cmd_mem", pages_to_alloc,
819                             sgv_hi_wmk);
820                         atomic_inc(&sgv_releases_on_hiwmk_failed);
821                         res = -ENOMEM;
822                         goto out_unlock;
823                 }
824         }
825
826         atomic_add(pages_to_alloc, &sgv_pages_total);
827
828 out_unlock:
829         TRACE_MEM("pages_to_alloc %d, new total %d", pages_to_alloc,
830                 atomic_read(&sgv_pages_total));
831
832         return res;
833 }
834
835 /* No locks */
836 static void sgv_hiwmk_uncheck(int pages)
837 {
838         atomic_sub(pages, &sgv_pages_total);
839         TRACE_MEM("pages %d, new total %d", pages,
840                 atomic_read(&sgv_pages_total));
841         return;
842 }
843
844 /* No locks */
845 static bool sgv_check_allowed_mem(struct scst_mem_lim *mem_lim, int pages)
846 {
847         int alloced;
848         bool res = true;
849
850         alloced = atomic_add_return(pages, &mem_lim->alloced_pages);
851         if (unlikely(alloced > mem_lim->max_allowed_pages)) {
852                 TRACE(TRACE_OUT_OF_MEM, "Requested amount of memory "
853                         "(%d pages) for the commands being executed on a "
854                         "device, together with the already allocated memory, "
855                         "exceeds the allowed maximum %d. Consider increasing "
856                         "scst_max_dev_cmd_mem", pages,
857                         mem_lim->max_allowed_pages);
858                 atomic_sub(pages, &mem_lim->alloced_pages);
859                 res = false;
860         }
861
862         TRACE_MEM("mem_lim %p, pages %d, res %d, new alloced %d", mem_lim,
863                 pages, res, atomic_read(&mem_lim->alloced_pages));
864
865         return res;
866 }
867
868 /* No locks */
869 static void sgv_uncheck_allowed_mem(struct scst_mem_lim *mem_lim, int pages)
870 {
871         atomic_sub(pages, &mem_lim->alloced_pages);
872
873         TRACE_MEM("mem_lim %p, pages %d, new alloced %d", mem_lim,
874                 pages, atomic_read(&mem_lim->alloced_pages));
875         return;
876 }
877
878 struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size,
879         gfp_t gfp_mask, int flags, int *count,
880         struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv)
881 {
882         struct sgv_pool_obj *obj;
883         int order, pages, cnt;
884         struct scatterlist *res = NULL;
885         int pages_to_alloc;
886         struct kmem_cache *cache;
887         int no_cached = flags & SCST_POOL_ALLOC_NO_CACHED;
888         bool allowed_mem_checked = false, hiwmk_checked = false;
889
890         TRACE_ENTRY();
891
892         if (unlikely(size == 0))
893                 goto out;
894
895         sBUG_ON((gfp_mask & __GFP_NOFAIL) == __GFP_NOFAIL);
896
897         pages = ((size + PAGE_SIZE - 1) >> PAGE_SHIFT);
898         order = get_order(size);
899
900         TRACE_MEM("size=%d, pages=%d, order=%d, flags=%x, *sgv %p", size, pages,
901                 order, flags, *sgv);
902
903         if (*sgv != NULL) {
904                 obj = *sgv;
905                 pages_to_alloc = (1 << order);
906                 cache = pool->caches[obj->order_or_pages];
907
908                 TRACE_MEM("Supplied obj %p, sgv_order %d", obj,
909                         obj->order_or_pages);
910
911                 EXTRACHECKS_BUG_ON(obj->order_or_pages != order);
912                 EXTRACHECKS_BUG_ON(obj->sg_count != 0);
913
914                 if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
915                         goto out_fail_free_sg_entries;
916                 allowed_mem_checked = true;
917
918                 if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
919                         goto out_fail_free_sg_entries;
920                 hiwmk_checked = true;
921         } else if ((order < SGV_POOL_ELEMENTS) && !no_cached) {
922                 pages_to_alloc = (1 << order);
923                 cache = pool->caches[order];
924
925                 if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
926                         goto out_fail;
927                 allowed_mem_checked = true;
928
929                 obj = sgv_get_obj(pool, order, gfp_mask);
930                 if (unlikely(obj == NULL)) {
931                         TRACE(TRACE_OUT_OF_MEM, "Allocation of "
932                                 "sgv_pool_obj failed (size %d)", size);
933                         goto out_fail;
934                 }
935
936                 if (obj->sg_count != 0) {
937                         TRACE_MEM("Cached obj %p", obj);
938                         EXTRACHECKS_BUG_ON(obj->order_or_pages != order);
939                         atomic_inc(&pool->cache_acc[order].hit_alloc);
940                         goto success;
941                 }
942
943                 if (flags & SCST_POOL_NO_ALLOC_ON_CACHE_MISS) {
944                         if (!(flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL))
945                                 goto out_fail_free;
946                 }
947
948                 TRACE_MEM("Brand new obj %p", obj);
949
950                 if (order <= sgv_max_local_order) {
951                         obj->sg_entries = obj->sg_entries_data;
952                         sg_init_table(obj->sg_entries, pages_to_alloc);
953                         TRACE_MEM("sg_entries %p", obj->sg_entries);
954                         if (sgv_pool_clustered(pool)) {
955                                 obj->trans_tbl = (struct trans_tbl_ent *)
956                                         (obj->sg_entries + pages_to_alloc);
957                                 TRACE_MEM("trans_tbl %p", obj->trans_tbl);
958                                 /*
959                                  * No need to clear trans_tbl; if needed, it
960                                  * will be fully rewritten in
961                                  * sgv_alloc_sg_entries().
962                                  */
963                         }
964                 } else {
965                         if (unlikely(sgv_alloc_arrays(obj, pages_to_alloc,
966                                         order, gfp_mask) != 0))
967                                 goto out_fail_free;
968                 }
969
970                 if ((flags & SCST_POOL_NO_ALLOC_ON_CACHE_MISS) &&
971                     (flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL))
972                         goto out_return;
973
974                 obj->allocator_priv = priv;
975
976                 if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
977                         goto out_fail_free_sg_entries;
978                 hiwmk_checked = true;
979         } else {
980                 int sz;
981
982                 pages_to_alloc = pages;
983
984                 if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
985                         goto out_fail;
986                 allowed_mem_checked = true;
987
988                 if (flags & SCST_POOL_NO_ALLOC_ON_CACHE_MISS)
989                         goto out_return2;
990
991                 cache = NULL;
992                 sz = sizeof(*obj) + pages*sizeof(obj->sg_entries[0]);
993
994                 obj = kmalloc(sz, gfp_mask);
995                 if (unlikely(obj == NULL)) {
996                         TRACE(TRACE_OUT_OF_MEM, "Allocation of "
997                                 "sgv_pool_obj failed (size %d)", size);
998                         goto out_fail;
999                 }
1000                 memset(obj, 0, sizeof(*obj));
1001
1002                 obj->owner_pool = pool;
1003                 obj->order_or_pages = -pages_to_alloc;
1004                 obj->allocator_priv = priv;
1005
1006                 obj->sg_entries = obj->sg_entries_data;
1007                 sg_init_table(obj->sg_entries, pages);
1008
1009                 if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
1010                         goto out_fail_free_sg_entries;
1011                 hiwmk_checked = true;
1012
1013                 TRACE_MEM("Big or no_cached obj %p (size %d)", obj, sz);
1014         }
1015
1016         obj->sg_count = sgv_alloc_sg_entries(obj->sg_entries,
1017                 pages_to_alloc, gfp_mask, pool->clustering_type,
1018                 obj->trans_tbl, &pool->alloc_fns, priv);
1019         if (unlikely(obj->sg_count <= 0)) {
1020                 obj->sg_count = 0;
1021                 if ((flags & SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL) && cache)
1022                         goto out_return1;
1023                 else
1024                         goto out_fail_free_sg_entries;
1025         }
1026
1027         if (cache) {
1028                 atomic_add(pages_to_alloc - obj->sg_count,
1029                         &pool->cache_acc[order].merged);
1030         } else {
1031                 if (no_cached) {
1032                         atomic_add(pages_to_alloc,
1033                                 &pool->other_pages);
1034                         atomic_add(pages_to_alloc - obj->sg_count,
1035                                 &pool->other_merged);
1036                 } else {
1037                         atomic_add(pages_to_alloc,
1038                                 &pool->big_pages);
1039                         atomic_add(pages_to_alloc - obj->sg_count,
1040                                 &pool->big_merged);
1041                 }
1042         }
1043
1044 success:
1045         if (cache) {
1046                 int sg;
1047                 atomic_inc(&pool->cache_acc[order].total_alloc);
1048                 if (sgv_pool_clustered(pool))
1049                         cnt = obj->trans_tbl[pages-1].sg_num;
1050                 else
1051                         cnt = pages;
1052                 sg = cnt-1;
1053                 obj->orig_sg = sg;
1054                 obj->orig_length = obj->sg_entries[sg].length;
1055                 if (sgv_pool_clustered(pool)) {
1056                         obj->sg_entries[sg].length =
1057                                 (pages - obj->trans_tbl[sg].pg_count) << PAGE_SHIFT;
1058                 }
1059         } else {
1060                 cnt = obj->sg_count;
1061                 if (no_cached)
1062                         atomic_inc(&pool->other_alloc);
1063                 else
1064                         atomic_inc(&pool->big_alloc);
1065         }
1066
1067         *count = cnt;
1068         res = obj->sg_entries;
1069         *sgv = obj;
1070
1071         if (size & ~PAGE_MASK)
1072                 obj->sg_entries[cnt-1].length -=
1073                         PAGE_SIZE - (size & ~PAGE_MASK);
1074
1075         TRACE_MEM("obj=%p, sg_entries %p (size=%d, pages=%d, sg_count=%d, "
1076                 "count=%d, last_len=%d)", obj, obj->sg_entries, size, pages,
1077                 obj->sg_count, *count, obj->sg_entries[obj->orig_sg].length);
1078
1079 out:
1080         TRACE_EXIT_HRES(res);
1081         return res;
1082
1083 out_return:
1084         obj->allocator_priv = priv;
1085         obj->owner_pool = pool;
1086
1087 out_return1:
1088         *sgv = obj;
1089         TRACE_MEM("Returning failed obj %p (count %d)", obj, *count);
1090
1091 out_return2:
1092         *count = pages_to_alloc;
1093         res = NULL;
1094         goto out_uncheck;
1095
1096 out_fail_free_sg_entries:
1097         if (obj->sg_entries != obj->sg_entries_data) {
1098                 if (obj->trans_tbl !=
1099                         (struct trans_tbl_ent *)obj->sg_entries_data) {
1100                         /* kfree() handles NULL parameter */
1101                         kfree(obj->trans_tbl);
1102                         obj->trans_tbl = NULL;
1103                 }
1104                 kfree(obj->sg_entries);
1105                 obj->sg_entries = NULL;
1106         }
1107
1108 out_fail_free:
1109         if (cache) {
1110                 spin_lock_bh(&pool->sgv_pool_lock);
1111                 sgv_dec_cached_entries(pool, pages_to_alloc);
1112                 spin_unlock_bh(&pool->sgv_pool_lock);
1113
1114                 kmem_cache_free(pool->caches[obj->order_or_pages], obj);
1115         } else
1116                 kfree(obj);
1117
1118 out_fail:
1119         res = NULL;
1120         *count = 0;
1121         *sgv = NULL;
1122         TRACE_MEM("%s", "Allocation failed");
1123
1124 out_uncheck:
1125         if (hiwmk_checked)
1126                 sgv_hiwmk_uncheck(pages_to_alloc);
1127         if (allowed_mem_checked)
1128                 sgv_uncheck_allowed_mem(mem_lim, pages_to_alloc);
1129         goto out;
1130 }
1131 EXPORT_SYMBOL(sgv_pool_alloc);
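/*
 * Illustrative usage sketch (assumptions: 'my_pool' was obtained via
 * sgv_pool_create() and 'my_mem_lim' is a properly initialized
 * struct scst_mem_lim; both names are hypothetical, and the initialization
 * helper is not part of this file):
 *
 *	struct sgv_pool_obj *sgv_obj = NULL;
 *	struct scatterlist *sg;
 *	int sg_cnt;
 *
 *	sg = sgv_pool_alloc(my_pool, 64 * 1024, GFP_KERNEL, 0, &sg_cnt,
 *			    &sgv_obj, &my_mem_lim, NULL);
 *	if (sg == NULL)
 *		return -ENOMEM;
 *	// ... use sg[0..sg_cnt-1] ...
 *	sgv_pool_free(sgv_obj, &my_mem_lim);
 *
 * Note that *sgv must be NULL on entry unless an object previously returned
 * via SCST_POOL_RETURN_OBJ_ON_ALLOC_FAIL is being passed back in.
 */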
1132
1133 void *sgv_get_priv(struct sgv_pool_obj *obj)
1134 {
1135         return obj->allocator_priv;
1136 }
1137 EXPORT_SYMBOL(sgv_get_priv);
1138
1139 void sgv_pool_free(struct sgv_pool_obj *obj, struct scst_mem_lim *mem_lim)
1140 {
1141         int pages;
1142
1143         TRACE_MEM("Freeing obj %p, order %d, sg_entries %p, "
1144                 "sg_count %d, allocator_priv %p", obj, obj->order_or_pages,
1145                 obj->sg_entries, obj->sg_count, obj->allocator_priv);
1146
1147         if (obj->order_or_pages >= 0) {
1148                 obj->sg_entries[obj->orig_sg].length = obj->orig_length;
1149                 pages = (obj->sg_count != 0) ? 1 << obj->order_or_pages : 0;
1150                 sgv_put_obj(obj);
1151         } else {
1152                 obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries,
1153                         obj->sg_count, obj->allocator_priv);
1154                 pages = (obj->sg_count != 0) ? -obj->order_or_pages : 0;
1155                 kfree(obj);
1156                 sgv_hiwmk_uncheck(pages);
1157         }
1158
1159         sgv_uncheck_allowed_mem(mem_lim, pages);
1160
1161         return;
1162 }
1163 EXPORT_SYMBOL(sgv_pool_free);
1164
1165 struct scatterlist *scst_alloc(int size, gfp_t gfp_mask, int *count)
1166 {
1167         struct scatterlist *res;
1168         int pages = (size >> PAGE_SHIFT) + ((size & ~PAGE_MASK) != 0);
1169         struct sgv_pool_alloc_fns sys_alloc_fns = {
1170                 sgv_alloc_sys_pages, sgv_free_sys_sg_entries };
1171         int no_fail = ((gfp_mask & __GFP_NOFAIL) == __GFP_NOFAIL);
1172
1173         TRACE_ENTRY();
1174
1175         atomic_inc(&sgv_other_total_alloc);
1176
1177         if (unlikely(sgv_hiwmk_check(pages) != 0)) {
1178                 if (!no_fail) {
1179                         res = NULL;
1180                         goto out;
1181                 } else {
1182                         /*
1183                          * Update sgv_pages_total anyway, since this allocation
1184                          * can't fail. Otherwise the counter would go below 0
1185                          * when these pages are freed.
1186                          */
1187                         sgv_hiwmk_uncheck(-pages);
1188                  }
1189         }
1190
1191         res = kmalloc(pages*sizeof(*res), gfp_mask);
1192         if (res == NULL) {
1193                 TRACE(TRACE_OUT_OF_MEM, "Unable to allocate sg for %d pages",
1194                         pages);
1195                 goto out_uncheck;
1196         }
1197
1198         sg_init_table(res, pages);
1199
1200         /*
1201          * If we allowed clustering here, scst_free() would have trouble
1202          * figuring out how many pages are in the SG vector. So, never
1203          * use clustering here.
1204          */
1205         *count = sgv_alloc_sg_entries(res, pages, gfp_mask, sgv_no_clustering,
1206                         NULL, &sys_alloc_fns, NULL);
1207         if (*count <= 0)
1208                 goto out_free;
1209
1210 out:
1211         TRACE_MEM("Alloced sg %p (count %d) \"no fail\" %d", res, *count, no_fail);
1212
1213         TRACE_EXIT_HRES(res);
1214         return res;
1215
1216 out_free:
1217         kfree(res);
1218         res = NULL;
1219
1220 out_uncheck:
1221         if (!no_fail)
1222                 sgv_hiwmk_uncheck(pages);
1223         goto out;
1224 }
1225 EXPORT_SYMBOL(scst_alloc);
1226
1227 void scst_free(struct scatterlist *sg, int count)
1228 {
1229         TRACE_MEM("Freeing sg=%p", sg);
1230
1231         sgv_hiwmk_uncheck(count);
1232
1233         sgv_free_sys_sg_entries(sg, count, NULL);
1234         kfree(sg);
1235         return;
1236 }
1237 EXPORT_SYMBOL(scst_free);
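/*
 * Illustrative usage sketch for the plain (non-pooled) allocator above:
 *
 *	int cnt;
 *	struct scatterlist *sg = scst_alloc(32 * 1024, GFP_KERNEL, &cnt);
 *
 *	if (sg == NULL)
 *		return -ENOMEM;
 *	// ... use the cnt entries ...
 *	scst_free(sg, cnt);
 *
 * scst_free() must be passed the same count that scst_alloc() returned:
 * with clustering disabled here it equals the number of pages, which is
 * what gets released from the watermark accounting.
 */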
1238
1239 /* Must be called under sgv_pools_mutex */
1240 int sgv_pool_init(struct sgv_pool *pool, const char *name,
1241         enum sgv_clustering_types clustering_type)
1242 {
1243         int res = -ENOMEM;
1244         int i;
1245         struct sgv_pool_obj *obj;
1246
1247         TRACE_ENTRY();
1248
1249         memset(pool, 0, sizeof(*pool));
1250
1251         atomic_set(&pool->big_alloc, 0);
1252         atomic_set(&pool->big_pages, 0);
1253         atomic_set(&pool->big_merged, 0);
1254         atomic_set(&pool->other_alloc, 0);
1255         atomic_set(&pool->other_pages, 0);
1256         atomic_set(&pool->other_merged, 0);
1257
1258         pool->clustering_type = clustering_type;
1259         pool->alloc_fns.alloc_pages_fn = sgv_alloc_sys_pages;
1260         pool->alloc_fns.free_pages_fn = sgv_free_sys_sg_entries;
1261
1262         TRACE_MEM("name %s, sizeof(*obj)=%zd, clustering_type=%d", name,
1263                 sizeof(*obj), clustering_type);
1264
1265         strncpy(pool->name, name, sizeof(pool->name)-1);
1266         pool->name[sizeof(pool->name)-1] = '\0';
1267
1268         pool->owner_mm = current->mm;
1269
1270         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1271                 int size;
1272
1273                 atomic_set(&pool->cache_acc[i].total_alloc, 0);
1274                 atomic_set(&pool->cache_acc[i].hit_alloc, 0);
1275                 atomic_set(&pool->cache_acc[i].merged, 0);
1276
1277                 if (i <= sgv_max_local_order) {
1278                         size = sizeof(*obj) + (1 << i) *
1279                                 (sizeof(obj->sg_entries[0]) +
1280                                  ((clustering_type != sgv_no_clustering) ?
1281                                         sizeof(obj->trans_tbl[0]) : 0));
1282                 } else if (i <= sgv_max_trans_order) {
1283                         /*
1284                          * The SG vector (sg_entries) is allocated outside
1285                          * the object, but trans_tbl is still embedded.
1286                          */
1287                         size = sizeof(*obj) + (1 << i) *
1288                                 (((clustering_type != sgv_no_clustering) ?
1289                                         sizeof(obj->trans_tbl[0]) : 0));
1290                 } else {
1291                         size = sizeof(*obj);
1292                         /* both sg_entries and trans_tbl are allocated separately */
1293                 }
1294
1295                 TRACE_MEM("pages=%d, size=%d", 1 << i, size);
1296
1297                 scnprintf(pool->cache_names[i], sizeof(pool->cache_names[i]),
1298                         "%s-%luK", name, (PAGE_SIZE >> 10) << i);
1299                 pool->caches[i] = kmem_cache_create(pool->cache_names[i],
1300                         size, 0, SCST_SLAB_FLAGS, NULL
1301 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
1302                         , NULL);
1303 #else
1304                         );
1305 #endif
1306                 if (pool->caches[i] == NULL) {
1307                         TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool cache "
1308                                 "%s(%d) failed", name, i);
1309                         goto out_free;
1310                 }
1311         }
1312
1313         atomic_set(&pool->sgv_pool_ref, 1);
1314         spin_lock_init(&pool->sgv_pool_lock);
1315         INIT_LIST_HEAD(&pool->sorted_recycling_list);
1316         for (i = 0; i < SGV_POOL_ELEMENTS; i++)
1317                 INIT_LIST_HEAD(&pool->recycling_lists[i]);
1318
1319 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
1320         INIT_DELAYED_WORK(&pool->sgv_purge_work,
1321                 (void (*)(struct work_struct *))sgv_purge_work_fn);
1322 #else
1323         INIT_WORK(&pool->sgv_purge_work, sgv_purge_work_fn, pool);
1324 #endif
1325
1326         spin_lock_bh(&sgv_pools_lock);
1327         list_add_tail(&pool->sgv_pools_list_entry, &sgv_pools_list);
1328         spin_unlock_bh(&sgv_pools_lock);
1329
1330         res = 0;
1331
1332 out:
1333         TRACE_EXIT_RES(res);
1334         return res;
1335
1336 out_free:
1337         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1338                 if (pool->caches[i]) {
1339                         kmem_cache_destroy(pool->caches[i]);
1340                         pool->caches[i] = NULL;
1341                 } else
1342                         break;
1343         }
1344         goto out;
1345 }
1346
1347 static void sgv_evaluate_local_order(void)
1348 {
1349         int space4sgv_ttbl = PAGE_SIZE - sizeof(struct sgv_pool_obj);
1350
1351         sgv_max_local_order = get_order(
1352                 (((space4sgv_ttbl /
1353                   (sizeof(struct trans_tbl_ent) + sizeof(struct scatterlist))) *
1354                         PAGE_SIZE) & PAGE_MASK)) - 1;
1355
1356         sgv_max_trans_order = get_order(
1357                 (((space4sgv_ttbl / sizeof(struct trans_tbl_ent)) * PAGE_SIZE)
1358                  & PAGE_MASK)) - 1;
1359
1360         TRACE_MEM("sgv_max_local_order %d, sgv_max_trans_order %d",
1361                 sgv_max_local_order, sgv_max_trans_order);
1362         TRACE_MEM("max object size with embedded sgv & ttbl %zd",
1363                 (1 << sgv_max_local_order) * (sizeof(struct trans_tbl_ent) +
1364                                                 sizeof(struct scatterlist)) +
1365                 sizeof(struct sgv_pool_obj));
1366         TRACE_MEM("max object size with embedded sgv (!clustered) %zd",
1367                 (1 << sgv_max_local_order) * sizeof(struct scatterlist) +
1368                 sizeof(struct sgv_pool_obj));
1369         TRACE_MEM("max object size with embedded ttbl %zd",
1370                 (1 << sgv_max_trans_order) * sizeof(struct trans_tbl_ent)
1371                 + sizeof(struct sgv_pool_obj));
1372         return;
1373 }
1374
1375 void sgv_pool_flush(struct sgv_pool *pool)
1376 {
1377         int i;
1378
1379         TRACE_ENTRY();
1380
1381         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1382                 struct sgv_pool_obj *obj;
1383
1384                 spin_lock_bh(&pool->sgv_pool_lock);
1385
1386                 while (!list_empty(&pool->recycling_lists[i])) {
1387                         obj = list_entry(pool->recycling_lists[i].next,
1388                                 struct sgv_pool_obj, recycling_list_entry);
1389
1390                         __sgv_purge_from_cache(obj);
1391
1392                         spin_unlock_bh(&pool->sgv_pool_lock);
1393
1394                         EXTRACHECKS_BUG_ON(obj->owner_pool != pool);
1395                         sgv_dtor_and_free(obj);
1396
1397                         spin_lock_bh(&pool->sgv_pool_lock);
1398                 }
1399                 spin_unlock_bh(&pool->sgv_pool_lock);
1400         }
1401
1402         TRACE_EXIT();
1403         return;
1404 }
1405 EXPORT_SYMBOL(sgv_pool_flush);
1406
1407 void sgv_pool_deinit(struct sgv_pool *pool)
1408 {
1409         int i;
1410
1411         TRACE_ENTRY();
1412
1413         cancel_delayed_work_sync(&pool->sgv_purge_work);
1414
1415         sgv_pool_flush(pool);
1416
1417         mutex_lock(&sgv_pools_mutex);
1418         spin_lock_bh(&sgv_pools_lock);
1419         list_del(&pool->sgv_pools_list_entry);
1420         spin_unlock_bh(&sgv_pools_lock);
1421         mutex_unlock(&sgv_pools_mutex);
1422
1423         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1424                 if (pool->caches[i])
1425                         kmem_cache_destroy(pool->caches[i]);
1426                 pool->caches[i] = NULL;
1427         }
1428
1429         TRACE_EXIT();
1430         return;
1431 }
1432
1433 void sgv_pool_set_allocator(struct sgv_pool *pool,
1434         struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *),
1435         void (*free_pages_fn)(struct scatterlist *, int, void *))
1436 {
1437         pool->alloc_fns.alloc_pages_fn = alloc_pages_fn;
1438         pool->alloc_fns.free_pages_fn = free_pages_fn;
1439         return;
1440 }
1441 EXPORT_SYMBOL(sgv_pool_set_allocator);
1442
1443 struct sgv_pool *sgv_pool_create(const char *name,
1444         enum sgv_clustering_types clustering_type, bool shared)
1445 {
1446         struct sgv_pool *pool;
1447         int rc;
1448
1449         TRACE_ENTRY();
1450
1451         mutex_lock(&sgv_pools_mutex);
1452         list_for_each_entry(pool, &sgv_pools_list, sgv_pools_list_entry) {
1453                 if (strcmp(pool->name, name) == 0) {
1454                         if (shared) {
1455                                 if (pool->owner_mm != current->mm) {
1456                                         PRINT_ERROR("Attempt to share SGV "
1457                                                 "pool %s between different "
1458                                                 "MMs", name);
1459                                         goto out_err_unlock;
1460                                 }
1461                                 sgv_pool_get(pool);
1462                                 goto out_unlock;
1463                         } else {
1464                                 PRINT_ERROR("SGV pool %s already exists", name);
1465                                 goto out_err_unlock;
1466                         }
1467                 }
1468         }
1469
1470         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
1471         if (pool == NULL) {
1472                 TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of sgv_pool failed");
1473                 goto out_unlock;
1474         }
1475
1476         rc = sgv_pool_init(pool, name, clustering_type);
1477         if (rc != 0)
1478                 goto out_free_unlock;
1479
1480 out_unlock:
1481         mutex_unlock(&sgv_pools_mutex);
1482
1483         TRACE_EXIT_RES(pool != NULL);
1484         return pool;
1485
1486 out_free_unlock:
1487         kfree(pool);
1488
1489 out_err_unlock:
1490         pool = NULL;
1491         goto out_unlock;
1492 }
1493 EXPORT_SYMBOL(sgv_pool_create);
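/*
 * Illustrative usage sketch (the pool name and allocator callbacks below are
 * hypothetical): a target driver or dev handler wanting its own cache would
 * typically do
 *
 *	struct sgv_pool *p = sgv_pool_create("my-driver", sgv_no_clustering,
 *					     false);
 *	if (p == NULL)
 *		return -ENOMEM;
 *	// optionally: sgv_pool_set_allocator(p, my_alloc_fn, my_free_fn);
 *	...
 *	sgv_pool_del(p);
 *
 * With shared == true, an existing pool with the same name created from the
 * same mm is reused with its reference count bumped, and sgv_pool_del()
 * only drops that reference.
 */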
1494
1495 static void sgv_pool_destroy(struct sgv_pool *pool)
1496 {
1497         TRACE_ENTRY();
1498
1499         sgv_pool_deinit(pool);
1500         kfree(pool);
1501
1502         TRACE_EXIT();
1503         return;
1504 }
1505
1506 static void sgv_pool_get(struct sgv_pool *pool)
1507 {
1508         atomic_inc(&pool->sgv_pool_ref);
1509         TRACE_MEM("Incrementing sgv pool %p ref (new value %d)",
1510                 pool, atomic_read(&pool->sgv_pool_ref));
1511         return;
1512 }
1513
1514 static void sgv_pool_put(struct sgv_pool *pool)
1515 {
1516         TRACE_MEM("Decrementing sgv pool %p ref (new value %d)",
1517                 pool, atomic_read(&pool->sgv_pool_ref)-1);
1518         if (atomic_dec_and_test(&pool->sgv_pool_ref))
1519                 sgv_pool_destroy(pool);
1520         return;
1521 }
1522
1523 void sgv_pool_del(struct sgv_pool *pool)
1524 {
1525         TRACE_ENTRY();
1526
1527         sgv_pool_put(pool);
1528
1529         TRACE_EXIT();
1530         return;
1531 }
1532 EXPORT_SYMBOL(sgv_pool_del);
1533
1534 /* Both parameters in pages */
1535 int scst_sgv_pools_init(unsigned long mem_hwmark, unsigned long mem_lwmark)
1536 {
1537         int res;
1538
1539         TRACE_ENTRY();
1540
1541         sgv_hi_wmk = mem_hwmark;
1542         sgv_lo_wmk = mem_lwmark;
1543
1544         sgv_evaluate_local_order();
1545
1546         mutex_lock(&sgv_pools_mutex);
1547
1548         res = sgv_pool_init(&sgv_norm_pool, "sgv", sgv_no_clustering);
1549         if (res != 0)
1550                 goto out_unlock;
1551
1552         res = sgv_pool_init(&sgv_norm_clust_pool, "sgv-clust",
1553                 sgv_full_clustering);
1554         if (res != 0)
1555                 goto out_free_norm;
1556
1557         res = sgv_pool_init(&sgv_dma_pool, "sgv-dma", sgv_no_clustering);
1558         if (res != 0)
1559                 goto out_free_clust;
1560
1561         mutex_unlock(&sgv_pools_mutex);
1562
1563 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
1564         sgv_shrinker = set_shrinker(DEFAULT_SEEKS, sgv_shrink);
1565 #else
1566         sgv_shrinker.shrink = sgv_shrink;
1567         sgv_shrinker.seeks = DEFAULT_SEEKS;
1568         register_shrinker(&sgv_shrinker);
1569 #endif
1570
1571 out:
1572         TRACE_EXIT_RES(res);
1573         return res;
1574
1575 out_free_clust:
1576         sgv_pool_deinit(&sgv_norm_clust_pool);
1577
1578 out_free_norm:
1579         sgv_pool_deinit(&sgv_norm_pool);
1580
1581 out_unlock:
1582         mutex_unlock(&sgv_pools_mutex);
1583         goto out;
1584 }
1585
1586 void scst_sgv_pools_deinit(void)
1587 {
1588         TRACE_ENTRY();
1589
1590 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
1591         remove_shrinker(sgv_shrinker);
1592 #else
1593         unregister_shrinker(&sgv_shrinker);
1594 #endif
1595
1596         sgv_pool_deinit(&sgv_dma_pool);
1597         sgv_pool_deinit(&sgv_norm_pool);
1598         sgv_pool_deinit(&sgv_norm_clust_pool);
1599
1600         flush_scheduled_work();
1601
1602         TRACE_EXIT();
1603         return;
1604 }
1605
1606 static void sgv_do_proc_read(struct seq_file *seq, const struct sgv_pool *pool)
1607 {
1608         int i, total = 0, hit = 0, merged = 0, allocated = 0;
1609         int oa, om;
1610
1611         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1612                 int t;
1613
1614                 hit += atomic_read(&pool->cache_acc[i].hit_alloc);
1615                 total += atomic_read(&pool->cache_acc[i].total_alloc);
1616
1617                 t = atomic_read(&pool->cache_acc[i].total_alloc) -
1618                         atomic_read(&pool->cache_acc[i].hit_alloc);
1619                 allocated += t * (1 << i);
1620                 merged += atomic_read(&pool->cache_acc[i].merged);
1621         }
1622
1623         seq_printf(seq, "\n%-30s %-11d %-11d %-11d %d/%d/%d\n", pool->name,
1624                 hit, total, (allocated != 0) ? merged*100/allocated : 0,
1625                 pool->cached_pages, pool->inactive_cached_pages,
1626                 pool->cached_entries);
1627
1628         for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
1629                 int t = atomic_read(&pool->cache_acc[i].total_alloc) -
1630                         atomic_read(&pool->cache_acc[i].hit_alloc);
1631                 allocated = t * (1 << i);
1632                 merged = atomic_read(&pool->cache_acc[i].merged);
1633
1634                 seq_printf(seq, "  %-28s %-11d %-11d %d\n",
1635                         pool->cache_names[i],
1636                         atomic_read(&pool->cache_acc[i].hit_alloc),
1637                         atomic_read(&pool->cache_acc[i].total_alloc),
1638                         (allocated != 0) ? merged*100/allocated : 0);
1639         }
1640
1641         allocated = atomic_read(&pool->big_pages);
1642         merged = atomic_read(&pool->big_merged);
1643         oa = atomic_read(&pool->other_pages);
1644         om = atomic_read(&pool->other_merged);
1645
1646         seq_printf(seq, "  %-40s %d/%-9d %d/%d\n", "big/other",
1647                 atomic_read(&pool->big_alloc), atomic_read(&pool->other_alloc),
1648                 (allocated != 0) ? merged*100/allocated : 0,
1649                 (oa != 0) ? om*100/oa : 0);
1650
1651         return;
1652 }
1653
1654 int sgv_procinfo_show(struct seq_file *seq, void *v)
1655 {
1656         struct sgv_pool *pool;
1657         int inactive_pages = 0;
1658
1659         TRACE_ENTRY();
1660
1661         spin_lock_bh(&sgv_pools_lock);
1662         list_for_each_entry(pool, &sgv_active_pools_list,
1663                         sgv_active_pools_list_entry) {
1664                 inactive_pages += pool->inactive_cached_pages;
1665         }
1666         spin_unlock_bh(&sgv_pools_lock);
1667
1668         seq_printf(seq, "%-42s %d/%d\n%-42s %d/%d\n%-42s %d/%d\n\n",
1669                 "Inactive/active pages", inactive_pages,
1670                 atomic_read(&sgv_pages_total) - inactive_pages,
1671                 "Hi/lo watermarks [pages]", sgv_hi_wmk, sgv_lo_wmk,
1672                 "Hi watermark releases/failures",
1673                 atomic_read(&sgv_releases_on_hiwmk),
1674                 atomic_read(&sgv_releases_on_hiwmk_failed));
1675
1676         seq_printf(seq, "%-30s %-11s %-11s %-11s %-11s", "Name", "Hit", "Total",
1677                 "% merged", "Cached (P/I/O)");
1678
1679         mutex_lock(&sgv_pools_mutex);
1680         list_for_each_entry(pool, &sgv_pools_list, sgv_pools_list_entry) {
1681                 sgv_do_proc_read(seq, pool);
1682         }
1683         mutex_unlock(&sgv_pools_mutex);
1684
1685         seq_printf(seq, "\n%-42s %-11d\n", "other",
1686                 atomic_read(&sgv_other_total_alloc));
1687
1688         TRACE_EXIT();
1689         return 0;
1690 }