- Update for 2.6.28
[mirror/scst/.git] / iscsi-scst / kernel / patches / put_page_callback-2.6.28.patch
1 diff -upr linux-2.6.28/include/linux/mm_types.h linux-2.6.28/include/linux/mm_types.h
2 --- linux-2.6.28/include/linux/mm_types.h       2008-07-14 01:51:29.000000000 +0400
3 +++ linux-2.6.28/include/linux/mm_types.h       2008-07-22 20:30:21.000000000 +0400
4 @@ -94,6 +94,18 @@ struct page {
5         void *virtual;                  /* Kernel virtual address (NULL if
6                                            not kmapped, ie. highmem) */
7  #endif /* WANT_PAGE_VIRTUAL */
8 +
9 +#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
10 +       /*
11 +        * Used to implement support for notification on zero-copy TCP transfer
12 +        * completion. It might seem better to keep this field in struct
13 +        * sk_buff rather than here, but that would make the code much more
14 +        * complicated and fragile, since every skb would then have to contain
15 +        * only pages with the same value in this field.
16 +        */
17 +        void *net_priv;
18 +#endif
19 +
20  };
21
22  /*
23 diff -upr linux-2.6.28/include/linux/net.h linux-2.6.28/include/linux/net.h
24 --- linux-2.6.28/include/linux/net.h    2008-07-14 01:51:29.000000000 +0400
25 +++ linux-2.6.28/include/linux/net.h    2008-07-29 20:48:07.000000000 +0400
26 @@ -57,6 +57,7 @@ typedef enum {
27  #include <linux/random.h>
28  #include <linux/wait.h>
29  #include <linux/fcntl.h>       /* For O_CLOEXEC and O_NONBLOCK */
30 +#include <linux/mm.h>
31
32  struct poll_table_struct;
33  struct pipe_inode_info;
34 @@ -352,5 +352,44 @@ extern int net_msg_cost;
35  extern struct ratelimit_state net_ratelimit_state;
36  #endif
37
38 +#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
39 +/* Support for notification on zero-copy TCP transfer completion */
40 +typedef void (*net_get_page_callback_t)(struct page *page);
41 +typedef void (*net_put_page_callback_t)(struct page *page);
42 +
43 +extern net_get_page_callback_t net_get_page_callback;
44 +extern net_put_page_callback_t net_put_page_callback;
45 +
46 +extern int net_set_get_put_page_callbacks(
47 +       net_get_page_callback_t get_callback,
48 +       net_put_page_callback_t put_callback);
49 +
50 +/*
51 + * See the comment on net_set_get_put_page_callbacks() for why these
52 + * functions don't need any protection.
53 + */
54 +static inline void net_get_page(struct page *page)
55 +{
56 +       if (page->net_priv != 0)
57 +               net_get_page_callback(page);
58 +       get_page(page);
59 +}
60 +static inline void net_put_page(struct page *page)
61 +{
62 +       if (page->net_priv != 0)
63 +               net_put_page_callback(page);
64 +       put_page(page);
65 +}
66 +#else
67 +static inline void net_get_page(struct page *page)
68 +{
69 +       get_page(page);
70 +}
71 +static inline void net_put_page(struct page *page)
72 +{
73 +       put_page(page);
74 +}
75 +#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
76 +
77  #endif /* __KERNEL__ */
78  #endif /* _LINUX_NET_H */
79 diff -upr linux-2.6.28/net/core/skbuff.c linux-2.6.28/net/core/skbuff.c
80 --- linux-2.6.28/net/core/skbuff.c      2008-07-14 01:51:29.000000000 +0400
81 +++ linux-2.6.28/net/core/skbuff.c      2008-07-22 20:28:41.000000000 +0400
82 @@ -339,7 +339,7 @@ static void skb_release_data(struct sk_b
83                 if (skb_shinfo(skb)->nr_frags) {
84                         int i;
85                         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
86 -                               put_page(skb_shinfo(skb)->frags[i].page);
87 +                               net_put_page(skb_shinfo(skb)->frags[i].page);
88                 }
89  
90                 if (skb_shinfo(skb)->frag_list)
91 @@ -727,7 +725,7 @@ struct sk_buff *pskb_copy(struct sk_buff
92  
93                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
94                         skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
95 -                       get_page(skb_shinfo(n)->frags[i].page);
96 +                       net_get_page(skb_shinfo(n)->frags[i].page);
97                 }
98                 skb_shinfo(n)->nr_frags = i;
99         }
100 @@ -792,7 +792,7 @@ int pskb_expand_head(struct sk_buff *skb
101                sizeof(struct skb_shared_info));
102  
103         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
104 -               get_page(skb_shinfo(skb)->frags[i].page);
105 +               net_get_page(skb_shinfo(skb)->frags[i].page);
106  
107         if (skb_shinfo(skb)->frag_list)
108                 skb_clone_fraglist(skb);
109 @@ -1061,7 +1061,7 @@ drop_pages:
110                 skb_shinfo(skb)->nr_frags = i;
111  
112                 for (; i < nfrags; i++)
113 -                       put_page(skb_shinfo(skb)->frags[i].page);
114 +                       net_put_page(skb_shinfo(skb)->frags[i].page);
115  
116                 if (skb_shinfo(skb)->frag_list)
117                         skb_drop_fraglist(skb);
118 @@ -1230,7 +1230,7 @@ pull_pages:
119         k = 0;
120         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
121                 if (skb_shinfo(skb)->frags[i].size <= eat) {
122 -                       put_page(skb_shinfo(skb)->frags[i].page);
123 +                       net_put_page(skb_shinfo(skb)->frags[i].page);
124                         eat -= skb_shinfo(skb)->frags[i].size;
125                 } else {
126                         skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
127 @@ -1987,7 +1987,7 @@ static inline void skb_split_no_header(s
128                                  *    where splitting is expensive.
129                                  * 2. Split is accurately. We make this.
130                                  */
131 -                               get_page(skb_shinfo(skb)->frags[i].page);
132 +                               net_get_page(skb_shinfo(skb)->frags[i].page);
133                                 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
134                                 skb_shinfo(skb1)->frags[0].size -= len - pos;
135                                 skb_shinfo(skb)->frags[i].size  = len - pos;
136 @@ -2355,7 +2355,7 @@ struct sk_buff *skb_segment(struct sk_bu
137                         BUG_ON(i >= nfrags);
138  
139                         *frag = skb_shinfo(skb)->frags[i];
140 -                       get_page(frag->page);
141 +                       net_get_page(frag->page);
142                         size = frag->size;
143  
144                         if (pos < offset) {
145 diff -upr linux-2.6.28/net/ipv4/ip_output.c linux-2.6.28/net/ipv4/ip_output.c
146 --- linux-2.6.28/net/ipv4/ip_output.c   2008-07-14 01:51:29.000000000 +0400
147 +++ linux-2.6.28/net/ipv4/ip_output.c   2008-07-22 20:28:41.000000000 +0400
148 @@ -1008,7 +1008,7 @@ alloc_new_skb:
149                                                 err = -EMSGSIZE;
150                                                 goto error;
151                                         }
152 -                                       get_page(page);
153 +                                       net_get_page(page);
154                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
155                                         frag = &skb_shinfo(skb)->frags[i];
156                                 }
157 @@ -1166,7 +1166,7 @@ ssize_t   ip_append_page(struct sock *sk, 
158                 if (skb_can_coalesce(skb, i, page, offset)) {
159                         skb_shinfo(skb)->frags[i-1].size += len;
160                 } else if (i < MAX_SKB_FRAGS) {
161 -                       get_page(page);
162 +                       net_get_page(page);
163                         skb_fill_page_desc(skb, i, page, offset, len);
164                 } else {
165                         err = -EMSGSIZE;
166 diff -upr linux-2.6.28/net/ipv4/Makefile linux-2.6.28/net/ipv4/Makefile
167 --- linux-2.6.28/net/ipv4/Makefile      2008-07-14 01:51:29.000000000 +0400
168 +++ linux-2.6.28/net/ipv4/Makefile      2008-07-22 20:35:05.000000000 +0400
169 @@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
170  obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
171  obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
172  obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
173 +obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
174  
175  obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
176                       xfrm4_output.o
177 diff -upr linux-2.6.28/net/ipv4/tcp.c linux-2.6.28/net/ipv4/tcp.c
178 --- linux-2.6.28/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
179 +++ linux-2.6.28/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
180 @@ -714,7 +714,7 @@ new_segment:
181                 if (can_coalesce) {
182                         skb_shinfo(skb)->frags[i - 1].size += copy;
183                 } else {
184 -                       get_page(page);
185 +                       net_get_page(page);
186                         skb_fill_page_desc(skb, i, page, offset, copy);
187                 }
188  
189 @@ -919,7 +919,7 @@ new_segment:
190                                         goto new_segment;
191                                 } else if (page) {
192                                         if (off == PAGE_SIZE) {
193 -                                               put_page(page);
194 +                                               net_put_page(page);
195                                                 TCP_PAGE(sk) = page = NULL;
196                                                 off = 0;
197                                         }
198 @@ -960,9 +960,9 @@ new_segment:
199                                 } else {
200                                         skb_fill_page_desc(skb, i, page, off, copy);
201                                         if (TCP_PAGE(sk)) {
202 -                                               get_page(page);
203 +                                               net_get_page(page);
204                                         } else if (off + copy < PAGE_SIZE) {
205 -                                               get_page(page);
206 +                                               net_get_page(page);
207                                                 TCP_PAGE(sk) = page;
208                                         }
209                                 }
210 diff -upr linux-2.6.28/net/ipv4/tcp_output.c linux-2.6.28/net/ipv4/tcp_output.c
211 --- linux-2.6.28/net/ipv4/tcp_output.c  2008-07-14 01:51:29.000000000 +0400
212 +++ linux-2.6.28/net/ipv4/tcp_output.c  2008-07-22 20:28:41.000000000 +0400
213 @@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
214         k = 0;
215         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
216                 if (skb_shinfo(skb)->frags[i].size <= eat) {
217 -                       put_page(skb_shinfo(skb)->frags[i].page);
218 +                       net_put_page(skb_shinfo(skb)->frags[i].page);
219                         eat -= skb_shinfo(skb)->frags[i].size;
220                 } else {
221                         skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
222 diff -upr linux-2.6.28/net/ipv4/tcp_zero_copy.c linux-2.6.28/net/ipv4/tcp_zero_copy.c
223 --- linux-2.6.28/net/ipv4/tcp_zero_copy.c       2008-07-22 20:12:35.000000000 +0400
224 +++ linux-2.6.28/net/ipv4/tcp_zero_copy.c       2008-07-31 21:21:13.000000000 +0400
225 @@ -0,0 +1,49 @@
226 +/*
227 + *     Support routines for TCP zero copy transmit
228 + *
229 + *     Created by Vladislav Bolkhovitin
230 + *
231 + *     This program is free software; you can redistribute it and/or
232 + *      modify it under the terms of the GNU General Public License
233 + *      version 2 as published by the Free Software Foundation.
234 + */
235 +
236 +#include <linux/skbuff.h>
237 +
238 +net_get_page_callback_t net_get_page_callback __read_mostly;
239 +EXPORT_SYMBOL(net_get_page_callback);
240 +
241 +net_put_page_callback_t net_put_page_callback __read_mostly;
242 +EXPORT_SYMBOL(net_put_page_callback);
243 +
244 +/*
245 + * The caller of this function must ensure that, at the moment it is called,
246 + * there are no pages in the system with the net_priv field set to a non-zero
247 + * value. Hence this function, as well as net_get_page() and net_put_page(),
248 + * doesn't need any protection.
249 + */
250 +int net_set_get_put_page_callbacks(
251 +       net_get_page_callback_t get_callback,
252 +       net_put_page_callback_t put_callback)
253 +{
254 +       int res = 0;
255 +
256 +       if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
257 +           (net_get_page_callback != get_callback)) {
258 +               res = -EBUSY;
259 +               goto out;
260 +       }
261 +
262 +       if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
263 +           (net_put_page_callback != put_callback)) {
264 +               res = -EBUSY;
265 +               goto out;
266 +       }
267 +
268 +       net_get_page_callback = get_callback;
269 +       net_put_page_callback = put_callback;
270 +
271 +out:
272 +       return res;
273 +}
274 +EXPORT_SYMBOL(net_set_get_put_page_callbacks);
275 diff -upr linux-2.6.28/net/ipv6/ip6_output.c linux-2.6.28/net/ipv6/ip6_output.c
276 --- linux-2.6.28/net/ipv6/ip6_output.c  2008-07-14 01:51:29.000000000 +0400
277 +++ linux-2.6.28/net/ipv6/ip6_output.c  2008-07-22 20:28:41.000000000 +0400
278 @@ -1362,7 +1362,7 @@ alloc_new_skb:
279                                                 err = -EMSGSIZE;
280                                                 goto error;
281                                         }
282 -                                       get_page(page);
283 +                                       net_get_page(page);
284                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
285                                         frag = &skb_shinfo(skb)->frags[i];
286                                 }
287 diff -upr linux-2.6.28/net/Kconfig linux-2.6.28/net/Kconfig
288 --- linux-2.6.28/net/Kconfig    2008-07-14 01:51:29.000000000 +0400
289 +++ linux-2.6.28/net/Kconfig    2008-07-29 21:15:39.000000000 +0400
290 @@ -59,6 +59,18 @@ config INET
291  
292           Short answer: say Y.
293  
294 +config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
295 +       bool "TCP/IP zero-copy transfer completion notification"
296 +        depends on INET
297 +        default SCST_ISCSI
298 +       ---help---
299 +         Adds support for sending a notification upon completion of a
300 +          zero-copy TCP/IP transfer. This can speed up certain TCP/IP
301 +          software. Currently this is only used by the iSCSI target driver
302 +          iSCSI-SCST.
303 +
304 +          If unsure, say N.
305 +
306  if INET
307  source "net/ipv4/Kconfig"
308  source "net/ipv6/Kconfig"