diff -upr linux-2.6.18/include/linux/mm.h linux-2.6.18/include/linux/mm.h
--- linux-2.6.18/include/linux/mm.h	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/mm.h	2007-08-07 19:35:51.000000000 +0400
@@ -277,6 +277,15 @@ struct page {
         void *virtual;                  /* Kernel virtual address (NULL if
                                            not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+        /*
+         * Used to implement support for notification on zero-copy TCP transfer
+         * completion. It is not good to have this field here; it would be
+         * better to have it in struct sk_buff, but that would make the code
+         * much more complicated and fragile, if maintained as a separate
+         * patch, since all skbs would then have to contain only pages with
+         * the same value in this field.
+         */
+        void *net_priv;
 };

 #define page_private(page)              ((page)->private)
diff -upr linux-2.6.18/include/linux/net.h linux-2.6.18/include/linux/net.h
--- linux-2.6.18/include/linux/net.h	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/net.h	2007-08-29 18:28:21.000000000 +0400
@@ -56,6 +56,7 @@ typedef enum {

 #ifdef __KERNEL__
 #include <linux/stringify.h>
+#include <linux/mm.h>

 #define SOCK_ASYNC_NOSPACE      0
 #define SOCK_ASYNC_WAITDATA     1
@@ -324,5 +325,30 @@ extern int net_msg_cost;
 extern int net_msg_burst;

+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+        net_get_page_callback_t get_callback,
+        net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+        if (page->net_priv != 0)
+                net_get_page_callback(page);
+        get_page(page);
+}
+
+static inline void net_put_page(struct page *page)
+{
+        if (page->net_priv != 0)
+                net_put_page_callback(page);
+        put_page(page);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NET_H */
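The hunk above only declares the hook API; the patch itself never installs any callbacks, that is left to the zero-copy user (in practice a target-mode storage driver). Purely as an illustration, and not as part of the patch, a consumer module could register and release the hooks roughly as follows. All zcopy_example_* names are invented for this sketch, and the callback bodies are fleshed out in the sketch after the last hunk below.

/* Illustration only, not part of the patch. */
#include <linux/module.h>
#include <linux/net.h>

static void zcopy_example_get_page(struct page *page)
{
        /* Real body sketched after the last hunk of the patch. */
}

static void zcopy_example_put_page(struct page *page)
{
        /* Real body sketched after the last hunk of the patch. */
}

static int __init zcopy_example_init(void)
{
        /*
         * net_set_get_put_page_callbacks() refuses to replace callbacks
         * already installed by somebody else, so a non-zero return here
         * means the hooks are busy.
         */
        return net_set_get_put_page_callbacks(zcopy_example_get_page,
                                              zcopy_example_put_page);
}

static void __exit zcopy_example_exit(void)
{
        /*
         * Passing NULL releases the hooks again; no page may still carry
         * a non-zero page->net_priv at this point.
         */
        net_set_get_put_page_callbacks(NULL, NULL);
}

module_init(zcopy_example_init);
module_exit(zcopy_example_exit);
MODULE_LICENSE("GPL");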
diff -upr linux-2.6.18/net/core/skbuff.c linux-2.6.18/net/core/skbuff.c
--- linux-2.6.18/net/core/skbuff.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/skbuff.c	2007-08-07 19:35:51.000000000 +0400
@@ -324,7 +324,7 @@ static void skb_release_data(struct sk_b
                 if (skb_shinfo(skb)->nr_frags) {
                         int i;
                         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                                put_page(skb_shinfo(skb)->frags[i].page);
+                                net_put_page(skb_shinfo(skb)->frags[i].page);
                 }

                 if (skb_shinfo(skb)->frag_list)
@@ -666,7 +666,7 @@ struct sk_buff *pskb_copy(struct sk_buff

                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                         skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
-                        get_page(skb_shinfo(n)->frags[i].page);
+                        net_get_page(skb_shinfo(n)->frags[i].page);
                 }
                 skb_shinfo(n)->nr_frags = i;
         }
@@ -720,7 +720,7 @@ int pskb_expand_head(struct sk_buff *skb
         memcpy(data + size, skb->end, sizeof(struct skb_shared_info));

         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                get_page(skb_shinfo(skb)->frags[i].page);
+                net_get_page(skb_shinfo(skb)->frags[i].page);

         if (skb_shinfo(skb)->frag_list)
                 skb_clone_fraglist(skb);
@@ -902,7 +902,7 @@ drop_pages:
                 skb_shinfo(skb)->nr_frags = i;

                 for (; i < nfrags; i++)
-                        put_page(skb_shinfo(skb)->frags[i].page);
+                        net_put_page(skb_shinfo(skb)->frags[i].page);

                 if (skb_shinfo(skb)->frag_list)
                         skb_drop_fraglist(skb);
@@ -1071,7 +1071,7 @@ pull_pages:
         k = 0;
         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                 if (skb_shinfo(skb)->frags[i].size <= eat) {
-                        put_page(skb_shinfo(skb)->frags[i].page);
+                        net_put_page(skb_shinfo(skb)->frags[i].page);
                         eat -= skb_shinfo(skb)->frags[i].size;
                 } else {
                         skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1653,7 +1653,7 @@ static inline void skb_split_no_header(s
                          *    where splitting is expensive.
                          * 2. Split is accurately. We make this.
                          */
-                        get_page(skb_shinfo(skb)->frags[i].page);
+                        net_get_page(skb_shinfo(skb)->frags[i].page);
                         skb_shinfo(skb1)->frags[0].page_offset += len - pos;
                         skb_shinfo(skb1)->frags[0].size -= len - pos;
                         skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2021,7 +2021,7 @@ struct sk_buff *skb_segment(struct sk_bu
                         BUG_ON(i >= nfrags);

                         *frag = skb_shinfo(skb)->frags[i];
-                        get_page(frag->page);
+                        net_get_page(frag->page);
                         size = frag->size;

                         if (pos < offset) {
diff -upr linux-2.6.18/net/core/utils.c linux-2.6.18/net/core/utils.c
--- linux-2.6.18/net/core/utils.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/utils.c	2007-08-23 19:49:40.000000000 +0400
 #include <linux/random.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
+#include <linux/skbuff.h>

 #include <asm/byteorder.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>

+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
 /*
   This is a maximally equidistributed combined Tausworthe generator
   based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -203,3 +203,32 @@ __be32 in_aton(const char *str)
 }

 EXPORT_SYMBOL(in_aton);
+
+int net_set_get_put_page_callbacks(
+        net_get_page_callback_t get_callback,
+        net_put_page_callback_t put_callback)
+{
+        int res = 0;
+
+        if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+            (net_get_page_callback != get_callback)) {
+                res = -EBUSY;
+                goto out;
+        }
+
+        if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+            (net_put_page_callback != put_callback)) {
+                res = -EBUSY;
+                goto out;
+        }
+
+        net_get_page_callback = get_callback;
+        net_put_page_callback = put_callback;
+
+out:
+        return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.18/net/ipv4/ip_output.c linux-2.6.18/net/ipv4/ip_output.c
--- linux-2.6.18/net/ipv4/ip_output.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/ip_output.c	2007-08-07 19:37:24.000000000 +0400
@@ -1006,7 +1006,7 @@ alloc_new_skb:
                                         err = -EMSGSIZE;
                                         goto error;
                                 }
-                                get_page(page);
+                                net_get_page(page);
                                 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                 frag = &skb_shinfo(skb)->frags[i];
                         }
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
                 if (skb_can_coalesce(skb, i, page, offset)) {
                         skb_shinfo(skb)->frags[i-1].size += len;
                 } else if (i < MAX_SKB_FRAGS) {
-                        get_page(page);
+                        net_get_page(page);
                         skb_fill_page_desc(skb, i, page, offset, len);
                 } else {
                         err = -EMSGSIZE;
diff -upr linux-2.6.18/net/ipv4/tcp.c linux-2.6.18/net/ipv4/tcp.c
--- linux-2.6.18/net/ipv4/tcp.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp.c	2007-08-07 19:35:51.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
                 if (can_coalesce) {
                         skb_shinfo(skb)->frags[i - 1].size += copy;
                 } else {
-                        get_page(page);
+                        net_get_page(page);
                         skb_fill_page_desc(skb, i, page, offset, copy);
                 }
@@ -763,7 +763,7 @@ new_segment:
                                         goto new_segment;
                                 } else if (page) {
                                         if (off == PAGE_SIZE) {
-                                                put_page(page);
+                                                net_put_page(page);
                                                 TCP_PAGE(sk) = page = NULL;
                                                 off = 0;
                                         }
@@ -804,9 +804,9 @@ new_segment:
                                 } else {
                                         skb_fill_page_desc(skb, i, page, off, copy);
                                         if (TCP_PAGE(sk)) {
-                                                get_page(page);
+                                                net_get_page(page);
                                         } else if (off + copy < PAGE_SIZE) {
-                                                get_page(page);
+                                                net_get_page(page);
                                                 TCP_PAGE(sk) = page;
                                         }
                                 }
diff -upr linux-2.6.18/net/ipv4/tcp_output.c linux-2.6.18/net/ipv4/tcp_output.c
--- linux-2.6.18/net/ipv4/tcp_output.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp_output.c	2007-08-07 19:35:51.000000000 +0400
@@ -659,7 +659,7 @@ static void __pskb_trim_head(struct sk_b
         k = 0;
         for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
                 if (skb_shinfo(skb)->frags[i].size <= eat) {
-                        put_page(skb_shinfo(skb)->frags[i].page);
+                        net_put_page(skb_shinfo(skb)->frags[i].page);
                         eat -= skb_shinfo(skb)->frags[i].size;
                 } else {
                         skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.18/net/ipv6/ip6_output.c linux-2.6.18/net/ipv6/ip6_output.c
--- linux-2.6.18/net/ipv6/ip6_output.c	2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv6/ip6_output.c	2007-08-07 19:35:51.000000000 +0400
@@ -1212,7 +1212,7 @@ alloc_new_skb:
                                         err = -EMSGSIZE;
                                         goto error;
                                 }
-                                get_page(page);
+                                net_get_page(page);
                                 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                 frag = &skb_shinfo(skb)->frags[i];
                         }