Added the possibility for a target driver and a dev handler to coexist when both need custom memory...
[mirror/scst/.git] / iscsi-scst / kernel / nthread.c
1 /*
2  *  Network threads.
3  *
4  *  Copyright (C) 2004 - 2005 FUJITA Tomonori <tomof@acm.org>
5  *  Copyright (C) 2007 - 2008 Vladislav Bolkhovitin
6  *  Copyright (C) 2007 - 2008 CMS Distribution Limited
7  *
8  *  This program is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU General Public License
10  *  as published by the Free Software Foundation.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  */
17
18 #include <linux/sched.h>
19 #include <linux/file.h>
20 #include <linux/kthread.h>
21 #include <asm/ioctls.h>
22 #include <linux/delay.h>
23 #include <net/tcp.h>
24
25 #include "iscsi.h"
26 #include "digest.h"
27
28 enum rx_state {
29         RX_INIT_BHS, /* Must be zero. */
30         RX_BHS,
31
32         RX_INIT_AHS,
33         RX_AHS,
34
35         RX_INIT_HDIGEST,
36         RX_HDIGEST,
37         RX_CHECK_HDIGEST,
38
39         RX_INIT_DATA,
40         RX_DATA,
41
42         RX_INIT_DDIGEST,
43         RX_DDIGEST,
44         RX_CHECK_DDIGEST,
45
46         RX_END,
47 };
48
49 enum tx_state {
50         TX_INIT, /* Must be zero. */
51         TX_BHS_DATA,
52         TX_INIT_DDIGEST,
53         TX_DDIGEST,
54         TX_END,
55 };
56
#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
/*
 * Release network references held on the pages of all commands (and their
 * responses) queued on @conn, once the TCP socket has reached TCP_CLOSE.
 *
 * With zero-copy transmission the network stack keeps per-page references
 * (page->net_priv) until the data has actually left the socket.  When the
 * socket is already closed no data will ever be sent, so those references
 * would never be dropped and the commands would leak; drop them here.
 *
 * Locking: takes conn->cmd_list_lock and each cmnd->rsp_cmd_lock.  Because
 * iscsi_put_page_callback() must not be called under those locks, the locks
 * are dropped before releasing pages and the whole scan is restarted from
 * the top ("again") — the lists may have changed while unlocked.
 */
static void iscsi_check_closewait(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd;

	TRACE_ENTRY();

	TRACE_CONN_CLOSE_DBG("conn %p, sk_state %d", conn,
		conn->sock->sk->sk_state);

	/* Nothing to do until the peer side is fully closed */
	if (conn->sock->sk->sk_state != TCP_CLOSE) {
		TRACE_CONN_CLOSE_DBG("conn %p, skipping", conn);
		goto out;
	}

	/*
	 * No data are going to be sent, so all queued buffers can be freed
	 * now. In many cases TCP does that only in close(), but we can't rely
	 * on user space on calling it.
	 */

again:
	spin_lock_bh(&conn->cmd_list_lock);
	list_for_each_entry(cmnd, &conn->cmd_list, cmd_list_entry) {
		struct iscsi_cmnd *rsp;
		int restart = 0;

		TRACE_CONN_CLOSE_DBG("cmd %p, scst_state %x, data_waiting %d, "
			"ref_cnt %d, parent_req %p, net_ref_cnt %d, sg %p",
			cmnd, cmnd->scst_state, cmnd->data_waiting,
			atomic_read(&cmnd->ref_cnt), cmnd->parent_req,
			atomic_read(&cmnd->net_ref_cnt), cmnd->sg);

		/* Only top-level requests are on conn->cmd_list */
		sBUG_ON(cmnd->parent_req != NULL);

		if (cmnd->sg != NULL) {
			int i;

			/* Pin the command so it can't go away while we
			 * release its pages; skip it if it's already dying */
			if (cmnd_get_check(cmnd))
				continue;

			for (i = 0; i < cmnd->sg_cnt; i++) {
				struct page *page = sg_page(&cmnd->sg[i]);
				TRACE_CONN_CLOSE_DBG("page %p, net_priv %p, "
					"_count %d", page, page->net_priv,
					atomic_read(&page->_count));

				if (page->net_priv != NULL) {
					/* Drop the list lock once, before the
					 * first callback invocation */
					if (restart == 0) {
						spin_unlock_bh(&conn->cmd_list_lock);
						restart = 1;
					}
					while (page->net_priv != NULL)
						iscsi_put_page_callback(page);
				}
			}
			cmnd_put(cmnd);

			/* List may have changed while unlocked — rescan */
			if (restart)
				goto again;
		}

		spin_lock_bh(&cmnd->rsp_cmd_lock);
		list_for_each_entry(rsp, &cmnd->rsp_cmd_list, rsp_cmd_list_entry) {
			TRACE_CONN_CLOSE_DBG("  rsp %p, ref_cnt %d, net_ref_cnt %d, "
				"sg %p", rsp, atomic_read(&rsp->ref_cnt),
				atomic_read(&rsp->net_ref_cnt), rsp->sg);

			/* Skip responses sharing the request's sg — those
			 * pages were handled above */
			if ((rsp->sg != cmnd->sg) && (rsp->sg != NULL)) {
				int i;

				if (cmnd_get_check(rsp))
					continue;

				for (i = 0; i < rsp->sg_cnt; i++) {
					struct page *page = sg_page(&rsp->sg[i]);
					TRACE_CONN_CLOSE_DBG("    page %p, net_priv %p, "
						"_count %d", page, page->net_priv,
						atomic_read(&page->_count));

					if (page->net_priv != NULL) {
						/* Both locks must be dropped
						 * before the callback */
						if (restart == 0) {
							spin_unlock_bh(&cmnd->rsp_cmd_lock);
							spin_unlock_bh(&conn->cmd_list_lock);
							restart = 1;
						}
						while (page->net_priv != NULL)
							iscsi_put_page_callback(page);
					}
				}
				cmnd_put(rsp);

				if (restart)
					goto again;
			}
		}
		spin_unlock_bh(&cmnd->rsp_cmd_lock);
	}
	spin_unlock_bh(&conn->cmd_list_lock);

out:
	TRACE_EXIT();
	return;
}
#else
static inline void iscsi_check_closewait(struct iscsi_conn *conn) {};
#endif
164
165 static void iscsi_unreg_cmds_done_fn(struct scst_session *scst_sess)
166 {
167         struct iscsi_session *sess =
168                 (struct iscsi_session *)scst_sess_get_tgt_priv(scst_sess);
169
170         TRACE_ENTRY();
171
172         TRACE_CONN_CLOSE_DBG("sess %p (scst_sess %p)", sess, scst_sess);
173
174         sess->shutting_down = 1;
175         complete_all(&sess->unreg_compl);
176
177         TRACE_EXIT();
178         return;
179 }
180
181 /* No locks */
182 static void close_conn(struct iscsi_conn *conn)
183 {
184         struct iscsi_session *session = conn->session;
185         struct iscsi_target *target = conn->target;
186         typeof(jiffies) start_waiting = jiffies;
187         typeof(jiffies) shut_start_waiting = start_waiting;
188         bool pending_reported = 0, wait_expired = 0, shut_expired = 0;
189
190 #define CONN_PENDING_TIMEOUT    ((typeof(jiffies))10*HZ)
191 #define CONN_WAIT_TIMEOUT       ((typeof(jiffies))10*HZ)
192 #define CONN_REG_SHUT_TIMEOUT   ((typeof(jiffies))125*HZ)
193 #define CONN_DEL_SHUT_TIMEOUT   ((typeof(jiffies))10*HZ)
194
195         TRACE_ENTRY();
196
197         TRACE_CONN_CLOSE("Closing connection %p (conn_ref_cnt=%d)", conn,
198                 atomic_read(&conn->conn_ref_cnt));
199
200         iscsi_extracheck_is_rd_thread(conn);
201
202         sBUG_ON(!conn->closing);
203
204         if (conn->active_close) {
205                 /* We want all our already send operations to complete */
206                 conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
207         } else {
208                 conn->sock->ops->shutdown(conn->sock,
209                         RCV_SHUTDOWN|SEND_SHUTDOWN);
210         }
211
212         /*
213          * We need to call scst_unregister_session() ASAP to make SCST start
214          * stuck commands recovery.
215          *
216          * ToDo: this is incompatible with MC/S
217          */
218         scst_unregister_session_ex(session->scst_sess, 0,
219                 NULL, iscsi_unreg_cmds_done_fn);
220         session->scst_sess = NULL;
221
222         if (conn->read_state != RX_INIT_BHS) {
223                 struct iscsi_cmnd *cmnd = conn->read_cmnd;
224                 conn->read_cmnd = NULL;
225                 conn->read_state = RX_INIT_BHS;
226                 req_cmnd_release_force(cmnd, 0);
227         }
228
229         conn_abort(conn);
230
231         /* ToDo: not the best way to wait */
232         while (atomic_read(&conn->conn_ref_cnt) != 0) {
233                 struct iscsi_cmnd *cmnd;
234
235                 mutex_lock(&target->target_mutex);
236                 spin_lock(&session->sn_lock);
237                 if ((session->tm_rsp != NULL) && (session->tm_rsp->conn == conn)) {
238                         struct iscsi_cmnd *tm_rsp = session->tm_rsp;
239                         TRACE(TRACE_MGMT_MINOR, "Dropping delayed TM rsp %p",
240                                 tm_rsp);
241                         session->tm_rsp = NULL;
242                         session->tm_active--;
243                         WARN_ON(session->tm_active < 0);
244                         spin_unlock(&session->sn_lock);
245                         mutex_unlock(&target->target_mutex);
246
247                         rsp_cmnd_release(tm_rsp);
248                 } else {
249                         spin_unlock(&session->sn_lock);
250                         mutex_unlock(&target->target_mutex);
251                 }
252
253                 if (!list_empty(&session->pending_list)) {
254                         struct list_head *pending_list = &session->pending_list;
255                         int req_freed;
256
257                         TRACE_CONN_CLOSE_DBG("Disposing pending commands on "
258                                              "connection %p (conn_ref_cnt=%d)", conn,
259                                              atomic_read(&conn->conn_ref_cnt));
260
261                         /*
262                          * Such complicated approach currently isn't necessary,
263                          * but it will be necessary for MC/S, if we won't want
264                          * to reestablish the whole session on a connection
265                          * failure.
266                          */
267
268                         spin_lock(&session->sn_lock);
269                         do {
270                                 req_freed = 0;
271                                 list_for_each_entry(cmnd, pending_list,
272                                                         pending_list_entry) {
273                                         TRACE_CONN_CLOSE_DBG("Pending cmd %p"
274                                                 "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
275                                                 cmnd, conn, cmnd->pdu.bhs.sn,
276                                                 session->exp_cmd_sn);
277                                         if ((cmnd->conn == conn) &&
278                                             (session->exp_cmd_sn == cmnd->pdu.bhs.sn)) {
279                                                 TRACE_CONN_CLOSE_DBG("Freeing pending cmd %p",
280                                                         cmnd);
281
282                                                 list_del(&cmnd->pending_list_entry);
283                                                 cmnd->pending = 0;
284
285                                                 session->exp_cmd_sn++;
286
287                                                 spin_unlock(&session->sn_lock);
288
289                                                 req_cmnd_release_force(cmnd, 0);
290
291                                                 req_freed = 1;
292                                                 spin_lock(&session->sn_lock);
293                                                 break;
294                                         }
295                                 }
296                         } while (req_freed);
297                         spin_unlock(&session->sn_lock);
298
299                         if (time_after(jiffies, start_waiting + CONN_PENDING_TIMEOUT)) {
300                                 if (!pending_reported) {
301                                         TRACE_CONN_CLOSE("%s", "Pending wait time expired");
302                                         pending_reported = 1;
303                                 }
304                                 spin_lock(&session->sn_lock);
305                                 do {
306                                         req_freed = 0;
307                                         list_for_each_entry(cmnd, pending_list,
308                                                         pending_list_entry) {
309                                                 TRACE_CONN_CLOSE_DBG("Pending cmd %p"
310                                                         "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
311                                                         cmnd, conn, cmnd->pdu.bhs.sn,
312                                                         session->exp_cmd_sn);
313                                                 if (cmnd->conn == conn) {
314                                                         PRINT_ERROR("Freeing orphaned "
315                                                                 "pending cmd %p", cmnd);
316
317                                                         list_del(&cmnd->pending_list_entry);
318                                                         cmnd->pending = 0;
319
320                                                         if (session->exp_cmd_sn == cmnd->pdu.bhs.sn)
321                                                                 session->exp_cmd_sn++;
322
323                                                         spin_unlock(&session->sn_lock);
324
325                                                         req_cmnd_release_force(cmnd, 0);
326
327                                                         req_freed = 1;
328                                                         spin_lock(&session->sn_lock);
329                                                         break;
330                                                 }
331                                         }
332                                 } while (req_freed);
333                                 spin_unlock(&session->sn_lock);
334                         }
335                 }
336
337                 iscsi_make_conn_wr_active(conn);
338
339                 /* That's for active close only, actually */
340                 if (time_after(jiffies, start_waiting + CONN_WAIT_TIMEOUT) &&
341                     !wait_expired) {
342                         TRACE_CONN_CLOSE("Wait time expired (conn %p, "
343                                 "sk_state %d)", conn, conn->sock->sk->sk_state);
344                         conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN);
345                         wait_expired = 1;
346                         shut_start_waiting = jiffies;
347                 }
348
349                 if (wait_expired && !shut_expired &&
350                     time_after(jiffies, shut_start_waiting +
351                                 conn->deleting ? CONN_DEL_SHUT_TIMEOUT :
352                                                  CONN_REG_SHUT_TIMEOUT)) {
353                         TRACE_CONN_CLOSE("Wait time after shutdown expired "
354                                 "(conn %p, sk_state %d)", conn,
355                                 conn->sock->sk->sk_state);
356                         conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
357                         shut_expired = 1;
358                 }
359
360                 if (conn->deleting)
361                         msleep(200);
362                 else
363                         msleep(1000);
364
365                 TRACE_CONN_CLOSE_DBG("conn %p, conn_ref_cnt %d left, wr_state %d, "
366                         "exp_cmd_sn %u", conn, atomic_read(&conn->conn_ref_cnt),
367                         conn->wr_state, session->exp_cmd_sn);
368 #ifdef CONFIG_SCST_DEBUG
369                 {
370 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
371                         struct iscsi_cmnd *rsp;
372 #endif
373
374 #if 0
375                         if (time_after(jiffies, start_waiting + 10*HZ))
376                                 trace_flag |= TRACE_CONN_OC_DBG;
377 #endif
378
379                         spin_lock_bh(&conn->cmd_list_lock);
380                         list_for_each_entry(cmnd, &conn->cmd_list, cmd_list_entry) {
381                                 TRACE_CONN_CLOSE_DBG("cmd %p, scst_state %x, scst_cmd "
382                                         "state %d, data_waiting %d, ref_cnt %d, sn %u, "
383                                         "parent_req %p, pending %d", cmnd, cmnd->scst_state,
384                                         (cmnd->scst_cmd != NULL) ? cmnd->scst_cmd->state : -1,
385                                         cmnd->data_waiting, atomic_read(&cmnd->ref_cnt),
386                                         cmnd->pdu.bhs.sn, cmnd->parent_req, cmnd->pending);
387 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
388                                 TRACE_CONN_CLOSE_DBG("net_ref_cnt %d, sg %p",
389                                         atomic_read(&cmnd->net_ref_cnt), cmnd->sg);
390                                 if (cmnd->sg != NULL) {
391                                         int i;
392                                         for (i = 0; i < cmnd->sg_cnt; i++) {
393                                                 struct page *page = sg_page(&cmnd->sg[i]);
394                                                 TRACE_CONN_CLOSE_DBG("page %p, net_priv %p, _count %d",
395                                                         page, page->net_priv,
396                                                         atomic_read(&page->_count));
397                                         }
398                                 }
399
400                                 sBUG_ON(cmnd->parent_req != NULL);
401
402                                 spin_lock_bh(&cmnd->rsp_cmd_lock);
403                                 list_for_each_entry(rsp, &cmnd->rsp_cmd_list, rsp_cmd_list_entry) {
404                                         TRACE_CONN_CLOSE_DBG("  rsp %p, ref_cnt %d, net_ref_cnt %d, "
405                                                 "sg %p", rsp, atomic_read(&rsp->ref_cnt),
406                                                 atomic_read(&rsp->net_ref_cnt), rsp->sg);
407                                         if ((rsp->sg != cmnd->sg) && (rsp->sg != NULL)) {
408                                                 int i;
409                                                 for (i = 0; i < rsp->sg_cnt; i++) {
410                                                         TRACE_CONN_CLOSE_DBG("    page %p, net_priv %p, "
411                                                                 "_count %d", sg_page(&rsp->sg[i]),
412                                                                 sg_page(&rsp->sg[i])->net_priv,
413                                                                 atomic_read(&sg_page(&rsp->sg[i])->_count));
414                                                 }
415                                         }
416                                 }
417                                 spin_unlock_bh(&cmnd->rsp_cmd_lock);
418 #endif
419                         }
420                         spin_unlock_bh(&conn->cmd_list_lock);
421                 }
422 #endif
423                 iscsi_check_closewait(conn);
424         }
425
426         write_lock_bh(&conn->sock->sk->sk_callback_lock);
427         conn->sock->sk->sk_state_change = conn->old_state_change;
428         conn->sock->sk->sk_data_ready = conn->old_data_ready;
429         conn->sock->sk->sk_write_space = conn->old_write_space;
430         write_unlock_bh(&conn->sock->sk->sk_callback_lock);
431
432         while (1) {
433                 bool t;
434
435                 spin_lock_bh(&iscsi_wr_lock);
436                 t = (conn->wr_state == ISCSI_CONN_WR_STATE_IDLE);
437                 spin_unlock_bh(&iscsi_wr_lock);
438
439                 if (t && (atomic_read(&conn->conn_ref_cnt) == 0))
440                         break;
441
442                 TRACE_CONN_CLOSE_DBG("Waiting for wr thread (conn %p), "
443                         "wr_state %x", conn, conn->wr_state);
444                 msleep(50);
445         }
446
447         TRACE_CONN_CLOSE("Notifying user space about closing connection %p", conn);
448         event_send(target->tid, session->sid, conn->cid, E_CONN_CLOSE, 0);
449
450         wait_for_completion(&session->unreg_compl);
451
452         sBUG_ON(!session->shutting_down);
453
454         mutex_lock(&target->target_mutex);
455         conn_free(conn);
456         /* ToDo: this is incompatible with MC/S */
457         session_free(session);
458         mutex_unlock(&target->target_mutex);
459
460         TRACE_EXIT();
461         return;
462 }
463
/* Kthread entry point that performs the actual connection teardown. */
static int close_conn_thr(void *arg)
{
	struct iscsi_conn *conn = arg;

	TRACE_ENTRY();

#ifdef CONFIG_SCST_EXTRACHECKS
	/* This thread now acts as the rd thread for extra checks */
	conn->rd_task = current;
#endif
	close_conn(conn);

	TRACE_EXIT();
	return 0;
}
478
479 /* No locks */
480 static void start_close_conn(struct iscsi_conn *conn)
481 {
482         struct task_struct *t;
483
484         TRACE_ENTRY();
485
486         t = kthread_run(close_conn_thr, conn, "iscsi_conn_cleanup");
487         if (IS_ERR(t)) {
488                 PRINT_ERROR("kthread_run() failed (%ld), closing conn %p "
489                         "directly", PTR_ERR(t), conn);
490                 close_conn(conn);
491         }
492
493         TRACE_EXIT();
494         return;
495 }
496
497 static inline void iscsi_conn_init_read(struct iscsi_conn *conn, void *data, size_t len)
498 {
499         len = (len + 3) & -4; /* XXX ??? */
500         conn->read_iov[0].iov_base = data;
501         conn->read_iov[0].iov_len = len;
502         conn->read_msg.msg_iov = conn->read_iov;
503         conn->read_msg.msg_iovlen = 1;
504         conn->read_size = (len + 3) & -4;
505 }
506
/*
 * Allocate a buffer for the AHS (Additional Header Segment) of @cmnd and
 * point the connection's next read at it.
 */
static void iscsi_conn_read_ahs(struct iscsi_conn *conn, struct iscsi_cmnd *cmnd)
{
	/* ToDo: __GFP_NOFAIL ?? */
	cmnd->pdu.ahs = kmalloc(cmnd->pdu.ahssize, __GFP_NOFAIL|GFP_KERNEL);
	/* With __GFP_NOFAIL the allocation is not expected to fail */
	sBUG_ON(cmnd->pdu.ahs == NULL);
	iscsi_conn_init_read(conn, cmnd->pdu.ahs, cmnd->pdu.ahssize);
}
514
515 static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn)
516 {
517         struct iscsi_cmnd *cmnd = NULL;
518
519         spin_lock_bh(&conn->write_list_lock);
520         if (!list_empty(&conn->write_list)) {
521                 cmnd = list_entry(conn->write_list.next, struct iscsi_cmnd,
522                                 write_list_entry);
523                 cmd_del_from_write_list(cmnd);
524                 cmnd->write_processing_started = 1;
525         }
526         spin_unlock_bh(&conn->write_list_lock);
527
528         return cmnd;
529 }
530
/*
 * Receive up to conn->read_size bytes from the connection's socket into the
 * iovec prepared in conn->read_msg (non-blocking).
 *
 * On a complete read, conn->read_state is advanced to @state.  On a partial
 * read, the iovec position is advanced and the state is left unchanged so
 * the caller retries.  Returns the sock_recvmsg() result: >0 bytes read,
 * -EAGAIN/-ERESTARTSYS for "try again", other negatives close the conn.
 */
static int do_recv(struct iscsi_conn *conn, int state)
{
	mm_segment_t oldfs;
	struct msghdr msg;
	int res, first_len;

	sBUG_ON(conn->read_cmnd == NULL);

	if (unlikely(conn->closing)) {
		res = -EIO;
		goto out;
	}

	/* Work on a local msghdr; conn->read_msg tracks overall progress */
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = conn->read_msg.msg_iov;
	msg.msg_iovlen = conn->read_msg.msg_iovlen;
	first_len = msg.msg_iov->iov_len;

	/* Kernel-space buffers, so temporarily lift the user-space limit */
	oldfs = get_fs();
	set_fs(get_ds());
	res = sock_recvmsg(conn->sock, &msg, conn->read_size, MSG_DONTWAIT | MSG_NOSIGNAL);
	set_fs(oldfs);

	if (res <= 0) {
		switch (res) {
		case -EAGAIN:
		case -ERESTARTSYS:
			TRACE_DBG("EAGAIN or ERESTARTSYS (%d) received for "
				"conn %p", res, conn);
			break;
		default:
			PRINT_ERROR("sock_recvmsg() failed: %d", res);
			mark_conn_closed(conn);
			break;
		}
	} else {
		/*
		 * To save some considerable effort and CPU power we suppose
		 * that TCP functions adjust conn->read_msg.msg_iov and
		 * conn->read_msg.msg_iovlen on amount of copied data. This
		 * BUG_ON is intended to catch if it is changed in the future.
		 */
		sBUG_ON((res >= first_len) &&
			(conn->read_msg.msg_iov->iov_len != 0));
		conn->read_size -= res;
		if (conn->read_size) {
			/*
			 * Partial read: skip the fully-consumed iovec entries.
			 * Entries past the first are assumed page-sized, hence
			 * the PAGE_SHIFT arithmetic — NOTE(review): confirm
			 * callers always build page-granular iovecs.
			 */
			if (res >= first_len) {
				int done = 1 + ((res - first_len) >> PAGE_SHIFT);
				conn->read_msg.msg_iov += done;
				conn->read_msg.msg_iovlen -= done;
			}
		} else
			conn->read_state = state;
	}

out:
	TRACE_EXIT_RES(res);
	return res;
}
590
591 static int rx_hdigest(struct iscsi_conn *conn)
592 {
593         struct iscsi_cmnd *cmnd = conn->read_cmnd;
594         int res = digest_rx_header(cmnd);
595
596         if (unlikely(res != 0)) {
597                 PRINT_ERROR("rx header digest for initiator %s failed "
598                         "(%d)", conn->session->initiator_name, res);
599                 mark_conn_closed(conn);
600         }
601         return res;
602 }
603
604 static struct iscsi_cmnd *create_cmnd(struct iscsi_conn *conn)
605 {
606         struct iscsi_cmnd *cmnd;
607
608         cmnd = cmnd_alloc(conn, NULL);
609         iscsi_conn_init_read(cmnd->conn, &cmnd->pdu.bhs, sizeof(cmnd->pdu.bhs));
610         conn->read_state = RX_BHS;
611
612         return cmnd;
613 }
614
/* Returns >0 for success, <=0 for error or successful finish */
/*
 * PDU receive state machine for @conn.  Each case deliberately falls
 * through to the next stage when the previous one completed within the
 * same invocation; a partial read (do_recv() leaving the state unchanged)
 * breaks out so the caller can retry when more data arrives.
 *
 * Stages: BHS -> optional AHS -> optional header digest -> data ->
 * optional data digest -> RX_END, at which point the command is handed
 * to cmnd_rx_end() and the state resets to RX_INIT_BHS.
 */
static int recv(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd = conn->read_cmnd;
	int hdigest, ddigest, res = 1, rc;

	TRACE_ENTRY();

	/* DIGEST_NONE set => digest disabled for that direction */
	hdigest = conn->hdigest_type & DIGEST_NONE ? 0 : 1;
	ddigest = conn->ddigest_type & DIGEST_NONE ? 0 : 1;

	switch (conn->read_state) {
	case RX_INIT_BHS:
		sBUG_ON(cmnd != NULL);
		cmnd = conn->read_cmnd = create_cmnd(conn);
		/* fall through */
	case RX_BHS:
		res = do_recv(conn, RX_INIT_AHS);
		if (res <= 0 || conn->read_state != RX_INIT_AHS)
			break;
		/* fall through */
	case RX_INIT_AHS:
		iscsi_cmnd_get_length(&cmnd->pdu);
		if (cmnd->pdu.ahssize) {
			iscsi_conn_read_ahs(conn, cmnd);
			conn->read_state = RX_AHS;
		} else
			conn->read_state = hdigest ? RX_INIT_HDIGEST : RX_INIT_DATA;

		if (conn->read_state != RX_AHS)
			break;
		/* fall through */
	case RX_AHS:
		res = do_recv(conn, hdigest ? RX_INIT_HDIGEST : RX_INIT_DATA);
		if (res <= 0 || conn->read_state != RX_INIT_HDIGEST)
			break;
		/* fall through */
	case RX_INIT_HDIGEST:
		iscsi_conn_init_read(conn, &cmnd->hdigest, sizeof(u32));
		conn->read_state = RX_HDIGEST;
		/* fall through */
	case RX_HDIGEST:
		res = do_recv(conn, RX_CHECK_HDIGEST);
		if (res <= 0 || conn->read_state != RX_CHECK_HDIGEST)
			break;
		/* fall through */
	case RX_CHECK_HDIGEST:
		rc = rx_hdigest(conn);
		if (likely(rc == 0))
			conn->read_state = RX_INIT_DATA;
		else {
			res = rc;
			break;
		}
		/* fall through */
	case RX_INIT_DATA:
		rc = cmnd_rx_start(cmnd);
		if (unlikely(rc != 0)) {
			sBUG_ON(!conn->closing);
			conn->read_state = RX_END;
			res = rc;
			/* cmnd will be freed in close_conn() */
			goto out;
		}
		conn->read_state = cmnd->pdu.datasize ? RX_DATA : RX_END;
		if (conn->read_state != RX_DATA)
			break;
		/* fall through */
	case RX_DATA:
		res = do_recv(conn, ddigest ? RX_INIT_DDIGEST : RX_END);
		if (res <= 0 || conn->read_state != RX_INIT_DDIGEST)
			break;
		/* fall through */
	case RX_INIT_DDIGEST:
		iscsi_conn_init_read(conn, &cmnd->ddigest, sizeof(u32));
		conn->read_state = RX_DDIGEST;
		/* fall through */
	case RX_DDIGEST:
		res = do_recv(conn, RX_CHECK_DDIGEST);
		if (res <= 0 || conn->read_state != RX_CHECK_DDIGEST)
			break;
		/* fall through */
	case RX_CHECK_DDIGEST:
		conn->read_state = RX_END;
		if (cmnd->pdu.datasize <= 256*1024) {
			/* It's cache hot, so let's compute it inline */
			TRACE_DBG("cmnd %p, opcode %x: checking RX "
				"ddigest inline", cmnd, cmnd_opcode(cmnd));
			cmnd->ddigest_checked = 1;
			rc = digest_rx_data(cmnd);
			if (unlikely(rc != 0)) {
				mark_conn_closed(conn);
				goto out;
			}
		} else if (cmnd_opcode(cmnd) == ISCSI_OP_SCSI_CMD) {
			/* Large payload: defer the digest check to a thread */
			cmd_add_on_rx_ddigest_list(cmnd, cmnd);
			cmnd_get(cmnd);
		} else if (cmnd_opcode(cmnd) != ISCSI_OP_SCSI_DATA_OUT) {
			/*
			 * We could get here only for NOP-Out. ISCSI RFC doesn't
			 * specify how to deal with digest errors in this case.
			 * Is closing connection correct?
			 */
			TRACE_DBG("cmnd %p, opcode %x: checking NOP RX "
				"ddigest", cmnd, cmnd_opcode(cmnd));
			rc = digest_rx_data(cmnd);
			if (unlikely(rc != 0)) {
				mark_conn_closed(conn);
				goto out;
			}
		}
		break;
	default:
		PRINT_CRIT_ERROR("%d %x", conn->read_state, cmnd_opcode(cmnd));
		sBUG();
	}

	if (res <= 0)
		goto out;

	if (conn->read_state != RX_END)
		goto out;

	/* The whole PDU must have been consumed by now */
	if (unlikely(conn->read_size)) {
		PRINT_CRIT_ERROR("%d %x %d", res, cmnd_opcode(cmnd),
			conn->read_size);
		sBUG();
	}

	conn->read_cmnd = NULL;
	conn->read_state = RX_INIT_BHS;

	cmnd_rx_end(cmnd);

	sBUG_ON(conn->read_size != 0);

	/* res == 0 signals "PDU complete" to process_read_io() */
	res = 0;

out:
	TRACE_EXIT_RES(res);
	return res;
}
746
747 /* No locks, conn is rd processing */
748 static int process_read_io(struct iscsi_conn *conn, int *closed)
749 {
750         int res;
751
752         do {
753                 res = recv(conn);
754                 if (unlikely(conn->closing)) {
755                         start_close_conn(conn);
756                         *closed = 1;
757                         break;
758                 }
759         } while (res > 0);
760
761         TRACE_EXIT_RES(res);
762         return res;
763 }
764
765 /*
766  * Called under iscsi_rd_lock and BHs disabled, but will drop it inside,
767  * then reaquire.
768  */
769 static void scst_do_job_rd(void)
770 {
771         TRACE_ENTRY();
772
773         /* We delete/add to tail connections to maintain fairness between them */
774
775         while (!list_empty(&iscsi_rd_list)) {
776                 int rc, closed = 0;
777                 struct iscsi_conn *conn = list_entry(iscsi_rd_list.next,
778                         typeof(*conn), rd_list_entry);
779
780                 list_del(&conn->rd_list_entry);
781
782                 sBUG_ON(conn->rd_state == ISCSI_CONN_RD_STATE_PROCESSING);
783                 conn->rd_data_ready = 0;
784                 conn->rd_state = ISCSI_CONN_RD_STATE_PROCESSING;
785 #ifdef CONFIG_SCST_EXTRACHECKS
786                 conn->rd_task = current;
787 #endif
788                 spin_unlock_bh(&iscsi_rd_lock);
789
790                 rc = process_read_io(conn, &closed);
791
792                 spin_lock_bh(&iscsi_rd_lock);
793
794                 if (closed)
795                         continue;
796
797 #ifdef CONFIG_SCST_EXTRACHECKS
798                 conn->rd_task = NULL;
799 #endif
800                 if ((rc == 0) || conn->rd_data_ready) {
801                         list_add_tail(&conn->rd_list_entry, &iscsi_rd_list);
802                         conn->rd_state = ISCSI_CONN_RD_STATE_IN_LIST;
803                 } else
804                         conn->rd_state = ISCSI_CONN_RD_STATE_IDLE;
805         }
806
807         TRACE_EXIT();
808         return;
809 }
810
811 static inline int test_rd_list(void)
812 {
813         int res = !list_empty(&iscsi_rd_list) ||
814                   unlikely(kthread_should_stop());
815         return res;
816 }
817
/*
 * Main function of an iSCSI read (network RX) kernel thread.  Sleeps on
 * iscsi_rd_waitQ until connections appear on iscsi_rd_list, services
 * them via scst_do_job_rd(), and loops until kthread_stop().
 */
int istrd(void *arg)
{
	TRACE_ENTRY();

	PRINT_INFO("Read thread started, PID %d", current->pid);

	/* Exempt this thread from the suspend/resume freezer */
	current->flags |= PF_NOFREEZE;

	spin_lock_bh(&iscsi_rd_lock);
	while (!kthread_should_stop()) {
		wait_queue_t wait;
		init_waitqueue_entry(&wait, current);

		if (!test_rd_list()) {
			/*
			 * Open-coded interruptible wait: iscsi_rd_lock must
			 * be dropped around schedule() and the condition
			 * rechecked under the lock after reacquiring it.
			 */
			add_wait_queue_exclusive(&iscsi_rd_waitQ, &wait);
			for (;;) {
				set_current_state(TASK_INTERRUPTIBLE);
				if (test_rd_list())
					break;
				spin_unlock_bh(&iscsi_rd_lock);
				schedule();
				spin_lock_bh(&iscsi_rd_lock);
			}
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&iscsi_rd_waitQ, &wait);
		}
		/* Called with iscsi_rd_lock held; may drop/retake it inside */
		scst_do_job_rd();
	}
	spin_unlock_bh(&iscsi_rd_lock);

	/*
	 * If kthread_should_stop() is true, we are guaranteed to be
	 * on the module unload, so iscsi_rd_list must be empty.
	 */
	sBUG_ON(!list_empty(&iscsi_rd_list));

	PRINT_INFO("Read thread PID %d finished", current->pid);

	TRACE_EXIT();
	return 0;
}
859
860 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
861 static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd)
862 {
863         int v;
864
865         TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d",
866                 cmd, atomic_read(&cmd->net_ref_cnt)+1);
867
868         v = atomic_inc_return(&cmd->net_ref_cnt);
869         if (v == 1) {
870                 TRACE_NET_PAGE("getting cmd %p", cmd);
871                 cmnd_get(cmd);
872         }
873 }
874
875 void iscsi_get_page_callback(struct page *page)
876 {
877         struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;
878
879         TRACE_NET_PAGE("page %p, _count %d", page,
880                 atomic_read(&page->_count));
881
882         __iscsi_get_page_callback(cmd);
883 }
884
885 static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd)
886 {
887         TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d", cmd,
888                 atomic_read(&cmd->net_ref_cnt)-1);
889
890         if (atomic_dec_and_test(&cmd->net_ref_cnt)) {
891                 int i, sg_cnt = cmd->sg_cnt;
892                 for (i = 0; i < sg_cnt; i++) {
893                         struct page *page = sg_page(&cmd->sg[i]);
894                         TRACE_NET_PAGE("Clearing page %p", page);
895                         if (page->net_priv == cmd)
896                                 page->net_priv = NULL;
897                 }
898                 cmnd_put(cmd);
899         }
900 }
901
902 void iscsi_put_page_callback(struct page *page)
903 {
904         struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;
905
906         TRACE_NET_PAGE("page %p, _count %d", page,
907                 atomic_read(&page->_count));
908
909         __iscsi_put_page_callback(cmd);
910 }
911
912 static void check_net_priv(struct iscsi_cmnd *cmd, struct page *page)
913 {
914         if ((atomic_read(&cmd->net_ref_cnt) == 1) && (page->net_priv == cmd)) {
915                 TRACE_DBG("sendpage() not called get_page(), zeroing net_priv "
916                         "%p (page %p)", page->net_priv, page);
917                 page->net_priv = NULL;
918         }
919 }
920 #else
921 static inline void check_net_priv(struct iscsi_cmnd *cmd, struct page *page) {}
922 static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd) {}
923 static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd) {}
924 #endif
925
/* This is partially taken from the Ardis code. */
/*
 * Transmits the current write command of @conn on its socket in two
 * phases: first any queued header iovecs (conn->write_iop) through
 * vfs_writev(), then the data pages of the command's scatter-gather
 * list through the socket's sendpage operation.
 *
 * Returns the number of bytes of conn->write_size consumed by this
 * call (0 if the command has no data), or a negative error code
 * (-EAGAIN when the socket can't take more right now).
 */
static int write_data(struct iscsi_conn *conn)
{
	mm_segment_t oldfs;
	struct file *file;
	struct socket *sock;
	ssize_t (*sock_sendpage)(struct socket *, struct page *, int, size_t, int);
	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
	struct iscsi_cmnd *write_cmnd = conn->write_cmnd;
	struct iscsi_cmnd *ref_cmd;
	struct scatterlist *sg;
	struct iovec *iop;
	int saved_size, size, sendsize;
	int offset, idx, sg_offset;
	int flags, res, count;
	bool do_put = false;

	TRACE_ENTRY();

	iscsi_extracheck_is_wr_thread(conn);

	/* The command owning the sg: either this response or its request */
	if (write_cmnd->own_sg == 0)
		ref_cmd = write_cmnd->parent_req;
	else
		ref_cmd = write_cmnd;

	/* Track the command on written_list for response timeout handling */
	if (!ref_cmd->on_written_list) {
		TRACE_DBG("Adding cmd %p to conn %p written_list", ref_cmd,
			conn);
		spin_lock_bh(&conn->write_list_lock);
		ref_cmd->on_written_list = 1;
		ref_cmd->write_timeout = jiffies + ISCSI_RSP_TIMEOUT;
		list_add_tail(&ref_cmd->write_list_entry, &conn->written_list);
		spin_unlock_bh(&conn->write_list_lock);
	}

	/* Arm the response timer if it isn't already running (recheck
	 * under the lock to close the race with a concurrent arm). */
	if (!timer_pending(&conn->rsp_timer)) {
		sBUG_ON(!ref_cmd->on_written_list);
		spin_lock_bh(&conn->write_list_lock);
		if (likely(!timer_pending(&conn->rsp_timer))) {
			TRACE_DBG("Starting timer on %ld (conn %p)",
				ref_cmd->write_timeout, conn);
			conn->rsp_timer.expires = ref_cmd->write_timeout;
			add_timer(&conn->rsp_timer);
		}
		spin_unlock_bh(&conn->write_list_lock);
	}

	file = conn->file;
	saved_size = size = conn->write_size;
	iop = conn->write_iop;
	count = conn->write_iop_used;

	/* Phase 1: flush the header iovecs, advancing iop/count past
	 * whatever vfs_writev() managed to push out. */
	if (iop) {
		while (1) {
			loff_t off = 0;
			int rest;

			sBUG_ON(count > sizeof(conn->write_iov)
					/ sizeof(conn->write_iov[0]));
 retry:
			/* Kernel-space buffers, so widen the fs segment */
			oldfs = get_fs();
			set_fs(KERNEL_DS);
			res = vfs_writev(file, (struct iovec __user *)iop,
					 count, &off);
			set_fs(oldfs);
			TRACE_WRITE("%#Lx:%u: %d(%ld)",
				    (long long unsigned int)conn->session->sid,
				    conn->cid,
				    res, (long)iop->iov_len);
			if (unlikely(res <= 0)) {
				if (res == -EAGAIN) {
					/* Save progress for the next call */
					conn->write_iop = iop;
					conn->write_iop_used = count;
					goto out_iov;
				} else if (res == -EINTR)
					goto retry;
				goto out_err;
			}

			/* Skip fully-written iovecs, then trim the partial one */
			rest = res;
			size -= res;
			while (iop->iov_len <= rest && rest) {
				rest -= iop->iov_len;
				iop++;
				count--;
			}
			if (count == 0) {
				conn->write_iop = NULL;
				conn->write_iop_used = 0;
				if (size)
					break;
				goto out_iov;
			}
			sBUG_ON(iop > conn->write_iov + sizeof(conn->write_iov)
						  /sizeof(conn->write_iov[0]));
			iop->iov_base += rest;
			iop->iov_len -= rest;
		}
	}

	/* Phase 2: send the data pages */
	sg = write_cmnd->sg;
	if (unlikely(sg == NULL)) {
		PRINT_INFO("WARNING: Data missed (cmd %p)!", write_cmnd);
		res = 0;
		goto out;
	}

	/* To protect from too early transfer completion race */
	__iscsi_get_page_callback(ref_cmd);
	do_put = true;

	/* Translate the linear write_offset into page index + in-page offset */
	sg_offset = sg[0].offset;
	offset = conn->write_offset + sg_offset;
	idx = offset >> PAGE_SHIFT;
	offset &= ~PAGE_MASK;

	sock = conn->sock;

#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
	sock_sendpage = sock->ops->sendpage;
#else
	/*
	 * Without completion notification, zero-copy can't be used for
	 * dev-handler allocated buffers — NOTE(review): presumably their
	 * pages can be reused before TX completes; confirm.
	 */
	if ((write_cmnd->parent_req->scst_cmd != NULL) &&
	    scst_cmd_get_dh_data_buff_alloced(write_cmnd->parent_req->scst_cmd))
		sock_sendpage = sock_no_sendpage;
	else
		sock_sendpage = sock->ops->sendpage;
#endif

	flags = MSG_DONTWAIT;

	while (1) {
		sendpage = sock_sendpage;

#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
		{
			/* Claim the page for ref_cmd, or fall back to the
			 * copying path if another command already owns it. */
			static DEFINE_SPINLOCK(net_priv_lock);
			spin_lock(&net_priv_lock);
			if (sg_page(&sg[idx])->net_priv != NULL) {
				if (sg_page(&sg[idx])->net_priv != ref_cmd) {
					/*
					 * This might happen if user space supplies
					 * to scst_user the same pages in different
					 * commands or in case of zero-copy FILEIO,
					 * when several initiators request the same
					 * data simultaneously.
					 */
					TRACE_DBG("net_priv isn't NULL and != "
						"ref_cmd (write_cmnd %p, ref_cmd %p, "
						"sg %p, idx %d, page %p, net_priv %p)",
						write_cmnd, ref_cmd, sg, idx,
						sg_page(&sg[idx]),
						sg_page(&sg[idx])->net_priv);
					sendpage = sock_no_sendpage;
				}
			} else
				sg_page(&sg[idx])->net_priv = ref_cmd;
			spin_unlock(&net_priv_lock);
		}
#endif
		sendsize = PAGE_SIZE - offset;
		/* Last chunk fits inside the current page */
		if (size <= sendsize) {
retry2:
			res = sendpage(sock, sg_page(&sg[idx]), offset, size, flags);
			TRACE_WRITE("Final %s %#Lx:%u: %d(%lu,%u,%u, cmd %p, page %p)",
				(sendpage != sock_no_sendpage) ? "sendpage" :
								 "sock_no_sendpage",
				(long long unsigned int)conn->session->sid,
				conn->cid,
				res, sg_page(&sg[idx])->index, offset, size,
				write_cmnd, sg_page(&sg[idx]));
			if (unlikely(res <= 0)) {
				if (res == -EINTR)
					goto retry2;
				else
					goto out_res;
			}

			check_net_priv(ref_cmd, sg_page(&sg[idx]));
			if (res == size) {
				/* Everything went out; report full size */
				conn->write_size = 0;
				res = saved_size;
				goto out_put;
			}

			offset += res;
			size -= res;
			continue;
		}

retry1:
		/* More pages follow: hint the stack with MSG_MORE */
		res = sendpage(sock, sg_page(&sg[idx]), offset, sendsize,
			flags | MSG_MORE);
		TRACE_WRITE("%s %#Lx:%u: %d(%lu,%u,%u, cmd %p, page %p)",
			(sendpage != sock_no_sendpage) ? "sendpage" :
							 "sock_no_sendpage",
			(unsigned long long)conn->session->sid, conn->cid,
			res, sg_page(&sg[idx])->index, offset, sendsize,
			write_cmnd, sg_page(&sg[idx]));
		if (unlikely(res <= 0)) {
			if (res == -EINTR)
				goto retry1;
			else
				goto out_res;
		}

		check_net_priv(ref_cmd, sg_page(&sg[idx]));
		if (res == sendsize) {
			/* Page fully sent; move to the next one */
			idx++;
			offset = 0;
			EXTRACHECKS_BUG_ON(idx >= ref_cmd->sg_cnt);
		} else
			offset += res;

		size -= res;
	}

out_off:
	/* Save linear progress for resumption on the next call */
	conn->write_offset = (idx << PAGE_SHIFT) + offset - sg_offset;

out_iov:
	conn->write_size = size;
	if ((saved_size == size) && res == -EAGAIN)
		goto out_put;

	res = saved_size - size;

out_put:
	if (do_put)
		__iscsi_put_page_callback(ref_cmd);

out:
	TRACE_EXIT_RES(res);
	return res;

out_res:
	check_net_priv(ref_cmd, sg_page(&sg[idx]));
	if (res == -EAGAIN)
		goto out_off;
	/* else go through */

out_err:
#ifndef CONFIG_SCST_DEBUG
	if (!conn->closing)
#endif
	{
		PRINT_ERROR("error %d at sid:cid %#Lx:%u, cmnd %p", res,
			    (long long unsigned int)conn->session->sid,
			    conn->cid, conn->write_cmnd);
	}
	if (ref_cmd->scst_cmd != NULL)
		scst_set_delivery_status(ref_cmd->scst_cmd,
			SCST_CMD_DELIVERY_FAILED);
	goto out_put;
}
1181
1182 static int exit_tx(struct iscsi_conn *conn, int res)
1183 {
1184         iscsi_extracheck_is_wr_thread(conn);
1185
1186         switch (res) {
1187         case -EAGAIN:
1188         case -ERESTARTSYS:
1189                 res = 0;
1190                 break;
1191         default:
1192 #ifndef CONFIG_SCST_DEBUG
1193                 if (!conn->closing)
1194 #endif
1195                 {
1196                         PRINT_ERROR("Sending data failed: initiator %s, "
1197                                 "write_size %d, write_state %d, res %d",
1198                                 conn->session->initiator_name, conn->write_size,
1199                                 conn->write_state, res);
1200                 }
1201                 conn->write_state = TX_END;
1202                 conn->write_size = 0;
1203                 mark_conn_closed(conn);
1204                 break;
1205         }
1206         return res;
1207 }
1208
1209 static int tx_ddigest(struct iscsi_cmnd *cmnd, int state)
1210 {
1211         int res, rest = cmnd->conn->write_size;
1212         struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
1213         struct kvec iov;
1214
1215         iscsi_extracheck_is_wr_thread(cmnd->conn);
1216
1217         TRACE_DBG("Sending data digest %x (cmd %p)", cmnd->ddigest, cmnd);
1218
1219         iov.iov_base = (char *) (&cmnd->ddigest) + (sizeof(u32) - rest);
1220         iov.iov_len = rest;
1221
1222         res = kernel_sendmsg(cmnd->conn->sock, &msg, &iov, 1, rest);
1223         if (res > 0) {
1224                 cmnd->conn->write_size -= res;
1225                 if (!cmnd->conn->write_size)
1226                         cmnd->conn->write_state = state;
1227         } else
1228                 res = exit_tx(cmnd->conn, res);
1229
1230         return res;
1231 }
1232
1233 static void init_tx_hdigest(struct iscsi_cmnd *cmnd)
1234 {
1235         struct iscsi_conn *conn = cmnd->conn;
1236         struct iovec *iop;
1237
1238         iscsi_extracheck_is_wr_thread(conn);
1239
1240         digest_tx_header(cmnd);
1241
1242         sBUG_ON(conn->write_iop_used >= sizeof(conn->write_iov)/sizeof(conn->write_iov[0]));
1243         iop = &conn->write_iop[conn->write_iop_used];
1244         conn->write_iop_used++;
1245         iop->iov_base = &(cmnd->hdigest);
1246         iop->iov_len = sizeof(u32);
1247         conn->write_size += sizeof(u32);
1248
1249         return;
1250 }
1251
1252 static int iscsi_do_send(struct iscsi_conn *conn, int state)
1253 {
1254         int res;
1255
1256         iscsi_extracheck_is_wr_thread(conn);
1257
1258         res = write_data(conn);
1259         if (res > 0) {
1260                 if (!conn->write_size)
1261                         conn->write_state = state;
1262         } else
1263                 res = exit_tx(conn, res);
1264
1265         return res;
1266 }
1267
1268 /*
1269  * No locks, conn is wr processing.
1270  *
1271  * IMPORTANT! Connection conn must be protected by additional conn_get()
1272  * upon entrance in this function, because otherwise it could be destroyed
1273  * inside as a result of cmnd release.
1274  */
1275 int iscsi_send(struct iscsi_conn *conn)
1276 {
1277         struct iscsi_cmnd *cmnd = conn->write_cmnd;
1278         int ddigest, res = 0;
1279
1280         TRACE_ENTRY();
1281
1282         TRACE_DBG("conn %p, write_cmnd %p", conn, cmnd);
1283
1284         iscsi_extracheck_is_wr_thread(conn);
1285
1286         ddigest = conn->ddigest_type != DIGEST_NONE ? 1 : 0;
1287
1288         switch (conn->write_state) {
1289         case TX_INIT:
1290                 sBUG_ON(cmnd != NULL);
1291                 cmnd = conn->write_cmnd = iscsi_get_send_cmnd(conn);
1292                 if (!cmnd)
1293                         goto out;
1294                 cmnd_tx_start(cmnd);
1295                 if (!(conn->hdigest_type & DIGEST_NONE))
1296                         init_tx_hdigest(cmnd);
1297                 conn->write_state = TX_BHS_DATA;
1298         case TX_BHS_DATA:
1299                 res = iscsi_do_send(conn, ddigest && cmnd->pdu.datasize ?
1300                                         TX_INIT_DDIGEST : TX_END);
1301                 if (res <= 0 || conn->write_state != TX_INIT_DDIGEST)
1302                         break;
1303         case TX_INIT_DDIGEST:
1304                 cmnd->conn->write_size = sizeof(u32);
1305                 conn->write_state = TX_DDIGEST;
1306         case TX_DDIGEST:
1307                 res = tx_ddigest(cmnd, TX_END);
1308                 break;
1309         default:
1310                 PRINT_CRIT_ERROR("%d %d %x", res, conn->write_state,
1311                         cmnd_opcode(cmnd));
1312                 sBUG();
1313         }
1314
1315         if (res == 0)
1316                 goto out;
1317
1318         if (conn->write_state != TX_END)
1319                 goto out;
1320
1321         if (unlikely(conn->write_size)) {
1322                 PRINT_CRIT_ERROR("%d %x %u", res, cmnd_opcode(cmnd),
1323                         conn->write_size);
1324                 sBUG();
1325         }
1326         cmnd_tx_end(cmnd);
1327
1328         rsp_cmnd_release(cmnd);
1329
1330         conn->write_cmnd = NULL;
1331         conn->write_state = TX_INIT;
1332
1333 out:
1334         TRACE_EXIT_RES(res);
1335         return res;
1336 }
1337
/*
 * No locks; conn is being wr processed.
 *
 * IMPORTANT! The caller must hold an extra reference (conn_get()) on
 * conn while this runs, since iscsi_send() releases sent commands and
 * could otherwise destroy the connection under us.
 */
static int process_write_queue(struct iscsi_conn *conn)
{
	int res;

	TRACE_ENTRY();

	res = likely(test_write_ready(conn)) ? iscsi_send(conn) : 0;

	TRACE_EXIT_RES(res);
	return res;
}
1356
1357 /*
1358  * Called under iscsi_wr_lock and BHs disabled, but will drop it inside,
1359  * then reaquire.
1360  */
1361 static void scst_do_job_wr(void)
1362 {
1363         TRACE_ENTRY();
1364
1365         /* We delete/add to tail connections to maintain fairness between them */
1366
1367         while (!list_empty(&iscsi_wr_list)) {
1368                 int rc;
1369                 struct iscsi_conn *conn = list_entry(iscsi_wr_list.next,
1370                         typeof(*conn), wr_list_entry);
1371
1372                 TRACE_DBG("conn %p, wr_state %x, wr_space_ready %d, "
1373                         "write ready %d", conn, conn->wr_state,
1374                         conn->wr_space_ready, test_write_ready(conn));
1375
1376                 list_del(&conn->wr_list_entry);
1377
1378                 sBUG_ON(conn->wr_state == ISCSI_CONN_WR_STATE_PROCESSING);
1379
1380                 conn->wr_state = ISCSI_CONN_WR_STATE_PROCESSING;
1381                 conn->wr_space_ready = 0;
1382 #ifdef CONFIG_SCST_EXTRACHECKS
1383                 conn->wr_task = current;
1384 #endif
1385                 spin_unlock_bh(&iscsi_wr_lock);
1386
1387                 conn_get(conn);
1388
1389                 rc = process_write_queue(conn);
1390
1391                 spin_lock_bh(&iscsi_wr_lock);
1392 #ifdef CONFIG_SCST_EXTRACHECKS
1393                 conn->wr_task = NULL;
1394 #endif
1395                 if ((rc == -EAGAIN) && !conn->wr_space_ready) {
1396                         conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT;
1397                         goto cont;
1398                 }
1399
1400                 if (test_write_ready(conn)) {
1401                         list_add_tail(&conn->wr_list_entry, &iscsi_wr_list);
1402                         conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST;
1403                 } else
1404                         conn->wr_state = ISCSI_CONN_WR_STATE_IDLE;
1405
1406 cont:
1407                 conn_put(conn);
1408         }
1409
1410         TRACE_EXIT();
1411         return;
1412 }
1413
1414 static inline int test_wr_list(void)
1415 {
1416         int res = !list_empty(&iscsi_wr_list) ||
1417                   unlikely(kthread_should_stop());
1418         return res;
1419 }
1420
/*
 * Main function of an iSCSI write (network TX) kernel thread.  Sleeps on
 * iscsi_wr_waitQ until connections appear on iscsi_wr_list, services
 * them via scst_do_job_wr(), and loops until kthread_stop().
 */
int istwr(void *arg)
{
	TRACE_ENTRY();

	PRINT_INFO("Write thread started, PID %d", current->pid);

	/* Exempt this thread from the suspend/resume freezer */
	current->flags |= PF_NOFREEZE;

	spin_lock_bh(&iscsi_wr_lock);
	while (!kthread_should_stop()) {
		wait_queue_t wait;
		init_waitqueue_entry(&wait, current);

		if (!test_wr_list()) {
			/*
			 * Open-coded interruptible wait: iscsi_wr_lock must
			 * be dropped around schedule() and the condition
			 * rechecked under the lock after reacquiring it.
			 */
			add_wait_queue_exclusive(&iscsi_wr_waitQ, &wait);
			for (;;) {
				set_current_state(TASK_INTERRUPTIBLE);
				if (test_wr_list())
					break;
				spin_unlock_bh(&iscsi_wr_lock);
				schedule();
				spin_lock_bh(&iscsi_wr_lock);
			}
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&iscsi_wr_waitQ, &wait);
		}
		/* Called with iscsi_wr_lock held; may drop/retake it inside */
		scst_do_job_wr();
	}
	spin_unlock_bh(&iscsi_wr_lock);

	/*
	 * If kthread_should_stop() is true, we are guaranteed to be
	 * on the module unload, so iscsi_wr_list must be empty.
	 */
	sBUG_ON(!list_empty(&iscsi_wr_list));

	PRINT_INFO("Write thread PID %d finished", current->pid);

	TRACE_EXIT();
	return 0;
}