Minor compiler and checkpatch fixes
iscsi-scst/kernel/nthread.c
1 /*
2  *  Network threads.
3  *
4  *  Copyright (C) 2004 - 2005 FUJITA Tomonori <tomof@acm.org>
5  *  Copyright (C) 2007 - 2009 Vladislav Bolkhovitin
6  *  Copyright (C) 2007 - 2009 ID7 Ltd.
7  *
8  *  This program is free software; you can redistribute it and/or
9  *  modify it under the terms of the GNU General Public License
10  *  as published by the Free Software Foundation.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  */
17
18 #include <linux/sched.h>
19 #include <linux/file.h>
20 #include <linux/kthread.h>
21 #include <asm/ioctls.h>
22 #include <linux/delay.h>
23 #include <net/tcp.h>
24
25 #include "iscsi.h"
26 #include "digest.h"
27
28 enum rx_state {
29         RX_INIT_BHS = 0, /* Must be zero for better "switch" optimization. */
30         RX_BHS,
31         RX_CMD_START,
32         RX_DATA,
33         RX_END,
34
35         RX_CMD_CONTINUE,
36         RX_INIT_HDIGEST,
37         RX_CHECK_HDIGEST,
38         RX_INIT_DDIGEST,
39         RX_CHECK_DDIGEST,
40         RX_AHS,
41         RX_PADDING,
42 };
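/*
 * The RX states above are traversed by process_read_io() roughly in the
 * order BHS -> AHS -> header digest -> command start/continue -> data ->
 * padding -> data digest -> end, with the AHS and digest steps skipped
 * when they don't apply.
 */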
43
44 enum tx_state {
45         TX_INIT = 0, /* Must be zero for better "switch" optimization. */
46         TX_BHS_DATA,
47         TX_INIT_PADDING,
48         TX_PADDING,
49         TX_INIT_DDIGEST,
50         TX_DDIGEST,
51         TX_END,
52 };
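/*
 * The TX states mirror the layout of an outgoing PDU: BHS plus data first,
 * then padding, then the optional data digest.
 */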
53
54 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
55 static void iscsi_check_closewait(struct iscsi_conn *conn)
56 {
57         struct iscsi_cmnd *cmnd;
58
59         TRACE_ENTRY();
60
61         TRACE_CONN_CLOSE_DBG("conn %p, sk_state %d", conn,
62                 conn->sock->sk->sk_state);
63
64         if (conn->sock->sk->sk_state != TCP_CLOSE) {
65                 TRACE_CONN_CLOSE_DBG("conn %p, skipping", conn);
66                 goto out;
67         }
68
69         /*
70          * No data is going to be sent, so all queued buffers can be freed
71          * now. In many cases TCP does that only in close(), but we can't
72          * rely on user space to call it.
73          */
74
75 again:
76         spin_lock_bh(&conn->cmd_list_lock);
77         list_for_each_entry(cmnd, &conn->cmd_list, cmd_list_entry) {
78                 struct iscsi_cmnd *rsp;
79                 int restart = 0;
80
81                 TRACE_CONN_CLOSE_DBG("cmd %p, scst_state %x, data_waiting %d, "
82                         "ref_cnt %d, parent_req %p, net_ref_cnt %d, sg %p",
83                         cmnd, cmnd->scst_state, cmnd->data_waiting,
84                         atomic_read(&cmnd->ref_cnt), cmnd->parent_req,
85                         atomic_read(&cmnd->net_ref_cnt), cmnd->sg);
86
87                 sBUG_ON(cmnd->parent_req != NULL);
88
89                 if (cmnd->sg != NULL) {
90                         int i;
91
92                         if (cmnd_get_check(cmnd))
93                                 continue;
94
95                         for (i = 0; i < cmnd->sg_cnt; i++) {
96                                 struct page *page = sg_page(&cmnd->sg[i]);
97                                 TRACE_CONN_CLOSE_DBG("page %p, net_priv %p, "
98                                         "_count %d", page, page->net_priv,
99                                         atomic_read(&page->_count));
100
101                                 if (page->net_priv != NULL) {
102                                         if (restart == 0) {
103                                                 spin_unlock_bh(&conn->cmd_list_lock);
104                                                 restart = 1;
105                                         }
106                                         while (page->net_priv != NULL)
107                                                 iscsi_put_page_callback(page);
108                                 }
109                         }
110                         cmnd_put(cmnd);
111
112                         if (restart)
113                                 goto again;
114                 }
115
116                 spin_lock_bh(&cmnd->rsp_cmd_lock);
117                 list_for_each_entry(rsp, &cmnd->rsp_cmd_list,
118                                 rsp_cmd_list_entry) {
119                         TRACE_CONN_CLOSE_DBG("  rsp %p, ref_cnt %d, "
120                                 "net_ref_cnt %d, sg %p",
121                                 rsp, atomic_read(&rsp->ref_cnt),
122                                 atomic_read(&rsp->net_ref_cnt), rsp->sg);
123
124                         if ((rsp->sg != cmnd->sg) && (rsp->sg != NULL)) {
125                                 int i;
126
127                                 if (cmnd_get_check(rsp))
128                                         continue;
129
130                                 for (i = 0; i < rsp->sg_cnt; i++) {
131                                         struct page *page =
132                                                 sg_page(&rsp->sg[i]);
133                                         TRACE_CONN_CLOSE_DBG(
134                                                 "    page %p, net_priv %p, "
135                                                 "_count %d",
136                                                 page, page->net_priv,
137                                                 atomic_read(&page->_count));
138
139                                         if (page->net_priv != NULL) {
140                                                 if (restart == 0) {
141                                                         spin_unlock_bh(&cmnd->rsp_cmd_lock);
142                                                         spin_unlock_bh(&conn->cmd_list_lock);
143                                                         restart = 1;
144                                                 }
145                                                 while (page->net_priv != NULL)
146                                                         iscsi_put_page_callback(page);
147                                         }
148                                 }
149                                 cmnd_put(rsp);
150
151                                 if (restart)
152                                         goto again;
153                         }
154                 }
155                 spin_unlock_bh(&cmnd->rsp_cmd_lock);
156         }
157         spin_unlock_bh(&conn->cmd_list_lock);
158
159 out:
160         TRACE_EXIT();
161         return;
162 }
163 #else
164 static inline void iscsi_check_closewait(struct iscsi_conn *conn) {}
165 #endif
166
167 static void free_pending_commands(struct iscsi_conn *conn)
168 {
169         struct iscsi_session *session = conn->session;
170         struct list_head *pending_list = &session->pending_list;
171         int req_freed;
172         struct iscsi_cmnd *cmnd;
173
174         spin_lock(&session->sn_lock);
175         do {
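                /*
                 * sn_lock is dropped around req_cmnd_release_force(), so the
                 * list walk is restarted (req_freed) after each freed command.
                 */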
176                 req_freed = 0;
177                 list_for_each_entry(cmnd, pending_list, pending_list_entry) {
178                         TRACE_CONN_CLOSE_DBG("Pending cmd %p "
179                                 "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
180                                 cmnd, conn, cmnd->pdu.bhs.sn,
181                                 session->exp_cmd_sn);
182                         if ((cmnd->conn == conn) &&
183                             (session->exp_cmd_sn == cmnd->pdu.bhs.sn)) {
184                                 TRACE_CONN_CLOSE_DBG("Freeing pending cmd %p",
185                                         cmnd);
186
187                                 list_del(&cmnd->pending_list_entry);
188                                 cmnd->pending = 0;
189
190                                 session->exp_cmd_sn++;
191
192                                 spin_unlock(&session->sn_lock);
193
194                                 req_cmnd_release_force(cmnd, 0);
195
196                                 req_freed = 1;
197                                 spin_lock(&session->sn_lock);
198                                 break;
199                         }
200                 }
201         } while (req_freed);
202         spin_unlock(&session->sn_lock);
203
204         return;
205 }
206
207 static void free_orphaned_pending_commands(struct iscsi_conn *conn)
208 {
209         struct iscsi_session *session = conn->session;
210         struct list_head *pending_list = &session->pending_list;
211         int req_freed;
212         struct iscsi_cmnd *cmnd;
213
214         spin_lock(&session->sn_lock);
215         do {
216                 req_freed = 0;
217                 list_for_each_entry(cmnd, pending_list, pending_list_entry) {
218                         TRACE_CONN_CLOSE_DBG("Pending cmd %p "
219                                 "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
220                                 cmnd, conn, cmnd->pdu.bhs.sn,
221                                 session->exp_cmd_sn);
222                         if (cmnd->conn == conn) {
223                                 PRINT_ERROR("Freeing orphaned pending cmd %p",
224                                             cmnd);
225
226                                 list_del(&cmnd->pending_list_entry);
227                                 cmnd->pending = 0;
228
229                                 if (session->exp_cmd_sn == cmnd->pdu.bhs.sn)
230                                         session->exp_cmd_sn++;
231
232                                 spin_unlock(&session->sn_lock);
233
234                                 req_cmnd_release_force(cmnd, 0);
235
236                                 req_freed = 1;
237                                 spin_lock(&session->sn_lock);
238                                 break;
239                         }
240                 }
241         } while (req_freed);
242         spin_unlock(&session->sn_lock);
243
244         return;
245 }
246
247 #ifdef CONFIG_SCST_DEBUG
248 static void trace_conn_close(struct iscsi_conn *conn)
249 {
250         struct iscsi_cmnd *cmnd;
251 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
252         struct iscsi_cmnd *rsp;
253 #endif
254
255 #if 0
256         if (time_after(jiffies, start_waiting + 10*HZ))
257                 trace_flag |= TRACE_CONN_OC_DBG;
258 #endif
259
260         spin_lock_bh(&conn->cmd_list_lock);
261         list_for_each_entry(cmnd, &conn->cmd_list,
262                         cmd_list_entry) {
263                 TRACE_CONN_CLOSE_DBG(
264                         "cmd %p, scst_state %x, scst_cmd state %d, "
265                         "data_waiting %d, ref_cnt %d, sn %u, "
266                         "parent_req %p, pending %d",
267                         cmnd, cmnd->scst_state,
268                         (cmnd->parent_req && cmnd->scst_cmd) ?
269                                 cmnd->scst_cmd->state : -1,
270                         cmnd->data_waiting, atomic_read(&cmnd->ref_cnt),
271                         cmnd->pdu.bhs.sn, cmnd->parent_req, cmnd->pending);
272 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
273                 TRACE_CONN_CLOSE_DBG("net_ref_cnt %d, sg %p",
274                         atomic_read(&cmnd->net_ref_cnt),
275                         cmnd->sg);
276                 if (cmnd->sg != NULL) {
277                         int i;
278                         for (i = 0; i < cmnd->sg_cnt; i++) {
279                                 struct page *page = sg_page(&cmnd->sg[i]);
280                                 TRACE_CONN_CLOSE_DBG("page %p, "
281                                         "net_priv %p, _count %d",
282                                         page, page->net_priv,
283                                         atomic_read(&page->_count));
284                         }
285                 }
286
287                 sBUG_ON(cmnd->parent_req != NULL);
288
289                 spin_lock_bh(&cmnd->rsp_cmd_lock);
290                 list_for_each_entry(rsp, &cmnd->rsp_cmd_list,
291                                 rsp_cmd_list_entry) {
292                         TRACE_CONN_CLOSE_DBG("  rsp %p, "
293                             "ref_cnt %d, net_ref_cnt %d, sg %p",
294                             rsp, atomic_read(&rsp->ref_cnt),
295                             atomic_read(&rsp->net_ref_cnt), rsp->sg);
296                         if (rsp->sg != cmnd->sg && rsp->sg) {
297                                 int i;
298                                 for (i = 0; i < rsp->sg_cnt; i++) {
299                                         TRACE_CONN_CLOSE_DBG("    page %p, "
300                                           "net_priv %p, _count %d",
301                                           sg_page(&rsp->sg[i]),
302                                           sg_page(&rsp->sg[i])->net_priv,
303                                           atomic_read(&sg_page(&rsp->sg[i])->
304                                                 _count));
305                                 }
306                         }
307                 }
308                 spin_unlock_bh(&cmnd->rsp_cmd_lock);
309 #endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
310         }
311         spin_unlock_bh(&conn->cmd_list_lock);
312         return;
313 }
314 #else /* CONFIG_SCST_DEBUG */
315 static void trace_conn_close(struct iscsi_conn *conn) {}
316 #endif /* CONFIG_SCST_DEBUG */
317
318 void iscsi_task_mgmt_affected_cmds_done(struct scst_mgmt_cmd *scst_mcmd)
319 {
320         int fn = scst_mgmt_cmd_get_fn(scst_mcmd);
321         void *priv = scst_mgmt_cmd_get_tgt_priv(scst_mcmd);
322
323         TRACE_MGMT_DBG("scst_mcmd %p, fn %d, priv %p", scst_mcmd, fn, priv);
324
325         switch (fn) {
326         case SCST_NEXUS_LOSS_SESS:
327         case SCST_ABORT_ALL_TASKS_SESS:
328         {
329                 struct iscsi_conn *conn = (struct iscsi_conn *)priv;
330                 struct iscsi_session *sess = conn->session;
331                 struct iscsi_conn *c;
332
333                 mutex_lock(&sess->target->target_mutex);
334
335                 /*
336                  * We can't mark sess as shutting down earlier, because until
337                  * now it might have had pending commands. Otherwise, in case
338                  * of reinstatement it might lead to data corruption, because
339                  * commands in the session being reinstated could be executed
340                  * after commands in the new session.
341                  */
342                 sess->sess_shutting_down = 1;
343                 list_for_each_entry(c, &sess->conn_list, conn_list_entry) {
344                         if (!test_bit(ISCSI_CONN_SHUTTINGDOWN, &c->conn_aflags)) {
345                                 sess->sess_shutting_down = 0;
346                                 break;
347                         }
348                 }
349
350                 if (conn->conn_reinst_successor != NULL) {
351                         sBUG_ON(!test_bit(ISCSI_CONN_REINSTATING,
352                                   &conn->conn_reinst_successor->conn_aflags));
353                         conn_reinst_finished(conn->conn_reinst_successor);
354                         conn->conn_reinst_successor = NULL;
355                 } else if (sess->sess_reinst_successor != NULL) {
356                         sess_reinst_finished(sess->sess_reinst_successor);
357                         sess->sess_reinst_successor = NULL;
358                 }
359                 mutex_unlock(&sess->target->target_mutex);
360
361                 complete_all(&conn->ready_to_free);
362                 break;
363         }
364         default:
365                 /* Nothing to do */
366                 break;
367         }
368
369         return;
370 }
371
372 /* No locks */
373 static void close_conn(struct iscsi_conn *conn)
374 {
375         struct iscsi_session *session = conn->session;
376         struct iscsi_target *target = conn->target;
377         typeof(jiffies) start_waiting = jiffies;
378         typeof(jiffies) shut_start_waiting = start_waiting;
379         bool pending_reported = 0, wait_expired = 0, shut_expired = 0;
380         bool reinst;
381
382 #define CONN_PENDING_TIMEOUT    ((typeof(jiffies))10*HZ)
383 #define CONN_WAIT_TIMEOUT       ((typeof(jiffies))10*HZ)
384 #define CONN_REG_SHUT_TIMEOUT   ((typeof(jiffies))125*HZ)
385 #define CONN_DEL_SHUT_TIMEOUT   ((typeof(jiffies))10*HZ)
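/*
 * The timeouts above: CONN_PENDING_TIMEOUT bounds how long pending commands
 * are waited for before being force-freed as orphaned; CONN_WAIT_TIMEOUT
 * bounds how long an active close waits before forcing SEND_SHUTDOWN;
 * CONN_REG_SHUT_TIMEOUT and CONN_DEL_SHUT_TIMEOUT bound how long to wait
 * after that shutdown before forcibly disconnecting the socket.
 */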
386
387         TRACE_ENTRY();
388
389         TRACE_CONN_CLOSE("Closing connection %p (conn_ref_cnt=%d)", conn,
390                 atomic_read(&conn->conn_ref_cnt));
391
392         iscsi_extracheck_is_rd_thread(conn);
393
394         sBUG_ON(!conn->closing);
395
396         if (conn->active_close) {
397                 /* We want all our already sent operations to complete */
398                 conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
399         } else {
400                 conn->sock->ops->shutdown(conn->sock,
401                         RCV_SHUTDOWN|SEND_SHUTDOWN);
402         }
403
404         mutex_lock(&session->target->target_mutex);
405
406         set_bit(ISCSI_CONN_SHUTTINGDOWN, &conn->conn_aflags);
407         reinst = (conn->conn_reinst_successor != NULL);
408
409         mutex_unlock(&session->target->target_mutex);
410
411         if (reinst) {
412                 int rc;
413                 int lun = 0;
414
415                 /* Abort all outstanding commands */
416                 rc = scst_rx_mgmt_fn_lun(session->scst_sess,
417                         SCST_ABORT_ALL_TASKS_SESS, (uint8_t *)&lun, sizeof(lun),
418                         SCST_NON_ATOMIC, conn);
419                 if (rc != 0)
420                         PRINT_ERROR("SCST_ABORT_ALL_TASKS_SESS failed %d", rc);
421         } else {
422                 int rc;
423                 int lun = 0;
424
425                 rc = scst_rx_mgmt_fn_lun(session->scst_sess,
426                         SCST_NEXUS_LOSS_SESS, (uint8_t *)&lun, sizeof(lun),
427                         SCST_NON_ATOMIC, conn);
428                 if (rc != 0)
429                         PRINT_ERROR("SCST_NEXUS_LOSS_SESS failed %d", rc);
430         }
431
432         if (conn->read_state != RX_INIT_BHS) {
433                 struct iscsi_cmnd *cmnd = conn->read_cmnd;
434
435                 if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) {
436                         TRACE_DBG("Going to wait for cmnd %p to change state "
437                                 "from RX_CMD", cmnd);
438                 }
439                 wait_event(conn->read_state_waitQ,
440                         cmnd->scst_state != ISCSI_CMD_STATE_RX_CMD);
441
442                 conn->read_cmnd = NULL;
443                 conn->read_state = RX_INIT_BHS;
444                 req_cmnd_release_force(cmnd, 0);
445         }
446
447         conn_abort(conn);
448
449         /* ToDo: not the best way to wait */
450         while (atomic_read(&conn->conn_ref_cnt) != 0) {
451                 mutex_lock(&target->target_mutex);
452                 spin_lock(&session->sn_lock);
453                 if (session->tm_rsp && session->tm_rsp->conn == conn) {
454                         struct iscsi_cmnd *tm_rsp = session->tm_rsp;
455                         TRACE(TRACE_MGMT_MINOR, "Dropping delayed TM rsp %p",
456                                 tm_rsp);
457                         session->tm_rsp = NULL;
458                         session->tm_active--;
459                         WARN_ON(session->tm_active < 0);
460                         spin_unlock(&session->sn_lock);
461                         mutex_unlock(&target->target_mutex);
462
463                         rsp_cmnd_release(tm_rsp);
464                 } else {
465                         spin_unlock(&session->sn_lock);
466                         mutex_unlock(&target->target_mutex);
467                 }
468
469                 /* It's safe to check it without sn_lock */
470                 if (!list_empty(&session->pending_list)) {
471                         TRACE_CONN_CLOSE_DBG("Disposing pending commands on "
472                                 "connection %p (conn_ref_cnt=%d)", conn,
473                                 atomic_read(&conn->conn_ref_cnt));
474
475                         free_pending_commands(conn);
476
477                         if (time_after(jiffies,
478                                 start_waiting + CONN_PENDING_TIMEOUT)) {
479                                 if (!pending_reported) {
480                                         TRACE_CONN_CLOSE("%s",
481                                                 "Pending wait time expired");
482                                         pending_reported = 1;
483                                 }
484                                 free_orphaned_pending_commands(conn);
485                         }
486                 }
487
488                 iscsi_make_conn_wr_active(conn);
489
490                 /* That's for active close only, actually */
491                 if (time_after(jiffies, start_waiting + CONN_WAIT_TIMEOUT) &&
492                     !wait_expired) {
493                         TRACE_CONN_CLOSE("Wait time expired (conn %p, "
494                                 "sk_state %d)",
495                                 conn, conn->sock->sk->sk_state);
496                         conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN);
497                         wait_expired = 1;
498                         shut_start_waiting = jiffies;
499                 }
500
501                 if (wait_expired && !shut_expired &&
502                     time_after(jiffies, shut_start_waiting +
503                                 (conn->deleting ? CONN_DEL_SHUT_TIMEOUT :
504                                                   CONN_REG_SHUT_TIMEOUT))) {
505                         TRACE_CONN_CLOSE("Wait time after shutdown expired "
506                                 "(conn %p, sk_state %d)", conn,
507                                 conn->sock->sk->sk_state);
508                         conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
509                         shut_expired = 1;
510                 }
511
512                 if (conn->deleting)
513                         msleep(200);
514                 else
515                         msleep(1000);
516
517                 TRACE_CONN_CLOSE_DBG("conn %p, conn_ref_cnt %d left, "
518                         "wr_state %d, exp_cmd_sn %u",
519                         conn, atomic_read(&conn->conn_ref_cnt),
520                         conn->wr_state, session->exp_cmd_sn);
521
522                 trace_conn_close(conn);
523
524                 iscsi_check_closewait(conn);
525         }
526
527         write_lock_bh(&conn->sock->sk->sk_callback_lock);
528         conn->sock->sk->sk_state_change = conn->old_state_change;
529         conn->sock->sk->sk_data_ready = conn->old_data_ready;
530         conn->sock->sk->sk_write_space = conn->old_write_space;
531         write_unlock_bh(&conn->sock->sk->sk_callback_lock);
532
533         while (1) {
534                 bool t;
535
536                 spin_lock_bh(&iscsi_wr_lock);
537                 t = (conn->wr_state == ISCSI_CONN_WR_STATE_IDLE);
538                 spin_unlock_bh(&iscsi_wr_lock);
539
540                 if (t && (atomic_read(&conn->conn_ref_cnt) == 0))
541                         break;
542
543                 TRACE_CONN_CLOSE_DBG("Waiting for wr thread (conn %p), "
544                         "wr_state %x", conn, conn->wr_state);
545                 msleep(50);
546         }
547
548         wait_for_completion(&conn->ready_to_free);
549
550         TRACE_CONN_CLOSE("Notifying user space about closing connection %p",
551                          conn);
552         event_send(target->tid, session->sid, conn->cid, E_CONN_CLOSE);
553
554 #ifdef CONFIG_SCST_PROC
555         mutex_lock(&target->target_mutex);
556         conn_free(conn);
557         mutex_unlock(&target->target_mutex);
558 #else
559         kobject_put(&conn->iscsi_conn_kobj);
560 #endif
561
562         TRACE_EXIT();
563         return;
564 }
565
566 static int close_conn_thr(void *arg)
567 {
568         struct iscsi_conn *conn = (struct iscsi_conn *)arg;
569
570         TRACE_ENTRY();
571
572 #ifdef CONFIG_SCST_EXTRACHECKS
573         /*
574          * To satisfy iscsi_extracheck_is_rd_thread() in functions called
575          * on the connection close. It is safe, because at this point conn
576          * can't be used by any other thread.
577          */
578         conn->rd_task = current;
579 #endif
580         close_conn(conn);
581
582         TRACE_EXIT();
583         return 0;
584 }
585
586 /* No locks */
587 static void start_close_conn(struct iscsi_conn *conn)
588 {
589         struct task_struct *t;
590
591         TRACE_ENTRY();
592
593         t = kthread_run(close_conn_thr, conn, "iscsi_conn_cleanup");
594         if (IS_ERR(t)) {
595                 PRINT_ERROR("kthread_run() failed (%ld), closing conn %p "
596                         "directly", PTR_ERR(t), conn);
597                 close_conn(conn);
598         }
599
600         TRACE_EXIT();
601         return;
602 }
603
604 static inline void iscsi_conn_init_read(struct iscsi_conn *conn,
605         void __user *data, size_t len)
606 {
607         conn->read_iov[0].iov_base = data;
608         conn->read_iov[0].iov_len = len;
609         conn->read_msg.msg_iov = conn->read_iov;
610         conn->read_msg.msg_iovlen = 1;
611         conn->read_size = len;
612         return;
613 }
614
615 static void iscsi_conn_prepare_read_ahs(struct iscsi_conn *conn,
616         struct iscsi_cmnd *cmnd)
617 {
618         int asize = (cmnd->pdu.ahssize + 3) & -4;
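        /*
         * "(x + 3) & -4" rounds the AHS length up to the next multiple of
         * 4 bytes, the iSCSI PDU padding unit (e.g. 5..8 all become 8).
         */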
619
620         /* ToDo: __GFP_NOFAIL ?? */
621         cmnd->pdu.ahs = kmalloc(asize, __GFP_NOFAIL|GFP_KERNEL);
622         sBUG_ON(cmnd->pdu.ahs == NULL);
623         iscsi_conn_init_read(conn, (void __force __user *)cmnd->pdu.ahs, asize);
624         return;
625 }
626
627 static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn)
628 {
629         struct iscsi_cmnd *cmnd = NULL;
630
631         spin_lock_bh(&conn->write_list_lock);
632         if (!list_empty(&conn->write_list)) {
633                 cmnd = list_entry(conn->write_list.next, struct iscsi_cmnd,
634                                 write_list_entry);
635                 cmd_del_from_write_list(cmnd);
636                 cmnd->write_processing_started = 1;
637         }
638         spin_unlock_bh(&conn->write_list_lock);
639
640         return cmnd;
641 }
642
643 /* Returns number of bytes left to receive or <0 for error */
644 static int do_recv(struct iscsi_conn *conn)
645 {
646         int res;
647         mm_segment_t oldfs;
648         struct msghdr msg;
649         int first_len;
650
651         EXTRACHECKS_BUG_ON(conn->read_cmnd == NULL);
652
653         if (unlikely(conn->closing)) {
654                 res = -EIO;
655                 goto out;
656         }
657
658         /*
659          * We suppose that if sock_recvmsg() returned less data than requested,
660          * then next time it will return -EAGAIN, so there's no point in
661          * calling it again.
662          */
663
664 restart:
665         memset(&msg, 0, sizeof(msg));
666         msg.msg_iov = conn->read_msg.msg_iov;
667         msg.msg_iovlen = conn->read_msg.msg_iovlen;
668         first_len = msg.msg_iov->iov_len;
669
670         oldfs = get_fs();
671         set_fs(get_ds());
672         res = sock_recvmsg(conn->sock, &msg, conn->read_size,
673                            MSG_DONTWAIT | MSG_NOSIGNAL);
674         set_fs(oldfs);
675
676         if (res > 0) {
677                 /*
678                  * To save some considerable effort and CPU power we
679                  * suppose that TCP functions adjust
680                  * conn->read_msg.msg_iov and conn->read_msg.msg_iovlen
681                  * on amount of copied data. This BUG_ON is intended
682                  * to catch if it is changed in the future.
683                  */
684                 sBUG_ON((res >= first_len) &&
685                         (conn->read_msg.msg_iov->iov_len != 0));
686                 conn->read_size -= res;
687                 if (conn->read_size != 0) {
688                         if (res >= first_len) {
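                                /*
                                 * Advance past the fully consumed iov
                                 * elements: the first one may have an
                                 * arbitrary length, the ones after it are
                                 * assumed to be PAGE_SIZE each, and a
                                 * partially consumed element has already
                                 * been adjusted in place (see the comment
                                 * above).
                                 */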
689                                 int done = 1 + ((res - first_len) >> PAGE_SHIFT);
690                                 conn->read_msg.msg_iov += done;
691                                 conn->read_msg.msg_iovlen -= done;
692                         }
693                 }
694                 res = conn->read_size;
695         } else {
696                 switch (res) {
697                 case -EAGAIN:
698                         TRACE_DBG("EAGAIN received for conn %p", conn);
699                         res = conn->read_size;
700                         break;
701                 case -ERESTARTSYS:
702                         TRACE_DBG("ERESTARTSYS received for conn %p", conn);
703                         goto restart;
704                 default:
705                         PRINT_ERROR("sock_recvmsg() failed: %d", res);
706                         mark_conn_closed(conn);
707                         if (res == 0)
708                                 res = -EIO;
709                         break;
710                 }
711         }
712
713 out:
714         TRACE_EXIT_RES(res);
715         return res;
716 }
717
718 static int iscsi_rx_check_ddigest(struct iscsi_conn *conn)
719 {
720         struct iscsi_cmnd *cmnd = conn->read_cmnd;
721         int res;
722
723         res = do_recv(conn);
724         if (res == 0) {
725                 conn->read_state = RX_END;
726
727                 if (cmnd->pdu.datasize <= 16*1024) {
728                         /*
729                          * It's cache hot, so let's compute it inline. The
730                          * trade-off here is what will expose more latency:
731                          * possible cache misses or the digest calculation.
732                          */
733                         TRACE_DBG("cmnd %p, opcode %x: checking RX "
734                                 "ddigest inline", cmnd, cmnd_opcode(cmnd));
735                         cmnd->ddigest_checked = 1;
736                         res = digest_rx_data(cmnd);
737                         if (unlikely(res != 0)) {
738                                 mark_conn_closed(conn);
739                                 goto out;
740                         }
741                 } else if (cmnd_opcode(cmnd) == ISCSI_OP_SCSI_CMD) {
742                         cmd_add_on_rx_ddigest_list(cmnd, cmnd);
743                         cmnd_get(cmnd);
744                 } else if (cmnd_opcode(cmnd) != ISCSI_OP_SCSI_DATA_OUT) {
745                         /*
746                          * We could get here only for NOP-Out. The iSCSI RFC
747                          * doesn't specify how to deal with digest errors in
748                          * this case. Is closing the connection correct?
749                          */
750                         TRACE_DBG("cmnd %p, opcode %x: checking NOP RX "
751                                 "ddigest", cmnd, cmnd_opcode(cmnd));
752                         res = digest_rx_data(cmnd);
753                         if (unlikely(res != 0)) {
754                                 mark_conn_closed(conn);
755                                 goto out;
756                         }
757                 }
758         }
759
760 out:
761         return res;
762 }
763
764 /* No locks, conn is rd processing */
765 static void process_read_io(struct iscsi_conn *conn, int *closed)
766 {
767         struct iscsi_cmnd *cmnd = conn->read_cmnd;
768         int res;
769
770         TRACE_ENTRY();
771
772         /* In case of error cmnd will be freed in close_conn() */
773
774         do {
775                 switch (conn->read_state) {
776                 case RX_INIT_BHS:
777                         EXTRACHECKS_BUG_ON(conn->read_cmnd != NULL);
778                         cmnd = cmnd_alloc(conn, NULL);
779                         conn->read_cmnd = cmnd;
780                         iscsi_conn_init_read(cmnd->conn,
781                                 (void __force __user *)&cmnd->pdu.bhs,
782                                 sizeof(cmnd->pdu.bhs));
783                         conn->read_state = RX_BHS;
784                         /* fall through */
785
786                 case RX_BHS:
787                         res = do_recv(conn);
788                         if (res == 0) {
789                                 iscsi_cmnd_get_length(&cmnd->pdu);
790                                 if (cmnd->pdu.ahssize == 0) {
791                                         if ((conn->hdigest_type & DIGEST_NONE) == 0)
792                                                 conn->read_state = RX_INIT_HDIGEST;
793                                         else
794                                                 conn->read_state = RX_CMD_START;
795                                 } else {
796                                         iscsi_conn_prepare_read_ahs(conn, cmnd);
797                                         conn->read_state = RX_AHS;
798                                 }
799                         }
800                         break;
801
802                 case RX_CMD_START:
803                         res = cmnd_rx_start(cmnd);
804                         if (res == 0) {
805                                 if (cmnd->pdu.datasize == 0)
806                                         conn->read_state = RX_END;
807                                 else
808                                         conn->read_state = RX_DATA;
809                         } else if (res > 0)
810                                 conn->read_state = RX_CMD_CONTINUE;
811                         else
812                                 sBUG_ON(!conn->closing);
813                         break;
814
815                 case RX_CMD_CONTINUE:
816                         if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) {
817                                 TRACE_DBG("cmnd %p is still in RX_CMD state",
818                                         cmnd);
819                                 res = 1;
820                                 break;
821                         }
822                         res = cmnd_rx_continue(cmnd);
823                         if (unlikely(res != 0))
824                                 sBUG_ON(!conn->closing);
825                         else {
826                                 if (cmnd->pdu.datasize == 0)
827                                         conn->read_state = RX_END;
828                                 else
829                                         conn->read_state = RX_DATA;
830                         }
831                         break;
832
833                 case RX_DATA:
834                         res = do_recv(conn);
835                         if (res == 0) {
836                                 int psz = ((cmnd->pdu.datasize + 3) & -4) - cmnd->pdu.datasize;
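                                /* 0..3 pad bytes to reach a 4-byte boundary */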
837                                 if (psz != 0) {
838                                         TRACE_DBG("padding %d bytes", psz);
839                                         iscsi_conn_init_read(conn,
840                                                 (void __force __user *)&conn->rpadding, psz);
841                                         conn->read_state = RX_PADDING;
842                                 } else if ((conn->ddigest_type & DIGEST_NONE) != 0)
843                                         conn->read_state = RX_END;
844                                 else
845                                         conn->read_state = RX_INIT_DDIGEST;
846                         }
847                         break;
848
849                 case RX_END:
850                         if (unlikely(conn->read_size != 0)) {
851                                 PRINT_CRIT_ERROR("%d %x %d", res,
852                                         cmnd_opcode(cmnd), conn->read_size);
853                                 sBUG();
854                         }
855                         conn->read_cmnd = NULL;
856                         conn->read_state = RX_INIT_BHS;
857
858                         cmnd_rx_end(cmnd);
859
860                         EXTRACHECKS_BUG_ON(conn->read_size != 0);
861                         break;
862
863                 case RX_INIT_HDIGEST:
864                         iscsi_conn_init_read(conn,
865                                 (void __force __user *)&cmnd->hdigest, sizeof(u32));
866                         conn->read_state = RX_CHECK_HDIGEST;
867                         /* fall through */
868
869                 case RX_CHECK_HDIGEST:
870                         res = do_recv(conn);
871                         if (res == 0) {
872                                 res = digest_rx_header(cmnd);
873                                 if (unlikely(res != 0)) {
874                                         PRINT_ERROR("rx header digest for "
875                                                 "initiator %s failed (%d)",
876                                                 conn->session->initiator_name,
877                                                 res);
878                                         mark_conn_closed(conn);
879                                 } else
880                                         conn->read_state = RX_CMD_START;
881                         }
882                         break;
883
884                 case RX_INIT_DDIGEST:
885                         iscsi_conn_init_read(conn,
886                                 (void __force __user *)&cmnd->ddigest,
887                                 sizeof(u32));
888                         conn->read_state = RX_CHECK_DDIGEST;
889                         /* fall through */
890
891                 case RX_CHECK_DDIGEST:
892                         res = iscsi_rx_check_ddigest(conn);
893                         break;
894
895                 case RX_AHS:
896                         res = do_recv(conn);
897                         if (res == 0) {
898                                 if ((conn->hdigest_type & DIGEST_NONE) == 0)
899                                         conn->read_state = RX_INIT_HDIGEST;
900                                 else
901                                         conn->read_state = RX_CMD_START;
902                         }
903                         break;
904
905                 case RX_PADDING:
906                         res = do_recv(conn);
907                         if (res == 0) {
908                                 if ((conn->ddigest_type & DIGEST_NONE) == 0)
909                                         conn->read_state = RX_INIT_DDIGEST;
910                                 else
911                                         conn->read_state = RX_END;
912                         }
913                         break;
914
915                 default:
916                         PRINT_CRIT_ERROR("%d %x", conn->read_state, cmnd_opcode(cmnd));
917                         res = -1; /* to keep compiler happy */
918                         sBUG();
919                 }
920         } while (res == 0);
921
922         if (unlikely(conn->closing)) {
923                 start_close_conn(conn);
924                 *closed = 1;
925         }
926
927         TRACE_EXIT();
928         return;
929 }
930
931 /*
932  * Called under iscsi_rd_lock and BHs disabled, but will drop it inside,
933  * then reacquire it.
934  */
935 static void scst_do_job_rd(void)
936         __acquires(&iscsi_rd_lock)
937         __releases(&iscsi_rd_lock)
938 {
939         TRACE_ENTRY();
940
941         /*
942          * Connections are removed from the head and re-added at the tail for fairness between them.
943          */
944
945         while (!list_empty(&iscsi_rd_list)) {
946                 int closed = 0;
947                 struct iscsi_conn *conn = list_entry(iscsi_rd_list.next,
948                         typeof(*conn), rd_list_entry);
949
950                 list_del(&conn->rd_list_entry);
951
952                 sBUG_ON(conn->rd_state == ISCSI_CONN_RD_STATE_PROCESSING);
953                 conn->rd_data_ready = 0;
954                 conn->rd_state = ISCSI_CONN_RD_STATE_PROCESSING;
955 #ifdef CONFIG_SCST_EXTRACHECKS
956                 conn->rd_task = current;
957 #endif
958                 spin_unlock_bh(&iscsi_rd_lock);
959
960                 process_read_io(conn, &closed);
961
962                 spin_lock_bh(&iscsi_rd_lock);
963
964                 if (closed)
965                         continue;
966
967 #ifdef CONFIG_SCST_EXTRACHECKS
968                 conn->rd_task = NULL;
969 #endif
970                 if (conn->rd_data_ready) {
971                         list_add_tail(&conn->rd_list_entry, &iscsi_rd_list);
972                         conn->rd_state = ISCSI_CONN_RD_STATE_IN_LIST;
973                 } else
974                         conn->rd_state = ISCSI_CONN_RD_STATE_IDLE;
975         }
976
977         TRACE_EXIT();
978         return;
979 }
980
981 static inline int test_rd_list(void)
982 {
983         int res = !list_empty(&iscsi_rd_list) ||
984                   unlikely(kthread_should_stop());
985         return res;
986 }
987
988 int istrd(void *arg)
989 {
990         TRACE_ENTRY();
991
992         PRINT_INFO("Read thread started, PID %d", current->pid);
993
994         current->flags |= PF_NOFREEZE;
995
996         spin_lock_bh(&iscsi_rd_lock);
997         while (!kthread_should_stop()) {
998                 wait_queue_t wait;
999                 init_waitqueue_entry(&wait, current);
1000
1001                 if (!test_rd_list()) {
1002                         add_wait_queue_exclusive_head(&iscsi_rd_waitQ, &wait);
1003                         for (;;) {
1004                                 set_current_state(TASK_INTERRUPTIBLE);
1005                                 if (test_rd_list())
1006                                         break;
1007                                 spin_unlock_bh(&iscsi_rd_lock);
1008                                 schedule();
1009                                 spin_lock_bh(&iscsi_rd_lock);
1010                         }
1011                         set_current_state(TASK_RUNNING);
1012                         remove_wait_queue(&iscsi_rd_waitQ, &wait);
1013                 }
1014                 scst_do_job_rd();
1015         }
1016         spin_unlock_bh(&iscsi_rd_lock);
1017
1018         /*
1019          * If kthread_should_stop() is true, we are guaranteed to be
1020          * on the module unload, so iscsi_rd_list must be empty.
1021          */
1022         sBUG_ON(!list_empty(&iscsi_rd_list));
1023
1024         PRINT_INFO("Read thread PID %d finished", current->pid);
1025
1026         TRACE_EXIT();
1027         return 0;
1028 }
1029
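/*
 * Zero-copy transmit support (only with the
 * CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION kernel patch
 * applied): a transmitted page's net_priv field points to the iscsi_cmnd
 * owning it, and the get/put page callbacks below adjust that command's
 * net_ref_cnt, so neither the command nor its pages can be freed until the
 * network stack has dropped its last page reference.
 */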
1030 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
1031 static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd)
1032 {
1033         int v;
1034
1035         TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d",
1036                 cmd, atomic_read(&cmd->net_ref_cnt)+1);
1037
1038         v = atomic_inc_return(&cmd->net_ref_cnt);
1039         if (v == 1) {
1040                 TRACE_NET_PAGE("getting cmd %p", cmd);
1041                 cmnd_get(cmd);
1042         }
1043         return;
1044 }
1045
1046 void iscsi_get_page_callback(struct page *page)
1047 {
1048         struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;
1049
1050         TRACE_NET_PAGE("page %p, _count %d", page,
1051                 atomic_read(&page->_count));
1052
1053         __iscsi_get_page_callback(cmd);
1054         return;
1055 }
1056
1057 static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd)
1058 {
1059         TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d", cmd,
1060                 atomic_read(&cmd->net_ref_cnt)-1);
1061
1062         if (atomic_dec_and_test(&cmd->net_ref_cnt)) {
1063                 int i, sg_cnt = cmd->sg_cnt;
1064                 for (i = 0; i < sg_cnt; i++) {
1065                         struct page *page = sg_page(&cmd->sg[i]);
1066                         TRACE_NET_PAGE("Clearing page %p", page);
1067                         if (page->net_priv == cmd)
1068                                 page->net_priv = NULL;
1069                 }
1070                 cmnd_put(cmd);
1071         }
1072         return;
1073 }
1074
1075 void iscsi_put_page_callback(struct page *page)
1076 {
1077         struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;
1078
1079         TRACE_NET_PAGE("page %p, _count %d", page,
1080                 atomic_read(&page->_count));
1081
1082         __iscsi_put_page_callback(cmd);
1083         return;
1084 }
1085
1086 static void check_net_priv(struct iscsi_cmnd *cmd, struct page *page)
1087 {
1088         if ((atomic_read(&cmd->net_ref_cnt) == 1) && (page->net_priv == cmd)) {
1089                 TRACE_DBG("sendpage() didn't call get_page(), zeroing net_priv "
1090                         "%p (page %p)", page->net_priv, page);
1091                 page->net_priv = NULL;
1092         }
1093         return;
1094 }
1095 #else
1096 static inline void check_net_priv(struct iscsi_cmnd *cmd, struct page *page) {}
1097 static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd) {}
1098 static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd) {}
1099 #endif
1100
1101 /* This is partially taken from the Ardis code. */
1102 static int write_data(struct iscsi_conn *conn)
1103 {
1104         mm_segment_t oldfs;
1105         struct file *file;
1106         struct iovec *iop;
1107         struct socket *sock;
1108         ssize_t (*sock_sendpage)(struct socket *, struct page *, int, size_t,
1109                                  int);
1110         ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
1111         struct iscsi_cmnd *write_cmnd = conn->write_cmnd;
1112         struct iscsi_cmnd *ref_cmd;
1113         struct page *page;
1114         struct scatterlist *sg;
1115         int saved_size, size, sendsize;
1116         int length, offset, idx;
1117         int flags, res, count, sg_size;
1118         bool do_put = false, ref_cmd_to_parent;
1119
1120         TRACE_ENTRY();
1121
1122         iscsi_extracheck_is_wr_thread(conn);
1123
1124         if (write_cmnd->own_sg == 0) {
1125                 ref_cmd = write_cmnd->parent_req;
1126                 ref_cmd_to_parent = true;
1127         } else {
1128                 ref_cmd = write_cmnd;
1129                 ref_cmd_to_parent = false;
1130         }
1131
1132         if (!ref_cmd->on_written_list) {
1133                 TRACE_DBG("Adding cmd %p to conn %p written_list", ref_cmd,
1134                         conn);
1135                 spin_lock_bh(&conn->write_list_lock);
1136                 ref_cmd->on_written_list = 1;
1137                 ref_cmd->write_timeout = jiffies + ISCSI_RSP_TIMEOUT;
1138                 list_add_tail(&ref_cmd->written_list_entry,
1139                         &conn->written_list);
1140                 spin_unlock_bh(&conn->write_list_lock);
1141         }
1142
1143         if (!timer_pending(&conn->rsp_timer)) {
1144                 sBUG_ON(!ref_cmd->on_written_list);
1145                 spin_lock_bh(&conn->write_list_lock);
1146                 if (likely(!timer_pending(&conn->rsp_timer))) {
1147                         TRACE_DBG("Starting timer on %ld (conn %p)",
1148                                 ref_cmd->write_timeout, conn);
1149                         conn->rsp_timer.expires = ref_cmd->write_timeout;
1150                         add_timer(&conn->rsp_timer);
1151                 }
1152                 spin_unlock_bh(&conn->write_list_lock);
1153         }
1154
1155         file = conn->file;
1156         size = conn->write_size;
1157         saved_size = size;
1158         iop = conn->write_iop;
1159         count = conn->write_iop_used;
1160
1161         if (iop) {
1162                 while (1) {
1163                         loff_t off = 0;
1164                         int rest;
1165
1166                         sBUG_ON(count > (signed)(sizeof(conn->write_iov) /
1167                                                 sizeof(conn->write_iov[0])));
1168 retry:
1169                         oldfs = get_fs();
1170                         set_fs(KERNEL_DS);
1171                         res = vfs_writev(file,
1172                                          (struct iovec __force __user *)iop,
1173                                          count, &off);
1174                         set_fs(oldfs);
1175                         TRACE_WRITE("sid %#Lx, cid %u, res %d, iov_len %ld",
1176                                     (long long unsigned int)conn->session->sid,
1177                                     conn->cid, res, (long)iop->iov_len);
1178                         if (unlikely(res <= 0)) {
1179                                 if (res == -EAGAIN) {
1180                                         conn->write_iop = iop;
1181                                         conn->write_iop_used = count;
1182                                         goto out_iov;
1183                                 } else if (res == -EINTR)
1184                                         goto retry;
1185                                 goto out_err;
1186                         }
1187
1188                         rest = res;
1189                         size -= res;
1190                         while ((typeof(rest))iop->iov_len <= rest && rest) {
1191                                 rest -= iop->iov_len;
1192                                 iop++;
1193                                 count--;
1194                         }
1195                         if (count == 0) {
1196                                 conn->write_iop = NULL;
1197                                 conn->write_iop_used = 0;
1198                                 if (size)
1199                                         break;
1200                                 goto out_iov;
1201                         }
1202                         sBUG_ON(iop > conn->write_iov + sizeof(conn->write_iov)
1203                                                   /sizeof(conn->write_iov[0]));
1204                         iop->iov_base += rest;
1205                         iop->iov_len -= rest;
1206                 }
1207         }
1208
1209         sg = write_cmnd->sg;
1210         if (unlikely(sg == NULL)) {
1211                 PRINT_INFO("WARNING: Data missed (cmd %p)!", write_cmnd);
1212                 res = 0;
1213                 goto out;
1214         }
1215
1216         /* To protect against a too early transfer completion race */
1217         __iscsi_get_page_callback(ref_cmd);
1218         do_put = true;
1219
1220         sock = conn->sock;
1221
1222 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
1223         sock_sendpage = sock->ops->sendpage;
1224 #else
1225         if ((write_cmnd->parent_req->scst_cmd != NULL) &&
1226             scst_cmd_get_dh_data_buff_alloced(write_cmnd->parent_req->scst_cmd))
1227                 sock_sendpage = sock_no_sendpage;
1228         else
1229                 sock_sendpage = sock->ops->sendpage;
1230 #endif
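        /*
         * Presumably, without the zero-copy completion notification there
         * is no way to know when TCP has finished with a page, so dev
         * handler allocated buffers, which may go away as soon as the
         * command completes, are sent with plain copying
         * (sock_no_sendpage()) instead of the zero-copy ->sendpage() path.
         */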
1231
1232         flags = MSG_DONTWAIT;
1233         sg_size = size;
1234
1235         if (sg != write_cmnd->rsp_sg) {
1236                 offset = conn->write_offset + sg[0].offset;
1237                 idx = offset >> PAGE_SHIFT;
1238                 offset &= ~PAGE_MASK;
1239                 length = min(size, (int)PAGE_SIZE - offset);
1240                 TRACE_WRITE("write_offset %d, sg_size %d, idx %d, offset %d, "
1241                         "length %d", conn->write_offset, sg_size, idx, offset,
1242                         length);
1243         } else {
1244                 idx = 0;
1245                 offset = conn->write_offset;
1246                 while (offset >= sg[idx].length) {
1247                         offset -= sg[idx].length;
1248                         idx++;
1249                 }
1250                 length = sg[idx].length - offset;
1251                 offset += sg[idx].offset;
1252                 sock_sendpage = sock_no_sendpage;
1253                 TRACE_WRITE("rsp_sg: write_offset %d, sg_size %d, idx %d, "
1254                         "offset %d, length %d", conn->write_offset, sg_size,
1255                         idx, offset, length);
1256         }
1257         page = sg_page(&sg[idx]);
1258
1259         while (1) {
1260                 sendpage = sock_sendpage;
1261
1262 #if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
1263                 {
1264                         static DEFINE_SPINLOCK(net_priv_lock);
1265                         spin_lock(&net_priv_lock);
1266                         if (unlikely(page->net_priv != NULL)) {
1267                                 if (page->net_priv != ref_cmd) {
1268                                         /*
1269                                          * This might happen if user space
1270                                          * supplies to scst_user the same
1271                                          * pages in different commands or in
1272                                          * case of zero-copy FILEIO, when
1273                                          * several initiators request the same
1274                                          * data simultaneously.
1275                                          */
1276                                         TRACE_DBG("net_priv isn't NULL and != "
1277                                             "ref_cmd (write_cmnd %p, ref_cmd "
1278                                             "%p, sg %p, idx %d, page %p, "
1279                                             "net_priv %p)",
1280                                             write_cmnd, ref_cmd, sg, idx,
1281                                             page, page->net_priv);
1282                                         sendpage = sock_no_sendpage;
1283                                 }
1284                         } else
1285                                 page->net_priv = ref_cmd;
1286                         spin_unlock(&net_priv_lock);
1287                 }
1288 #endif
1289                 sendsize = min(size, length);
1290                 if (size <= sendsize) {
1291 retry2:
1292                         res = sendpage(sock, page, offset, size, flags);
1293                         TRACE_WRITE("Final %s sid %#Lx, cid %u, res %d (page "
1294                                 "index %lu, offset %u, size %u, cmd %p, "
1295                                 "page %p)", (sendpage != sock_no_sendpage) ?
1296                                                 "sendpage" : "sock_no_sendpage",
1297                                 (long long unsigned int)conn->session->sid,
1298                                 conn->cid, res, page->index,
1299                                 offset, size, write_cmnd, page);
1300                         if (unlikely(res <= 0)) {
1301                                 if (res == -EINTR)
1302                                         goto retry2;
1303                                 else
1304                                         goto out_res;
1305                         }
1306
1307                         check_net_priv(ref_cmd, page);
1308                         if (res == size) {
1309                                 conn->write_size = 0;
1310                                 res = saved_size;
1311                                 goto out_put;
1312                         }
1313
1314                         offset += res;
1315                         size -= res;
1316                         goto retry2;
1317                 }
1318
1319 retry1:
1320                 res = sendpage(sock, page, offset, sendsize, flags | MSG_MORE);
1321                 TRACE_WRITE("%s sid %#Lx, cid %u, res %d (page index %lu, "
1322                         "offset %u, sendsize %u, size %u, cmd %p, page %p)",
1323                         (sendpage != sock_no_sendpage) ? "sendpage" :
1324                                                          "sock_no_sendpage",
1325                         (unsigned long long)conn->session->sid, conn->cid,
1326                         res, page->index, offset, sendsize, size,
1327                         write_cmnd, page);
1328                 if (unlikely(res <= 0)) {
1329                         if (res == -EINTR)
1330                                 goto retry1;
1331                         else
1332                                 goto out_res;
1333                 }
1334
1335                 check_net_priv(ref_cmd, page);
1336
1337                 size -= res;
1338
1339                 if (res == sendsize) {
1340                         idx++;
1341                         EXTRACHECKS_BUG_ON(idx >= ref_cmd->sg_cnt);
1342                         page = sg_page(&sg[idx]);
1343                         length = sg[idx].length;
1344                         offset = sg[idx].offset;
1345                 } else {
1346                         offset += res;
1347                         sendsize -= res;
1348                         goto retry1;
1349                 }
1350         }
1351
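/*
 * Record how far into the sg list the transfer got, so that the next call
 * can resume from the same place.
 */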
1352 out_off:
1353         conn->write_offset += sg_size - size;
1354
1355 out_iov:
1356         conn->write_size = size;
1357         if ((saved_size == size) && res == -EAGAIN)
1358                 goto out_put;
1359
1360         res = saved_size - size;
1361
1362 out_put:
1363         if (do_put)
1364                 __iscsi_put_page_callback(ref_cmd);
1365
1366 out:
1367         TRACE_EXIT_RES(res);
1368         return res;
1369
1370 out_res:
1371         check_net_priv(ref_cmd, page);
1372         if (res == -EAGAIN)
1373                 goto out_off;
1374         /* else fall through */
1375
1376 out_err:
1377 #ifndef CONFIG_SCST_DEBUG
1378         if (!conn->closing)
1379 #endif
1380         {
1381                 PRINT_ERROR("error %d at sid:cid %#llx:%u, cmnd %p", res,
1382                             (unsigned long long)conn->session->sid,
1383                             conn->cid, conn->write_cmnd);
1384         }
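             /* Tell the SCST core that this response/AEN was not delivered. */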
1385         if (ref_cmd_to_parent &&
1386             ((ref_cmd->scst_cmd != NULL) || (ref_cmd->scst_aen != NULL))) {
1387                 if (ref_cmd->scst_state == ISCSI_CMD_STATE_AEN)
1388                         scst_set_aen_delivery_status(ref_cmd->scst_aen,
1389                                 SCST_AEN_RES_FAILED);
1390                 else
1391                         scst_set_delivery_status(ref_cmd->scst_cmd,
1392                                 SCST_CMD_DELIVERY_FAILED);
1393         }
1394         goto out_put;
1395 }
1396
1397 static int exit_tx(struct iscsi_conn *conn, int res)
1398 {
1399         iscsi_extracheck_is_wr_thread(conn);
1400
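             /*
              * -EAGAIN and -ERESTARTSYS are transient: report 0 so that the
              * write thread retries the send later. Any other error is fatal
              * for this connection.
              */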
1401         switch (res) {
1402         case -EAGAIN:
1403         case -ERESTARTSYS:
1404                 res = 0;
1405                 break;
1406         default:
1407 #ifndef CONFIG_SCST_DEBUG
1408                 if (!conn->closing)
1409 #endif
1410                 {
1411                         PRINT_ERROR("Sending data failed: initiator %s, "
1412                                 "write_size %d, write_state %d, res %d",
1413                                 conn->session->initiator_name,
1414                                 conn->write_size,
1415                                 conn->write_state, res);
1416                 }
1417                 conn->write_state = TX_END;
1418                 conn->write_size = 0;
1419                 mark_conn_closed(conn);
1420                 break;
1421         }
1422         return res;
1423 }
1424
1425 static int tx_ddigest(struct iscsi_cmnd *cmnd, int state)
1426 {
1427         int res, rest = cmnd->conn->write_size;
1428         struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
1429         struct kvec iov;
1430
1431         iscsi_extracheck_is_wr_thread(cmnd->conn);
1432
1433         TRACE_DBG("Sending data digest %x (cmd %p)", cmnd->ddigest, cmnd);
1434
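             /*
              * write_size holds the number of digest bytes still to send, so
              * resume inside the 4-byte digest where the previous attempt
              * stopped.
              */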
1435         iov.iov_base = (char *)(&cmnd->ddigest) + (sizeof(u32) - rest);
1436         iov.iov_len = rest;
1437
1438         res = kernel_sendmsg(cmnd->conn->sock, &msg, &iov, 1, rest);
1439         if (res > 0) {
1440                 cmnd->conn->write_size -= res;
1441                 if (!cmnd->conn->write_size)
1442                         cmnd->conn->write_state = state;
1443         } else
1444                 res = exit_tx(cmnd->conn, res);
1445
1446         return res;
1447 }
1448
1449 static void init_tx_hdigest(struct iscsi_cmnd *cmnd)
1450 {
1451         struct iscsi_conn *conn = cmnd->conn;
1452         struct iovec *iop;
1453
1454         iscsi_extracheck_is_wr_thread(conn);
1455
1456         digest_tx_header(cmnd);
1457
1458         sBUG_ON(conn->write_iop_used >=
1459                 (signed)ARRAY_SIZE(conn->write_iov));
1460
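             /*
              * Append the header digest as an extra entry at the end of the
              * write_iop vector, so it is sent together with the queued
              * header iovecs.
              */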
1461         iop = &conn->write_iop[conn->write_iop_used];
1462         conn->write_iop_used++;
1463         iop->iov_base = (void __force __user *)&(cmnd->hdigest);
1464         iop->iov_len = sizeof(u32);
1465         conn->write_size += sizeof(u32);
1466
1467         return;
1468 }
1469
1470 static int tx_padding(struct iscsi_cmnd *cmnd, int state)
1471 {
1472         int res, rest = cmnd->conn->write_size;
1473         struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
1474         struct kvec iov;
1475         static const u32 padding;
1476
1477         iscsi_extracheck_is_wr_thread(cmnd->conn);
1478
1479         TRACE_DBG("Sending %d padding bytes (cmd %p)", rest, cmnd);
1480
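             /*
              * Padding bytes are taken from a constant zero word; as in
              * tx_ddigest(), write_size tracks how many of them remain.
              */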
1481         iov.iov_base = (char *)(&padding) + (sizeof(u32) - rest);
1482         iov.iov_len = rest;
1483
1484         res = kernel_sendmsg(cmnd->conn->sock, &msg, &iov, 1, rest);
1485         if (res > 0) {
1486                 cmnd->conn->write_size -= res;
1487                 if (!cmnd->conn->write_size)
1488                         cmnd->conn->write_state = state;
1489         } else
1490                 res = exit_tx(cmnd->conn, res);
1491
1492         return res;
1493 }
1494
1495 static int iscsi_do_send(struct iscsi_conn *conn, int state)
1496 {
1497         int res;
1498
1499         iscsi_extracheck_is_wr_thread(conn);
1500
1501         res = write_data(conn);
1502         if (res > 0) {
1503                 if (!conn->write_size)
1504                         conn->write_state = state;
1505         } else
1506                 res = exit_tx(conn, res);
1507
1508         return res;
1509 }
1510
1511 /*
1512  * No locks needed, conn is being processed by the write thread.
1513  *
1514  * IMPORTANT! The connection must be protected by an additional conn_get()
1515  * before entering this function, because otherwise it could be destroyed
1516  * from inside it as a result of cmnd release.
1517  */
1518 int iscsi_send(struct iscsi_conn *conn)
1519 {
1520         struct iscsi_cmnd *cmnd = conn->write_cmnd;
1521         int ddigest, res = 0;
1522
1523         TRACE_ENTRY();
1524
1525         TRACE_DBG("conn %p, write_cmnd %p", conn, cmnd);
1526
1527         iscsi_extracheck_is_wr_thread(conn);
1528
1529         ddigest = conn->ddigest_type != DIGEST_NONE ? 1 : 0;
1530
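             /*
              * The state machine below resumes from conn->write_state, so a
              * partially sent PDU continues where the previous attempt
              * stopped. The cases intentionally fall through to the next
              * stage.
              */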
1531         switch (conn->write_state) {
1532         case TX_INIT:
1533                 sBUG_ON(cmnd != NULL);
1534                 cmnd = conn->write_cmnd = iscsi_get_send_cmnd(conn);
1535                 if (!cmnd)
1536                         goto out;
1537                 cmnd_tx_start(cmnd);
1538                 if (!(conn->hdigest_type & DIGEST_NONE))
1539                         init_tx_hdigest(cmnd);
1540                 conn->write_state = TX_BHS_DATA;
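                     /* fall through */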
1541         case TX_BHS_DATA:
1542                 res = iscsi_do_send(conn, cmnd->pdu.datasize ?
1543                                         TX_INIT_PADDING : TX_END);
1544                 if (res <= 0 || conn->write_state != TX_INIT_PADDING)
1545                         break;
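                     /* fall through */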
1546         case TX_INIT_PADDING:
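                     /*
                      * write_size becomes the number of pad bytes needed to
                      * round the data segment up to a 4-byte boundary.
                      */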
1547                 cmnd->conn->write_size = ((cmnd->pdu.datasize + 3) & -4) -
1548                                                 cmnd->pdu.datasize;
1549                 if (cmnd->conn->write_size != 0)
1550                         conn->write_state = TX_PADDING;
1551                 else if (ddigest)
1552                         conn->write_state = TX_INIT_DDIGEST;
1553                 else
1554                         conn->write_state = TX_END;
1555                 break;
1556         case TX_PADDING:
1557                 res = tx_padding(cmnd, ddigest ? TX_INIT_DDIGEST : TX_END);
1558                 if (res <= 0 || conn->write_state != TX_INIT_DDIGEST)
1559                         break;
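                     /* fall through */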
1560         case TX_INIT_DDIGEST:
1561                 cmnd->conn->write_size = sizeof(u32);
1562                 conn->write_state = TX_DDIGEST;
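                     /* fall through */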
1563         case TX_DDIGEST:
1564                 res = tx_ddigest(cmnd, TX_END);
1565                 break;
1566         default:
1567                 PRINT_CRIT_ERROR("%d %d %x", res, conn->write_state,
1568                         cmnd_opcode(cmnd));
1569                 sBUG();
1570         }
1571
1572         if (res == 0)
1573                 goto out;
1574
1575         if (conn->write_state != TX_END)
1576                 goto out;
1577
1578         if (unlikely(conn->write_size)) {
1579                 PRINT_CRIT_ERROR("%d %x %u", res, cmnd_opcode(cmnd),
1580                         conn->write_size);
1581                 sBUG();
1582         }
1583         cmnd_tx_end(cmnd);
1584
1585         rsp_cmnd_release(cmnd);
1586
1587         conn->write_cmnd = NULL;
1588         conn->write_state = TX_INIT;
1589
1590 out:
1591         TRACE_EXIT_RES(res);
1592         return res;
1593 }
1594
1595 /* No locks needed, conn is being processed by the write thread.
1596  *
1597  * IMPORTANT! The connection must be protected by an additional conn_get()
1598  * before entering this function, because otherwise it could be destroyed
1599  * from inside it by iscsi_send(), which releases sent commands.
1600  */
1601 static int process_write_queue(struct iscsi_conn *conn)
1602 {
1603         int res = 0;
1604
1605         TRACE_ENTRY();
1606
1607         if (likely(test_write_ready(conn)))
1608                 res = iscsi_send(conn);
1609
1610         TRACE_EXIT_RES(res);
1611         return res;
1612 }
1613
1614 /*
1615  * Called under iscsi_wr_lock and BHs disabled, but will drop it inside,
1616  * then reacquire it.
1617  */
1618 static void scst_do_job_wr(void)
1619         __acquires(&iscsi_wr_lock)
1620         __releases(&iscsi_wr_lock)
1621 {
1622         TRACE_ENTRY();
1623
1624         /*
1625          * Connections are removed from the head of the list and re-added
              * at the tail to maintain fairness between them.
1626          */
1627
1628         while (!list_empty(&iscsi_wr_list)) {
1629                 int rc;
1630                 struct iscsi_conn *conn = list_entry(iscsi_wr_list.next,
1631                         typeof(*conn), wr_list_entry);
1632
1633                 TRACE_DBG("conn %p, wr_state %x, wr_space_ready %d, "
1634                         "write ready %d", conn, conn->wr_state,
1635                         conn->wr_space_ready, test_write_ready(conn));
1636
1637                 list_del(&conn->wr_list_entry);
1638
1639                 sBUG_ON(conn->wr_state == ISCSI_CONN_WR_STATE_PROCESSING);
1640
1641                 conn->wr_state = ISCSI_CONN_WR_STATE_PROCESSING;
1642                 conn->wr_space_ready = 0;
1643 #ifdef CONFIG_SCST_EXTRACHECKS
1644                 conn->wr_task = current;
1645 #endif
1646                 spin_unlock_bh(&iscsi_wr_lock);
1647
1648                 conn_get(conn);
1649
1650                 rc = process_write_queue(conn);
1651
1652                 spin_lock_bh(&iscsi_wr_lock);
1653 #ifdef CONFIG_SCST_EXTRACHECKS
1654                 conn->wr_task = NULL;
1655 #endif
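                     /*
                      * Nothing was sent because the socket buffer is full and
                      * no new write space has been reported yet, so park the
                      * connection until wr_space_ready is set again.
                      */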
1656                 if ((rc == -EAGAIN) && !conn->wr_space_ready) {
1657                         conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT;
1658                         goto cont;
1659                 }
1660
1661                 if (test_write_ready(conn)) {
1662                         list_add_tail(&conn->wr_list_entry, &iscsi_wr_list);
1663                         conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST;
1664                 } else {
1665                         conn->wr_state = ISCSI_CONN_WR_STATE_IDLE;
                     }
1666
1667 cont:
1668                 conn_put(conn);
1669         }
1670
1671         TRACE_EXIT();
1672         return;
1673 }
1674
1675 static inline int test_wr_list(void)
1676 {
1677         int res = !list_empty(&iscsi_wr_list) ||
1678                   unlikely(kthread_should_stop());
1679         return res;
1680 }
1681
1682 int istwr(void *arg)
1683 {
1684         TRACE_ENTRY();
1685
1686         PRINT_INFO("Write thread started, PID %d", current->pid);
1687
1688         current->flags |= PF_NOFREEZE;
1689
1690         spin_lock_bh(&iscsi_wr_lock);
1691         while (!kthread_should_stop()) {
1692                 wait_queue_t wait;
1693                 init_waitqueue_entry(&wait, current);
1694
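                     /*
                      * Typical exclusive-waitqueue sleep: drop iscsi_wr_lock
                      * around schedule() and recheck the work list after every
                      * wakeup.
                      */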
1695                 if (!test_wr_list()) {
1696                         add_wait_queue_exclusive_head(&iscsi_wr_waitQ, &wait);
1697                         for (;;) {
1698                                 set_current_state(TASK_INTERRUPTIBLE);
1699                                 if (test_wr_list())
1700                                         break;
1701                                 spin_unlock_bh(&iscsi_wr_lock);
1702                                 schedule();
1703                                 spin_lock_bh(&iscsi_wr_lock);
1704                         }
1705                         set_current_state(TASK_RUNNING);
1706                         remove_wait_queue(&iscsi_wr_waitQ, &wait);
1707                 }
1708                 scst_do_job_wr();
1709         }
1710         spin_unlock_bh(&iscsi_wr_lock);
1711
1712         /*
1713          * If kthread_should_stop() is true, we are guaranteed to be
1714          * in the module unload path, so iscsi_wr_list must be empty.
1715          */
1716         sBUG_ON(!list_empty(&iscsi_wr_list));
1717
1718         PRINT_INFO("Write thread PID %d finished", current->pid);
1719
1720         TRACE_EXIT();
1721         return 0;
1722 }