 * Copyright (C) 2004 - 2005 FUJITA Tomonori <tomof@acm.org>
 * Copyright (C) 2007 - 2009 Vladislav Bolkhovitin
 * Copyright (C) 2007 - 2009 ID7 Ltd.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/kthread.h>
#include <asm/ioctls.h>
#include <linux/delay.h>
	RX_INIT_BHS, /* Must be zero for better "switch" optimization. */

	TX_INIT = 0, /* Must be zero for better "switch" optimization. */
#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
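/*
 * With zero-copy transfer completion notification enabled, pages handed
 * to the network stack via sendpage() keep a back-reference to their
 * owning command in page->net_priv until TCP reports the data as really
 * sent. The helper below breaks those references by hand for a socket
 * already in TCP_CLOSE, so the queued buffers can be freed without
 * waiting for user space to close() anything.
 */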
static void iscsi_check_closewait(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd;

	TRACE_CONN_CLOSE_DBG("conn %p, sk_state %d", conn,
		conn->sock->sk->sk_state);

	if (conn->sock->sk->sk_state != TCP_CLOSE) {
		TRACE_CONN_CLOSE_DBG("conn %p, skipping", conn);
	/*
	 * No data are going to be sent, so all queued buffers can be freed
	 * now. In many cases TCP does that only in close(), but we can't
	 * rely on user space to call it.
	 */
	spin_lock_bh(&conn->cmd_list_lock);
	list_for_each_entry(cmnd, &conn->cmd_list, cmd_list_entry) {
		struct iscsi_cmnd *rsp;

		TRACE_CONN_CLOSE_DBG("cmd %p, scst_state %x, data_waiting %d, "
			"ref_cnt %d, parent_req %p, net_ref_cnt %d, sg %p",
			cmnd, cmnd->scst_state, cmnd->data_waiting,
			atomic_read(&cmnd->ref_cnt), cmnd->parent_req,
			atomic_read(&cmnd->net_ref_cnt), cmnd->sg);

		sBUG_ON(cmnd->parent_req != NULL);

		if (cmnd->sg != NULL) {
			int i;

			if (cmnd_get_check(cmnd))
				continue;

			for (i = 0; i < cmnd->sg_cnt; i++) {
				struct page *page = sg_page(&cmnd->sg[i]);
				TRACE_CONN_CLOSE_DBG("page %p, net_priv %p, "
					"_count %d", page, page->net_priv,
					atomic_read(&page->_count));

				if (page->net_priv != NULL) {
					spin_unlock_bh(&conn->cmd_list_lock);

					while (page->net_priv != NULL)
						iscsi_put_page_callback(page);
		spin_lock_bh(&cmnd->rsp_cmd_lock);
		list_for_each_entry(rsp, &cmnd->rsp_cmd_list,
				rsp_cmd_list_entry) {
			TRACE_CONN_CLOSE_DBG("  rsp %p, ref_cnt %d, "
				"net_ref_cnt %d, sg %p",
				rsp, atomic_read(&rsp->ref_cnt),
				atomic_read(&rsp->net_ref_cnt), rsp->sg);

			if ((rsp->sg != cmnd->sg) && (rsp->sg != NULL)) {
				int i;

				if (cmnd_get_check(rsp))
					continue;

				for (i = 0; i < rsp->sg_cnt; i++) {
					struct page *page =
						sg_page(&rsp->sg[i]);
					TRACE_CONN_CLOSE_DBG(
						"  page %p, net_priv %p, "
						"_count %d",
						page, page->net_priv,
						atomic_read(&page->_count));

					if (page->net_priv != NULL) {
						spin_unlock_bh(&cmnd->rsp_cmd_lock);
						spin_unlock_bh(&conn->cmd_list_lock);

						while (page->net_priv != NULL)
							iscsi_put_page_callback(page);
					}
				}
			}
		}
		spin_unlock_bh(&cmnd->rsp_cmd_lock);
	}
	spin_unlock_bh(&conn->cmd_list_lock);
}
#else /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
static inline void iscsi_check_closewait(struct iscsi_conn *conn) {}
#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
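/*
 * Releases commands still sitting on the session's pending list that
 * belong to this connection and are next in the expected cmd_sn order,
 * advancing exp_cmd_sn past each one so later commands don't get stuck
 * behind them.
 */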
static void free_pending_commands(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct list_head *pending_list = &session->pending_list;
	int req_freed;
	struct iscsi_cmnd *cmnd;

	spin_lock(&session->sn_lock);
	do {
		req_freed = 0;
		list_for_each_entry(cmnd, pending_list, pending_list_entry) {
			TRACE_CONN_CLOSE_DBG("Pending cmd %p "
				"(conn %p, cmd_sn %u, exp_cmd_sn %u)",
				cmnd, conn, cmnd->pdu.bhs.sn,
				session->exp_cmd_sn);
			if ((cmnd->conn == conn) &&
			    (session->exp_cmd_sn == cmnd->pdu.bhs.sn)) {
				TRACE_CONN_CLOSE_DBG("Freeing pending cmd %p",
					cmnd);

				list_del(&cmnd->pending_list_entry);

				session->exp_cmd_sn++;

				spin_unlock(&session->sn_lock);

				req_cmnd_release_force(cmnd, 0);

				req_freed = 1;
				spin_lock(&session->sn_lock);
				break;
			}
		}
	} while (req_freed);
	spin_unlock(&session->sn_lock);
}
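/*
 * Same idea as free_pending_commands(), but frees this connection's
 * pending commands regardless of their cmd_sn; used as a last resort
 * once the pending-wait timeout in close_conn() has expired.
 */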
static void free_orphaned_pending_commands(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct list_head *pending_list = &session->pending_list;
	int req_freed;
	struct iscsi_cmnd *cmnd;

	spin_lock(&session->sn_lock);
	do {
		req_freed = 0;
		list_for_each_entry(cmnd, pending_list, pending_list_entry) {
			TRACE_CONN_CLOSE_DBG("Pending cmd %p "
				"(conn %p, cmd_sn %u, exp_cmd_sn %u)",
				cmnd, conn, cmnd->pdu.bhs.sn,
				session->exp_cmd_sn);
			if (cmnd->conn == conn) {
				PRINT_ERROR("Freeing orphaned pending cmd %p",
					cmnd);

				list_del(&cmnd->pending_list_entry);

				if (session->exp_cmd_sn == cmnd->pdu.bhs.sn)
					session->exp_cmd_sn++;

				spin_unlock(&session->sn_lock);

				req_cmnd_release_force(cmnd, 0);

				req_freed = 1;
				spin_lock(&session->sn_lock);
				break;
			}
		}
	} while (req_freed);
	spin_unlock(&session->sn_lock);
}
#ifdef CONFIG_SCST_DEBUG
static void trace_conn_close(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd;
#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
	struct iscsi_cmnd *rsp;
#endif

	if (time_after(jiffies, start_waiting + 10*HZ))
		trace_flag |= TRACE_CONN_OC_DBG;

	spin_lock_bh(&conn->cmd_list_lock);
	list_for_each_entry(cmnd, &conn->cmd_list,
			cmd_list_entry) {
		TRACE_CONN_CLOSE_DBG(
			"cmd %p, scst_state %x, scst_cmd state %d, "
			"data_waiting %d, ref_cnt %d, sn %u, "
			"parent_req %p, pending %d",
			cmnd, cmnd->scst_state,
			(cmnd->parent_req && cmnd->scst_cmd) ?
				cmnd->scst_cmd->state : -1,
			cmnd->data_waiting, atomic_read(&cmnd->ref_cnt),
			cmnd->pdu.bhs.sn, cmnd->parent_req, cmnd->pending);
#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
		TRACE_CONN_CLOSE_DBG("net_ref_cnt %d, sg %p",
			atomic_read(&cmnd->net_ref_cnt),
			cmnd->sg);
		if (cmnd->sg != NULL) {
			int i;

			for (i = 0; i < cmnd->sg_cnt; i++) {
				struct page *page = sg_page(&cmnd->sg[i]);
				TRACE_CONN_CLOSE_DBG("page %p, "
					"net_priv %p, _count %d",
					page, page->net_priv,
					atomic_read(&page->_count));
			}
		}

		sBUG_ON(cmnd->parent_req != NULL);

		spin_lock_bh(&cmnd->rsp_cmd_lock);
		list_for_each_entry(rsp, &cmnd->rsp_cmd_list,
				rsp_cmd_list_entry) {
			TRACE_CONN_CLOSE_DBG("  rsp %p, "
				"ref_cnt %d, net_ref_cnt %d, sg %p",
				rsp, atomic_read(&rsp->ref_cnt),
				atomic_read(&rsp->net_ref_cnt), rsp->sg);
			if (rsp->sg != cmnd->sg && rsp->sg) {
				int i;

				for (i = 0; i < rsp->sg_cnt; i++) {
					TRACE_CONN_CLOSE_DBG("  page %p, "
						"net_priv %p, _count %d",
						sg_page(&rsp->sg[i]),
						sg_page(&rsp->sg[i])->net_priv,
						atomic_read(&sg_page(&rsp->sg[i])->_count));
				}
			}
		}
		spin_unlock_bh(&cmnd->rsp_cmd_lock);
#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
	}
	spin_unlock_bh(&conn->cmd_list_lock);
}
#else /* CONFIG_SCST_DEBUG */
static void trace_conn_close(struct iscsi_conn *conn) {}
#endif /* CONFIG_SCST_DEBUG */
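/*
 * Called by the SCST core once all commands affected by a task
 * management function have finished. For the session-wide functions
 * (nexus loss, abort-all-tasks) this is the point where the session can
 * safely be marked as shutting down and where a pending connection or
 * session reinstatement can be completed.
 */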
void iscsi_task_mgmt_affected_cmds_done(struct scst_mgmt_cmd *scst_mcmd)
{
	int fn = scst_mgmt_cmd_get_fn(scst_mcmd);
	void *priv = scst_mgmt_cmd_get_tgt_priv(scst_mcmd);

	TRACE_MGMT_DBG("scst_mcmd %p, fn %d, priv %p", scst_mcmd, fn, priv);

	switch (fn) {
	case SCST_NEXUS_LOSS_SESS:
	case SCST_ABORT_ALL_TASKS_SESS:
	{
		struct iscsi_conn *conn = (struct iscsi_conn *)priv;
		struct iscsi_session *sess = conn->session;
		struct iscsi_conn *c;
		mutex_lock(&sess->target->target_mutex);

		/*
		 * We can't mark sess as shutting down earlier, because until
		 * now it might have pending commands. Otherwise, in case of
		 * reinstatement, that could lead to data corruption, because
		 * commands from the session being reinstated could be
		 * executed after commands from the new session.
		 */
		sess->sess_shutting_down = 1;
		list_for_each_entry(c, &sess->conn_list, conn_list_entry) {
			if (!test_bit(ISCSI_CONN_SHUTTINGDOWN,
				      &c->conn_aflags)) {
				sess->sess_shutting_down = 0;
				break;
			}
		}

		if (conn->conn_reinst_successor != NULL) {
			sBUG_ON(!test_bit(ISCSI_CONN_REINSTATING,
				&conn->conn_reinst_successor->conn_aflags));
			conn_reinst_finished(conn->conn_reinst_successor);
			conn->conn_reinst_successor = NULL;
		} else if (sess->sess_reinst_successor != NULL) {
			sess_reinst_finished(sess->sess_reinst_successor);
			sess->sess_reinst_successor = NULL;
		}

		mutex_unlock(&sess->target->target_mutex);

		complete_all(&conn->ready_to_free);
		break;
	}

	default:
		break;
	}
}
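/*
 * Tears a connection down: shuts the socket down, aborts all outstanding
 * commands through SCST task management, disposes of pending commands,
 * and then waits (with escalating timeouts) until every reference to the
 * connection is dropped before notifying user space and freeing it.
 */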
static void close_conn(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct iscsi_target *target = conn->target;
	typeof(jiffies) start_waiting = jiffies;
	typeof(jiffies) shut_start_waiting = start_waiting;
	bool pending_reported = false, wait_expired = false, shut_expired = false;
	bool reinst, t;
	uint64_t lun = 0;
	int rc;

#define CONN_PENDING_TIMEOUT	((typeof(jiffies))10*HZ)
#define CONN_WAIT_TIMEOUT	((typeof(jiffies))10*HZ)
#define CONN_REG_SHUT_TIMEOUT	((typeof(jiffies))125*HZ)
#define CONN_DEL_SHUT_TIMEOUT	((typeof(jiffies))10*HZ)

	TRACE_CONN_CLOSE("Closing connection %p (conn_ref_cnt=%d)", conn,
		atomic_read(&conn->conn_ref_cnt));

	iscsi_extracheck_is_rd_thread(conn);

	sBUG_ON(!conn->closing);
	if (conn->active_close) {
		/* We want all the operations we have already sent to complete */
		conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
	} else {
		conn->sock->ops->shutdown(conn->sock,
			RCV_SHUTDOWN|SEND_SHUTDOWN);
	}
	mutex_lock(&session->target->target_mutex);
	set_bit(ISCSI_CONN_SHUTTINGDOWN, &conn->conn_aflags);
	reinst = (conn->conn_reinst_successor != NULL);
	mutex_unlock(&session->target->target_mutex);

	if (reinst) {
		/* Abort all outstanding commands */
		rc = scst_rx_mgmt_fn_lun(session->scst_sess,
			SCST_ABORT_ALL_TASKS_SESS, (uint8_t *)&lun, sizeof(lun),
			SCST_NON_ATOMIC, conn);
		if (rc != 0)
			PRINT_ERROR("SCST_ABORT_ALL_TASKS_SESS failed %d", rc);
	} else {
		rc = scst_rx_mgmt_fn_lun(session->scst_sess,
			SCST_NEXUS_LOSS_SESS, (uint8_t *)&lun, sizeof(lun),
			SCST_NON_ATOMIC, conn);
		if (rc != 0)
			PRINT_ERROR("SCST_NEXUS_LOSS_SESS failed %d", rc);
	}
	if (conn->read_state != RX_INIT_BHS) {
		struct iscsi_cmnd *cmnd = conn->read_cmnd;

		if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) {
			TRACE_DBG("Going to wait for cmnd %p to change state "
				"from RX_CMD", cmnd);
		}
		wait_event(conn->read_state_waitQ,
			cmnd->scst_state != ISCSI_CMD_STATE_RX_CMD);

		conn->read_cmnd = NULL;
		conn->read_state = RX_INIT_BHS;
		req_cmnd_release_force(cmnd, 0);
	}

	/* ToDo: not the best way to wait */
	while (atomic_read(&conn->conn_ref_cnt) != 0) {
		mutex_lock(&target->target_mutex);
		spin_lock(&session->sn_lock);
		if (session->tm_rsp && session->tm_rsp->conn == conn) {
			struct iscsi_cmnd *tm_rsp = session->tm_rsp;
			TRACE(TRACE_MGMT_MINOR, "Dropping delayed TM rsp %p",
				tm_rsp);
			session->tm_rsp = NULL;
			session->tm_active--;
			WARN_ON(session->tm_active < 0);
			spin_unlock(&session->sn_lock);
			mutex_unlock(&target->target_mutex);

			rsp_cmnd_release(tm_rsp);
		} else {
			spin_unlock(&session->sn_lock);
			mutex_unlock(&target->target_mutex);
		}

		/* It's safe to check it without sn_lock */
		if (!list_empty(&session->pending_list)) {
			TRACE_CONN_CLOSE_DBG("Disposing pending commands on "
				"connection %p (conn_ref_cnt=%d)", conn,
				atomic_read(&conn->conn_ref_cnt));

			free_pending_commands(conn);

			if (time_after(jiffies,
					start_waiting + CONN_PENDING_TIMEOUT)) {
				if (!pending_reported) {
					TRACE_CONN_CLOSE("%s",
						"Pending wait time expired");
					pending_reported = true;
				}
				free_orphaned_pending_commands(conn);
			}
		}

		iscsi_make_conn_wr_active(conn);

		/* That's for active close only, actually */
		if (time_after(jiffies, start_waiting + CONN_WAIT_TIMEOUT) &&
		    !wait_expired) {
			TRACE_CONN_CLOSE("Wait time expired (conn %p, "
				"sk_state %d)",
				conn, conn->sock->sk->sk_state);
			conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN);
			wait_expired = true;
			shut_start_waiting = jiffies;
		}
		if (wait_expired && !shut_expired &&
		    time_after(jiffies, shut_start_waiting +
				(conn->deleting ? CONN_DEL_SHUT_TIMEOUT :
						  CONN_REG_SHUT_TIMEOUT))) {
505 TRACE_CONN_CLOSE("Wait time after shutdown expired "
506 "(conn %p, sk_state %d)", conn,
507 conn->sock->sk->sk_state);
508 conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
517 TRACE_CONN_CLOSE_DBG("conn %p, conn_ref_cnt %d left, "
518 "wr_state %d, exp_cmd_sn %u",
519 conn, atomic_read(&conn->conn_ref_cnt),
520 conn->wr_state, session->exp_cmd_sn);
522 trace_conn_close(conn);
524 iscsi_check_closewait(conn);
527 write_lock_bh(&conn->sock->sk->sk_callback_lock);
528 conn->sock->sk->sk_state_change = conn->old_state_change;
529 conn->sock->sk->sk_data_ready = conn->old_data_ready;
530 conn->sock->sk->sk_write_space = conn->old_write_space;
531 write_unlock_bh(&conn->sock->sk->sk_callback_lock);
536 spin_lock_bh(&iscsi_wr_lock);
537 t = (conn->wr_state == ISCSI_CONN_WR_STATE_IDLE);
538 spin_unlock_bh(&iscsi_wr_lock);
540 if (t && (atomic_read(&conn->conn_ref_cnt) == 0))
543 TRACE_CONN_CLOSE_DBG("Waiting for wr thread (conn %p), "
544 "wr_state %x", conn, conn->wr_state);
548 wait_for_completion(&conn->ready_to_free);
550 TRACE_CONN_CLOSE("Notifying user space about closing connection %p",
552 event_send(target->tid, session->sid, conn->cid, E_CONN_CLOSE, 0);
554 #ifdef CONFIG_SCST_PROC
555 mutex_lock(&target->target_mutex);
557 mutex_unlock(&target->target_mutex);
559 kobject_put(&conn->iscsi_conn_kobj);
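/*
 * Connection cleanup can block for a long time (see the timeouts in
 * close_conn() above), so it runs in a dedicated kernel thread;
 * start_close_conn() below falls back to closing synchronously if that
 * thread can't be spawned.
 */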
static int close_conn_thr(void *arg)
{
	struct iscsi_conn *conn = (struct iscsi_conn *)arg;

#ifdef CONFIG_SCST_EXTRACHECKS
	/*
	 * To satisfy iscsi_extracheck_is_rd_thread() in functions called
	 * during connection close. It is safe, because at this point conn
	 * can't be used by any other thread.
	 */
	conn->rd_task = current;
#endif
	close_conn(conn);
	return 0;
}
static void start_close_conn(struct iscsi_conn *conn)
{
	struct task_struct *t;

	t = kthread_run(close_conn_thr, conn, "iscsi_conn_cleanup");
	if (IS_ERR(t)) {
		PRINT_ERROR("kthread_run() failed (%ld), closing conn %p "
			"directly", PTR_ERR(t), conn);
		close_conn(conn);
	}
}
static inline void iscsi_conn_init_read(struct iscsi_conn *conn,
	void __user *data, size_t len)
{
	conn->read_iov[0].iov_base = data;
	conn->read_iov[0].iov_len = len;
	conn->read_msg.msg_iov = conn->read_iov;
	conn->read_msg.msg_iovlen = 1;
	conn->read_size = len;
}
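/*
 * Allocates space for the PDU's Additional Header Segment and queues it
 * for reading. The AHS length is rounded up to a multiple of 4 with
 * (ahssize + 3) & -4, matching the PDU padding rule; e.g. ahssize 5
 * yields asize 8.
 */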
static void iscsi_conn_prepare_read_ahs(struct iscsi_conn *conn,
	struct iscsi_cmnd *cmnd)
{
	int asize = (cmnd->pdu.ahssize + 3) & -4;

	/* ToDo: __GFP_NOFAIL ?? */
	cmnd->pdu.ahs = kmalloc(asize, __GFP_NOFAIL|GFP_KERNEL);
	sBUG_ON(cmnd->pdu.ahs == NULL);
	iscsi_conn_init_read(conn, (void __force __user *)cmnd->pdu.ahs, asize);
}
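/*
 * Pops the next response PDU from the connection's write list, marking
 * it as having started write processing; returns NULL if the list is
 * empty.
 */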
static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd = NULL;

	spin_lock_bh(&conn->write_list_lock);
	if (!list_empty(&conn->write_list)) {
		cmnd = list_entry(conn->write_list.next, struct iscsi_cmnd,
				write_list_entry);
		cmd_del_from_write_list(cmnd);
		cmnd->write_processing_started = 1;
	}
	spin_unlock_bh(&conn->write_list_lock);

	return cmnd;
}
/* Returns the number of bytes left to receive, or <0 on error */
static int do_recv(struct iscsi_conn *conn)
{
	int res;
	mm_segment_t oldfs;
	struct msghdr msg;
	int first_len;

	EXTRACHECKS_BUG_ON(conn->read_cmnd == NULL);

	if (unlikely(conn->closing)) {
		res = -EIO;
		goto out;
	}

	/*
	 * We suppose that if sock_recvmsg() returned less data than
	 * requested, then next time it will return -EAGAIN, so there's no
	 * point in calling it again.
	 */

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = conn->read_msg.msg_iov;
	msg.msg_iovlen = conn->read_msg.msg_iovlen;
	first_len = msg.msg_iov->iov_len;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	res = sock_recvmsg(conn->sock, &msg, conn->read_size,
		MSG_DONTWAIT | MSG_NOSIGNAL);
	set_fs(oldfs);
	if (res > 0) {
		/*
		 * To save considerable effort and CPU power, we suppose
		 * that TCP functions adjust conn->read_msg.msg_iov and
		 * conn->read_msg.msg_iovlen by the amount of copied data.
		 * This sBUG_ON is intended to catch it if that behavior
		 * is ever changed in the future.
		 */
		sBUG_ON((res >= first_len) &&
			(conn->read_msg.msg_iov->iov_len != 0));
		conn->read_size -= res;
		if (conn->read_size != 0) {
			if (res >= first_len) {
				int done =
					1 + ((res - first_len) >> PAGE_SHIFT);
				conn->read_msg.msg_iov += done;
				conn->read_msg.msg_iovlen -= done;
			}
		}
		res = conn->read_size;
	} else {
		switch (res) {
		case -EAGAIN:
			TRACE_DBG("EAGAIN received for conn %p", conn);
			res = conn->read_size;
			break;
		case -ERESTARTSYS:
			TRACE_DBG("ERESTARTSYS received for conn %p", conn);
			res = conn->read_size;
			break;
		default:
			PRINT_ERROR("sock_recvmsg() failed: %d", res);
			mark_conn_closed(conn);
			break;
		}
	}

out:
	TRACE_EXIT_RES(res);
	return res;
}
static int iscsi_rx_check_ddigest(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd = conn->read_cmnd;
	int res = 0;

	conn->read_state = RX_END;
	if (cmnd->pdu.datasize <= 16*1024) {
		/*
		 * It's cache hot, so let's compute it inline. The choice
		 * here is about what will expose more latency: possible
		 * cache misses or the digest calculation.
		 */
		TRACE_DBG("cmnd %p, opcode %x: checking RX "
			"ddigest inline", cmnd, cmnd_opcode(cmnd));
		cmnd->ddigest_checked = 1;
		res = digest_rx_data(cmnd);
		if (unlikely(res != 0)) {
			mark_conn_closed(conn);
			goto out;
		}
	} else if (cmnd_opcode(cmnd) == ISCSI_OP_SCSI_CMD) {
		cmd_add_on_rx_ddigest_list(cmnd, cmnd);
	} else if (cmnd_opcode(cmnd) != ISCSI_OP_SCSI_DATA_OUT) {
		/*
		 * We could get here only for NOP-Out. The iSCSI RFC
		 * doesn't specify how to deal with digest errors in this
		 * case. Is closing the connection correct?
		 */
		TRACE_DBG("cmnd %p, opcode %x: checking NOP RX "
			"ddigest", cmnd, cmnd_opcode(cmnd));
		res = digest_rx_data(cmnd);
		if (unlikely(res != 0)) {
			mark_conn_closed(conn);
			goto out;
		}
	}

out:
	TRACE_EXIT_RES(res);
	return res;
}
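/*
 * The read side is a state machine: RX_INIT_BHS reads the 48-byte Basic
 * Header Segment of the next PDU, then, depending on the PDU, the AHS,
 * header digest, data and data digest states follow, ending in RX_END,
 * which resets the connection back to RX_INIT_BHS for the next PDU.
 */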
/* No locks, conn is rd processing */
static void process_read_io(struct iscsi_conn *conn, int *closed)
{
	struct iscsi_cmnd *cmnd = conn->read_cmnd;
	int res;

	/* In case of error, cmnd will be freed in close_conn() */

	do {
		switch (conn->read_state) {
		case RX_INIT_BHS:
			EXTRACHECKS_BUG_ON(conn->read_cmnd != NULL);
			cmnd = cmnd_alloc(conn, NULL);
			conn->read_cmnd = cmnd;
			iscsi_conn_init_read(cmnd->conn,
				(void __force __user *)&cmnd->pdu.bhs,
				sizeof(cmnd->pdu.bhs));
			conn->read_state = RX_BHS;
			/* go through */

		case RX_BHS:
			res = do_recv(conn);
			if (res == 0) {
				iscsi_cmnd_get_length(&cmnd->pdu);
				if (cmnd->pdu.ahssize == 0) {
					if ((conn->hdigest_type & DIGEST_NONE) == 0)
						conn->read_state = RX_INIT_HDIGEST;
					else
						conn->read_state = RX_CMD_START;
				} else {
					iscsi_conn_prepare_read_ahs(conn, cmnd);
					conn->read_state = RX_AHS;
				}
			}
			break;

		case RX_CMD_START:
			res = cmnd_rx_start(cmnd);
			if (res == 0) {
				if (cmnd->pdu.datasize == 0)
					conn->read_state = RX_END;
				else
					conn->read_state = RX_DATA;
			} else if (res > 0)
				conn->read_state = RX_CMD_CONTINUE;
			else
				sBUG_ON(!conn->closing);
			break;

		case RX_CMD_CONTINUE:
			if (cmnd->scst_state == ISCSI_CMD_STATE_RX_CMD) {
				TRACE_DBG("cmnd %p is still in RX_CMD state",
					cmnd);
				res = 1;
				break;
			}
			res = cmnd_rx_continue(cmnd);
			if (unlikely(res != 0))
				sBUG_ON(!conn->closing);
			else {
				if (cmnd->pdu.datasize == 0)
					conn->read_state = RX_END;
				else
					conn->read_state = RX_DATA;
			}
			break;

		case RX_DATA:
			res = do_recv(conn);
			if (res == 0) {
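				/*
				 * PDU data is padded to a 4-byte boundary,
				 * so the number of padding bytes to read is
				 * the rounded-up length minus the real one;
				 * e.g. datasize 5 gives ((5 + 3) & -4) - 5 = 3.
				 */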
				int psz = ((cmnd->pdu.datasize + 3) & -4) - cmnd->pdu.datasize;
				if (psz != 0) {
					TRACE_DBG("padding %d bytes", psz);
					iscsi_conn_init_read(conn,
						(void __force __user *)&conn->rpadding, psz);
					conn->read_state = RX_PADDING;
				} else if ((conn->ddigest_type & DIGEST_NONE) != 0)
					conn->read_state = RX_END;
				else
					conn->read_state = RX_INIT_DDIGEST;
			}
			break;

		case RX_END:
			if (unlikely(conn->read_size != 0)) {
				PRINT_CRIT_ERROR("%d %x %d", res,
					cmnd_opcode(cmnd), conn->read_size);
				sBUG();
			}
			conn->read_cmnd = NULL;
			conn->read_state = RX_INIT_BHS;

			cmnd_rx_end(cmnd);

			EXTRACHECKS_BUG_ON(conn->read_size != 0);
			break;

		case RX_INIT_HDIGEST:
			iscsi_conn_init_read(conn,
				(void __force __user *)&cmnd->hdigest, sizeof(u32));
			conn->read_state = RX_CHECK_HDIGEST;
			/* go through */

		case RX_CHECK_HDIGEST:
			res = do_recv(conn);
			if (res == 0) {
				res = digest_rx_header(cmnd);
				if (unlikely(res != 0)) {
					PRINT_ERROR("rx header digest for "
						"initiator %s failed (%d)",
						conn->session->initiator_name,
						res);
					mark_conn_closed(conn);
				} else
					conn->read_state = RX_CMD_START;
			}
			break;

		case RX_INIT_DDIGEST:
			iscsi_conn_init_read(conn,
				(void __force __user *)&cmnd->ddigest,
				sizeof(u32));
			conn->read_state = RX_CHECK_DDIGEST;
			/* go through */

		case RX_CHECK_DDIGEST:
			res = iscsi_rx_check_ddigest(conn);
			break;

		case RX_AHS:
			res = do_recv(conn);
			if (res == 0) {
				if ((conn->hdigest_type & DIGEST_NONE) == 0)
					conn->read_state = RX_INIT_HDIGEST;
				else
					conn->read_state = RX_CMD_START;
			}
			break;

		case RX_PADDING:
			res = do_recv(conn);
			if (res == 0) {
				if ((conn->ddigest_type & DIGEST_NONE) == 0)
					conn->read_state = RX_INIT_DDIGEST;
				else
					conn->read_state = RX_END;
			}
			break;

		default:
			PRINT_CRIT_ERROR("%d %x", conn->read_state, cmnd_opcode(cmnd));
			sBUG();
		}
	} while (res == 0);

	if (unlikely(conn->closing)) {
		start_close_conn(conn);
		*closed = 1;
	}
}
/*
 * Called under iscsi_rd_lock and BHs disabled, but will drop and
 * reacquire it inside.
 */
static void scst_do_job_rd(void)
	__acquires(&iscsi_rd_lock)
	__releases(&iscsi_rd_lock)
{
	/*
	 * We delete connections from the list and add them back to its
	 * tail to maintain fairness between them.
	 */
	while (!list_empty(&iscsi_rd_list)) {
		int closed = 0;
		struct iscsi_conn *conn = list_entry(iscsi_rd_list.next,
			typeof(*conn), rd_list_entry);

		list_del(&conn->rd_list_entry);

		sBUG_ON(conn->rd_state == ISCSI_CONN_RD_STATE_PROCESSING);
		conn->rd_data_ready = 0;
		conn->rd_state = ISCSI_CONN_RD_STATE_PROCESSING;
#ifdef CONFIG_SCST_EXTRACHECKS
		conn->rd_task = current;
#endif
		spin_unlock_bh(&iscsi_rd_lock);

		process_read_io(conn, &closed);

		spin_lock_bh(&iscsi_rd_lock);

		if (closed)
			continue;

#ifdef CONFIG_SCST_EXTRACHECKS
		conn->rd_task = NULL;
#endif
		if (conn->rd_data_ready) {
			list_add_tail(&conn->rd_list_entry, &iscsi_rd_list);
			conn->rd_state = ISCSI_CONN_RD_STATE_IN_LIST;
		} else
			conn->rd_state = ISCSI_CONN_RD_STATE_IDLE;
	}
}
static inline int test_rd_list(void)
{
	int res = !list_empty(&iscsi_rd_list) ||
		  unlikely(kthread_should_stop());
	return res;
}

int istrd(void *arg)
{
	PRINT_INFO("Read thread started, PID %d", current->pid);

	current->flags |= PF_NOFREEZE;

	spin_lock_bh(&iscsi_rd_lock);
	while (!kthread_should_stop()) {
		wait_queue_t wait;
		init_waitqueue_entry(&wait, current);

		if (!test_rd_list()) {
			add_wait_queue_exclusive_head(&iscsi_rd_waitQ, &wait);
			for (;;) {
				set_current_state(TASK_INTERRUPTIBLE);
				if (test_rd_list())
					break;
				spin_unlock_bh(&iscsi_rd_lock);
				schedule();
				spin_lock_bh(&iscsi_rd_lock);
			}
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&iscsi_rd_waitQ, &wait);
		}
		scst_do_job_rd();
	}
	spin_unlock_bh(&iscsi_rd_lock);
	/*
	 * If kthread_should_stop() is true, we are guaranteed to be in
	 * module unload, so iscsi_rd_list must be empty.
	 */
	sBUG_ON(!list_empty(&iscsi_rd_list));

	PRINT_INFO("Read thread PID %d finished", current->pid);

	return 0;
}
#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd)
{
	int v;

	TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d",
		cmd, atomic_read(&cmd->net_ref_cnt)+1);

	v = atomic_inc_return(&cmd->net_ref_cnt);
	if (v == 1) {
		TRACE_NET_PAGE("getting cmd %p", cmd);
		cmnd_get(cmd);
	}
}

void iscsi_get_page_callback(struct page *page)
{
	struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;

	TRACE_NET_PAGE("page %p, _count %d", page,
		atomic_read(&page->_count));

	__iscsi_get_page_callback(cmd);
}

static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd)
{
	TRACE_NET_PAGE("cmd %p, new net_ref_cnt %d", cmd,
		atomic_read(&cmd->net_ref_cnt)-1);

	if (atomic_dec_and_test(&cmd->net_ref_cnt)) {
		int i, sg_cnt = cmd->sg_cnt;
		for (i = 0; i < sg_cnt; i++) {
			struct page *page = sg_page(&cmd->sg[i]);
			TRACE_NET_PAGE("Clearing page %p", page);
			if (page->net_priv == cmd)
				page->net_priv = NULL;
		}
		cmnd_put(cmd);
	}
}

void iscsi_put_page_callback(struct page *page)
{
	struct iscsi_cmnd *cmd = (struct iscsi_cmnd *)page->net_priv;

	TRACE_NET_PAGE("page %p, _count %d", page,
		atomic_read(&page->_count));

	__iscsi_put_page_callback(cmd);
}
static void check_net_priv(struct iscsi_cmnd *cmd, struct page *page)
{
	if ((atomic_read(&cmd->net_ref_cnt) == 1) && (page->net_priv == cmd)) {
		TRACE_DBG("sendpage() did not call get_page(), zeroing "
			"net_priv %p (page %p)", page->net_priv, page);
		page->net_priv = NULL;
	}
}
#else /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
static inline void check_net_priv(struct iscsi_cmnd *cmd, struct page *page) {}
static inline void __iscsi_get_page_callback(struct iscsi_cmnd *cmd) {}
static inline void __iscsi_put_page_callback(struct iscsi_cmnd *cmd) {}
#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
/* This is partially taken from the Ardis code. */
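/*
 * Sends the current write command: the header iovec goes out via
 * vfs_writev() on the connection's socket file, the data pages via the
 * socket's sendpage() (zero-copy where allowed), falling back to
 * sock_no_sendpage() whenever a page might be shared or changed under
 * the network stack. Returns the number of bytes consumed from
 * conn->write_size this call, or a negative error (-EAGAIN if the
 * socket would block).
 */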
static int write_data(struct iscsi_conn *conn)
{
	mm_segment_t oldfs;
	struct file *file;
	struct iovec *iop;
	struct socket *sock;
	ssize_t (*sock_sendpage)(struct socket *, struct page *, int, size_t,
		int);
	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
	struct iscsi_cmnd *write_cmnd = conn->write_cmnd;
	struct iscsi_cmnd *ref_cmd;
	struct page *page;
	struct scatterlist *sg;
	int saved_size, size, sendsize;
	int length, offset, idx;
	int flags, res, count, sg_size;
	bool do_put = false, ref_cmd_to_parent;

	iscsi_extracheck_is_wr_thread(conn);

	if (write_cmnd->own_sg == 0) {
		ref_cmd = write_cmnd->parent_req;
		ref_cmd_to_parent = true;
	} else {
		ref_cmd = write_cmnd;
		ref_cmd_to_parent = false;
	}

	if (!ref_cmd->on_written_list) {
		TRACE_DBG("Adding cmd %p to conn %p written_list", ref_cmd,
			conn);
		spin_lock_bh(&conn->write_list_lock);
		ref_cmd->on_written_list = 1;
		ref_cmd->write_timeout = jiffies + ISCSI_RSP_TIMEOUT;
		list_add_tail(&ref_cmd->written_list_entry,
			&conn->written_list);
		spin_unlock_bh(&conn->write_list_lock);
	}

	if (!timer_pending(&conn->rsp_timer)) {
		sBUG_ON(!ref_cmd->on_written_list);
		spin_lock_bh(&conn->write_list_lock);
		if (likely(!timer_pending(&conn->rsp_timer))) {
			TRACE_DBG("Starting timer on %ld (conn %p)",
				ref_cmd->write_timeout, conn);
			conn->rsp_timer.expires = ref_cmd->write_timeout;
			add_timer(&conn->rsp_timer);
		}
		spin_unlock_bh(&conn->write_list_lock);
	}

	file = conn->file;
	size = conn->write_size;
	saved_size = size;
	iop = conn->write_iop;
	count = conn->write_iop_used;

	if (iop)
		while (1) {
			loff_t off = 0;
			int rest;

			sBUG_ON(count > (signed)(sizeof(conn->write_iov) /
				sizeof(conn->write_iov[0])));
retry:
			oldfs = get_fs();
			set_fs(KERNEL_DS);
			res = vfs_writev(file,
				(struct iovec __force __user *)iop,
				count, &off);
			set_fs(oldfs);
			TRACE_WRITE("sid %#Lx, cid %u, res %d, iov_len %ld",
				(long long unsigned int)conn->session->sid,
				conn->cid, res, (long)iop->iov_len);
			if (unlikely(res <= 0)) {
				if (res == -EAGAIN) {
					conn->write_iop = iop;
					conn->write_iop_used = count;
					goto out_iov;
				} else if (res == -EINTR)
					goto retry;
				goto out_err;
			}

			rest = res;
			size -= res;
			while ((typeof(rest))iop->iov_len <= rest && rest) {
				rest -= iop->iov_len;
				iop++;
				count--;
			}
			if (count == 0) {
				conn->write_iop = NULL;
				conn->write_iop_used = 0;
				break;
			}
			sBUG_ON(iop > conn->write_iov + sizeof(conn->write_iov)
				/sizeof(conn->write_iov[0]));
			iop->iov_base += rest;
			iop->iov_len -= rest;
		}

	sg = write_cmnd->sg;
	if (unlikely(sg == NULL)) {
		PRINT_INFO("WARNING: Data missed (cmd %p)!", write_cmnd);
		res = 0;
		goto out;
	}

	/* To protect against a too-early transfer completion race */
	__iscsi_get_page_callback(ref_cmd);
	do_put = true;

	sock = conn->sock;

#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
	sock_sendpage = sock->ops->sendpage;

	if ((write_cmnd->parent_req->scst_cmd != NULL) &&
	    scst_cmd_get_dh_data_buff_alloced(write_cmnd->parent_req->scst_cmd))
		sock_sendpage = sock_no_sendpage;
#else
	sock_sendpage = sock->ops->sendpage;
#endif

	flags = MSG_DONTWAIT;
	sg_size = size;

	if (sg != write_cmnd->rsp_sg) {
		offset = conn->write_offset + sg[0].offset;
		idx = offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;
		length = min(size, (int)PAGE_SIZE - offset);
		TRACE_WRITE("write_offset %d, sg_size %d, idx %d, offset %d, "
			"length %d", conn->write_offset, sg_size, idx, offset,
			length);
	} else {
		idx = 0;
		offset = conn->write_offset;
		while (offset >= sg[idx].length) {
			offset -= sg[idx].length;
			idx++;
		}
		length = sg[idx].length - offset;
		offset += sg[idx].offset;
		sock_sendpage = sock_no_sendpage;
		TRACE_WRITE("rsp_sg: write_offset %d, sg_size %d, idx %d, "
			"offset %d, length %d", conn->write_offset, sg_size,
			idx, offset, length);
	}
	page = sg_page(&sg[idx]);

	while (1) {
		sendpage = sock_sendpage;

#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
		{
			static DEFINE_SPINLOCK(net_priv_lock);
			spin_lock(&net_priv_lock);
			if (unlikely(page->net_priv != NULL)) {
				if (page->net_priv != ref_cmd) {
					/*
					 * This might happen if user space
					 * supplies the same pages to
					 * scst_user in different commands,
					 * or in the case of zero-copy
					 * FILEIO, when several initiators
					 * request the same data
					 * simultaneously.
					 */
					TRACE_DBG("net_priv isn't NULL and != "
						"ref_cmd (write_cmnd %p, ref_cmd "
						"%p, sg %p, idx %d, page %p, "
						"net_priv %p)",
						write_cmnd, ref_cmd, sg, idx,
						page, page->net_priv);
					sendpage = sock_no_sendpage;
				}
			} else
				page->net_priv = ref_cmd;
			spin_unlock(&net_priv_lock);
		}
#endif
		sendsize = min(size, length);
		if (size <= sendsize) {
retry2:
			res = sendpage(sock, page, offset, size, flags);
			TRACE_WRITE("Final %s sid %#Lx, cid %u, res %d (page "
				"index %lu, offset %u, size %u, cmd %p, "
				"page %p)", (sendpage != sock_no_sendpage) ?
					"sendpage" : "sock_no_sendpage",
				(long long unsigned int)conn->session->sid,
				conn->cid, res, page->index,
				offset, size, write_cmnd, page);
			if (unlikely(res <= 0)) {
				if (res == -EINTR)
					goto retry2;
				goto out_res;
			}

			check_net_priv(ref_cmd, page);
			if (res == size) {
				conn->write_size = 0;
				res = saved_size;
				goto out_put;
			}

			offset += res;
			size -= res;
			goto retry2;
		}

retry1:
		res = sendpage(sock, page, offset, sendsize, flags | MSG_MORE);
		TRACE_WRITE("%s sid %#Lx, cid %u, res %d (page index %lu, "
			"offset %u, sendsize %u, size %u, cmd %p, page %p)",
			(sendpage != sock_no_sendpage) ? "sendpage" :
				"sock_no_sendpage",
			(unsigned long long)conn->session->sid, conn->cid,
			res, page->index, offset, sendsize, size,
			write_cmnd, page);
		if (unlikely(res <= 0)) {
			if (res == -EINTR)
				goto retry1;
			goto out_res;
		}

		check_net_priv(ref_cmd, page);

		size -= res;

		if (res == sendsize) {
			idx++;
			EXTRACHECKS_BUG_ON(idx >= ref_cmd->sg_cnt);
			page = sg_page(&sg[idx]);
			length = sg[idx].length;
			offset = sg[idx].offset;
		} else {
			offset += res;
			sendsize -= res;
			goto retry1;
		}
	}

out_off:
	conn->write_offset += sg_size - size;

out_iov:
	conn->write_size = size;
	if ((saved_size == size) && res == -EAGAIN)
		goto out_put;

	res = saved_size - size;

out_put:
	if (do_put)
		__iscsi_put_page_callback(ref_cmd);

out:
	TRACE_EXIT_RES(res);
	return res;

out_res:
	check_net_priv(ref_cmd, page);
	if (res == -EAGAIN)
		goto out_off;
	/* else fall through */

out_err:
#ifndef CONFIG_SCST_DEBUG
	if (!conn->closing)
#endif
	{
		PRINT_ERROR("error %d at sid:cid %#Lx:%u, cmnd %p", res,
			(long long unsigned int)conn->session->sid,
			conn->cid, conn->write_cmnd);
	}
	if (ref_cmd_to_parent &&
	    ((ref_cmd->scst_cmd != NULL) || (ref_cmd->scst_aen != NULL))) {
		if (ref_cmd->scst_state == ISCSI_CMD_STATE_AEN)
			scst_set_aen_delivery_status(ref_cmd->scst_aen,
				SCST_AEN_RES_FAILED);
		else
			scst_set_delivery_status(ref_cmd->scst_cmd,
				SCST_CMD_DELIVERY_FAILED);
	}
	goto out_put;
}
static int exit_tx(struct iscsi_conn *conn, int res)
{
	iscsi_extracheck_is_wr_thread(conn);

	switch (res) {
	case -EAGAIN:
	case -ERESTARTSYS:
		break;
	default:
#ifndef CONFIG_SCST_DEBUG
		if (!conn->closing)
#endif
		{
			PRINT_ERROR("Sending data failed: initiator %s, "
				"write_size %d, write_state %d, res %d",
				conn->session->initiator_name,
				conn->write_size,
				conn->write_state, res);
		}
		conn->write_state = TX_END;
		conn->write_size = 0;
		mark_conn_closed(conn);
		break;
	}
	return res;
}
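/*
 * The digest and padding senders below resume partial sends: write_size
 * holds how many bytes of the 4-byte blob are still unsent, so iov_base
 * starts (sizeof(u32) - rest) bytes into it. tx_padding() uses the same
 * pattern with a constant all-zero word.
 */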
static int tx_ddigest(struct iscsi_cmnd *cmnd, int state)
{
	int res, rest = cmnd->conn->write_size;
	struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
	struct kvec iov;

	iscsi_extracheck_is_wr_thread(cmnd->conn);

	TRACE_DBG("Sending data digest %x (cmd %p)", cmnd->ddigest, cmnd);

	iov.iov_base = (char *)(&cmnd->ddigest) + (sizeof(u32) - rest);
	iov.iov_len = rest;

	res = kernel_sendmsg(cmnd->conn->sock, &msg, &iov, 1, rest);
	if (res > 0) {
		cmnd->conn->write_size -= res;
		if (!cmnd->conn->write_size)
			cmnd->conn->write_state = state;
	} else
		res = exit_tx(cmnd->conn, res);

	return res;
}
static void init_tx_hdigest(struct iscsi_cmnd *cmnd)
{
	struct iscsi_conn *conn = cmnd->conn;
	struct iovec *iop;

	iscsi_extracheck_is_wr_thread(conn);

	digest_tx_header(cmnd);

	sBUG_ON(conn->write_iop_used >=
		(signed)(sizeof(conn->write_iov)/sizeof(conn->write_iov[0])));

	iop = &conn->write_iop[conn->write_iop_used];
	conn->write_iop_used++;
	iop->iov_base = (void __force __user *)&(cmnd->hdigest);
	iop->iov_len = sizeof(u32);
	conn->write_size += sizeof(u32);
}
static int tx_padding(struct iscsi_cmnd *cmnd, int state)
{
	int res, rest = cmnd->conn->write_size;
	struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
	struct kvec iov;
	static const uint32_t padding;

	iscsi_extracheck_is_wr_thread(cmnd->conn);

	TRACE_DBG("Sending %d padding bytes (cmd %p)", rest, cmnd);

	iov.iov_base = (char *)(&padding) + (sizeof(uint32_t) - rest);
	iov.iov_len = rest;

	res = kernel_sendmsg(cmnd->conn->sock, &msg, &iov, 1, rest);
	if (res > 0) {
		cmnd->conn->write_size -= res;
		if (!cmnd->conn->write_size)
			cmnd->conn->write_state = state;
	} else
		res = exit_tx(cmnd->conn, res);

	return res;
}
static int iscsi_do_send(struct iscsi_conn *conn, int state)
{
	int res;

	iscsi_extracheck_is_wr_thread(conn);

	res = write_data(conn);
	if (res > 0) {
		if (!conn->write_size)
			conn->write_state = state;
	} else
		res = exit_tx(conn, res);

	return res;
}
/*
 * No locks, conn is wr processing.
 *
 * IMPORTANT! Connection conn must be protected by an additional
 * conn_get() before entering this function, because otherwise it could
 * be destroyed from inside as a result of cmnd release.
 */
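/*
 * The write side mirrors the read side as a state machine: TX_INIT picks
 * the next response off the write list, TX_BHS_DATA sends the header
 * iovec (with optional header digest) and the data, then padding and
 * data digest states follow as needed until TX_END releases the command.
 */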
int iscsi_send(struct iscsi_conn *conn)
{
	struct iscsi_cmnd *cmnd = conn->write_cmnd;
	int ddigest, res = 0;

	TRACE_DBG("conn %p, write_cmnd %p", conn, cmnd);

	iscsi_extracheck_is_wr_thread(conn);

	ddigest = conn->ddigest_type != DIGEST_NONE ? 1 : 0;

	switch (conn->write_state) {
	case TX_INIT:
		sBUG_ON(cmnd != NULL);
		cmnd = conn->write_cmnd = iscsi_get_send_cmnd(conn);
		if (!cmnd)
			goto out;
		cmnd_tx_start(cmnd);
		if (!(conn->hdigest_type & DIGEST_NONE))
			init_tx_hdigest(cmnd);
		conn->write_state = TX_BHS_DATA;
	case TX_BHS_DATA:
		res = iscsi_do_send(conn, cmnd->pdu.datasize ?
			TX_INIT_PADDING : TX_END);
		if (res <= 0 || conn->write_state != TX_INIT_PADDING)
			break;
	case TX_INIT_PADDING:
		cmnd->conn->write_size = ((cmnd->pdu.datasize + 3) & -4) -
			cmnd->pdu.datasize;
		if (cmnd->conn->write_size != 0)
			conn->write_state = TX_PADDING;
		else if (ddigest)
			conn->write_state = TX_INIT_DDIGEST;
		else
			conn->write_state = TX_END;
		break;
	case TX_PADDING:
		res = tx_padding(cmnd, ddigest ? TX_INIT_DDIGEST : TX_END);
		if (res <= 0 || conn->write_state != TX_INIT_DDIGEST)
			break;
	case TX_INIT_DDIGEST:
		cmnd->conn->write_size = sizeof(u32);
		conn->write_state = TX_DDIGEST;
	case TX_DDIGEST:
		res = tx_ddigest(cmnd, TX_END);
		break;
	default:
		PRINT_CRIT_ERROR("%d %d %x", res, conn->write_state,
			cmnd_opcode(cmnd));
		sBUG();
	}

	if (res == 0)
		goto out;

	if (conn->write_state != TX_END)
		goto out;

	if (unlikely(conn->write_size)) {
		PRINT_CRIT_ERROR("%d %x %u", res, cmnd_opcode(cmnd),
			conn->write_size);
		sBUG();
	}

	rsp_cmnd_release(cmnd);

	conn->write_cmnd = NULL;
	conn->write_state = TX_INIT;

out:
	TRACE_EXIT_RES(res);
	return res;
}
/*
 * No locks, conn is wr processing.
 *
 * IMPORTANT! Connection conn must be protected by an additional
 * conn_get() before entering this function, because otherwise it could
 * be destroyed from inside as a result of iscsi_send(), which releases
 * sent commands.
 */
static int process_write_queue(struct iscsi_conn *conn)
{
	int res = 0;

	if (likely(test_write_ready(conn)))
		res = iscsi_send(conn);

	TRACE_EXIT_RES(res);
	return res;
}
/*
 * Called under iscsi_wr_lock and BHs disabled, but will drop and
 * reacquire it inside.
 */
static void scst_do_job_wr(void)
	__acquires(&iscsi_wr_lock)
	__releases(&iscsi_wr_lock)
{
	/*
	 * We delete connections from the list and add them back to its
	 * tail to maintain fairness between them.
	 */
1627 while (!list_empty(&iscsi_wr_list)) {
1629 struct iscsi_conn *conn = list_entry(iscsi_wr_list.next,
1630 typeof(*conn), wr_list_entry);
1632 TRACE_DBG("conn %p, wr_state %x, wr_space_ready %d, "
1633 "write ready %d", conn, conn->wr_state,
1634 conn->wr_space_ready, test_write_ready(conn));
1636 list_del(&conn->wr_list_entry);
1638 sBUG_ON(conn->wr_state == ISCSI_CONN_WR_STATE_PROCESSING);
1640 conn->wr_state = ISCSI_CONN_WR_STATE_PROCESSING;
1641 conn->wr_space_ready = 0;
1642 #ifdef CONFIG_SCST_EXTRACHECKS
1643 conn->wr_task = current;
1645 spin_unlock_bh(&iscsi_wr_lock);
1649 rc = process_write_queue(conn);
1651 spin_lock_bh(&iscsi_wr_lock);
1652 #ifdef CONFIG_SCST_EXTRACHECKS
1653 conn->wr_task = NULL;
1655 if ((rc == -EAGAIN) && !conn->wr_space_ready) {
1656 conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT;
1660 if (test_write_ready(conn)) {
1661 list_add_tail(&conn->wr_list_entry, &iscsi_wr_list);
1662 conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST;
1664 conn->wr_state = ISCSI_CONN_WR_STATE_IDLE;
static inline int test_wr_list(void)
{
	int res = !list_empty(&iscsi_wr_list) ||
		  unlikely(kthread_should_stop());
	return res;
}

int istwr(void *arg)
{
	PRINT_INFO("Write thread started, PID %d", current->pid);

	current->flags |= PF_NOFREEZE;

	spin_lock_bh(&iscsi_wr_lock);
	while (!kthread_should_stop()) {
		wait_queue_t wait;
		init_waitqueue_entry(&wait, current);

		if (!test_wr_list()) {
			add_wait_queue_exclusive_head(&iscsi_wr_waitQ, &wait);
			for (;;) {
				set_current_state(TASK_INTERRUPTIBLE);
				if (test_wr_list())
					break;
				spin_unlock_bh(&iscsi_wr_lock);
				schedule();
				spin_lock_bh(&iscsi_wr_lock);
			}
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&iscsi_wr_waitQ, &wait);
		}
		scst_do_job_wr();
	}
	spin_unlock_bh(&iscsi_wr_lock);
	/*
	 * If kthread_should_stop() is true, we are guaranteed to be in
	 * module unload, so iscsi_wr_list must be empty.
	 */
	sBUG_ON(!list_empty(&iscsi_wr_list));

	PRINT_INFO("Write thread PID %d finished", current->pid);

	return 0;
}