- Fixed race on TM leading to crashes
authorvlnb <vlnb@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Tue, 11 Mar 2008 19:09:30 +0000 (19:09 +0000)
committervlnb <vlnb@d57e44dd-8a1f-0410-8b47-8ef2f437770f>
Tue, 11 Mar 2008 19:09:30 +0000 (19:09 +0000)
 - Connection close improved to be less agressive and honor TCP TIME_WATE state
 - Docs updates

git-svn-id: https://scst.svn.sourceforge.net/svnroot/scst/trunk@301 d57e44dd-8a1f-0410-8b47-8ef2f437770f

iscsi-scst/kernel/conn.c
iscsi-scst/kernel/iscsi.h
iscsi-scst/kernel/nthread.c
iscsi-scst/kernel/session.c
iscsi-scst/kernel/target.c
scst/README
scst/include/scsi_tgt.h
scst/src/scst_targ.c

index f7230e5..c514c1a 100644 (file)
@@ -158,11 +158,14 @@ void iscsi_make_conn_wr_active(struct iscsi_conn *conn)
        return;
 }
 
-void __mark_conn_closed(struct iscsi_conn *conn, bool force)
+void __mark_conn_closed(struct iscsi_conn *conn, int flags)
 {
        spin_lock_bh(&iscsi_rd_lock);
        conn->closing = 1;
-       conn->force_close = force;
+       if (flags & ISCSI_CONN_ACTIVE_CLOSE)
+               conn->active_close = 1;
+       if (flags & ISCSI_CONN_DELETING)
+               conn->deleting = 1;
        spin_unlock_bh(&iscsi_rd_lock);
 
        iscsi_make_conn_rd_active(conn);
@@ -170,7 +173,7 @@ void __mark_conn_closed(struct iscsi_conn *conn, bool force)
 
 void mark_conn_closed(struct iscsi_conn *conn)
 {
-       __mark_conn_closed(conn, 0);
+       __mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE);
 }
 
 static void iscsi_state_change(struct sock *sk)
@@ -184,7 +187,7 @@ static void iscsi_state_change(struct sock *sk)
                        PRINT_ERROR("Connection with initiator %s (%p) "
                                "unexpectedly closed!",
                                conn->session->initiator_name, conn);
-                       __mark_conn_closed(conn, 1);
+                       __mark_conn_closed(conn, 0);
                }
        } else
                iscsi_make_conn_rd_active(conn);
@@ -252,7 +255,7 @@ static void conn_rsp_timer_fn(unsigned long arg)
                                        "%s (SID %Lx), closing connection",
                                        conn->session->initiator_name,
                                        conn->session->sid);
-                               __mark_conn_closed(conn, 1);
+                               mark_conn_closed(conn);
                        }
                } else {
                        TRACE_DBG("Restarting timer on %ld (conn %p)",
@@ -438,7 +441,7 @@ int conn_del(struct iscsi_session *session, struct conn_info *info)
        PRINT_INFO("Deleting connection with initiator %s (%p)",
                conn->session->initiator_name, conn);
 
-       mark_conn_closed(conn);
+       __mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING);
 
        return 0;
 }
index 809de8a..b1223d1 100644 (file)
@@ -115,7 +115,7 @@ struct iscsi_session {
 
        struct completion unreg_compl;
 
-       /* Both don't need any protection */
+       /* All don't need any protection */
        char *initiator_name;
        unsigned int shutting_down:1;
        u64 sid;
@@ -185,11 +185,13 @@ struct iscsi_conn {
        int hdigest_type;
        int ddigest_type;
 
-       /* All 4 protected by iscsi_rd_lock */
+       /* All 5 protected by iscsi_rd_lock */
        unsigned short rd_state;
        unsigned short rd_data_ready:1;
-       unsigned short closing:1; /* Let's save some cache footprint by putting it here */
-       unsigned short force_close:1; /* Let's save some cache footprint by putting it here */
+       /* Let's save some cache footprint by putting them here */
+       unsigned short closing:1;
+       unsigned short active_close:1;
+       unsigned short deleting:1;
 
        struct list_head rd_list_entry;
 
@@ -353,7 +355,11 @@ extern struct iscsi_conn *conn_lookup(struct iscsi_session *, u16);
 extern int conn_add(struct iscsi_session *, struct conn_info *);
 extern int conn_del(struct iscsi_session *, struct conn_info *);
 extern int conn_free(struct iscsi_conn *);
-extern void __mark_conn_closed(struct iscsi_conn *, bool);
+
+#define ISCSI_CONN_ACTIVE_CLOSE                1
+#define ISCSI_CONN_DELETING            2
+extern void __mark_conn_closed(struct iscsi_conn *, int);
+
 extern void mark_conn_closed(struct iscsi_conn *);
 extern void iscsi_make_conn_wr_active(struct iscsi_conn *);
 extern void conn_info_show(struct seq_file *, struct iscsi_session *);
index 922ee8d..0c44798 100644 (file)
@@ -61,7 +61,8 @@ static void iscsi_check_closewait(struct iscsi_conn *conn)
 
        TRACE_ENTRY();
 
-       TRACE_CONN_CLOSE("conn %p, sk_state %d", conn, conn->sock->sk->sk_state);
+       TRACE_CONN_CLOSE_DBG("conn %p, sk_state %d", conn,
+               conn->sock->sk->sk_state);
 
        if (conn->sock->sk->sk_state != TCP_CLOSE) {
                TRACE_CONN_CLOSE_DBG("conn %p, skipping", conn);
@@ -173,7 +174,7 @@ static void iscsi_unreg_cmds_done_fn(struct scst_session *scst_sess)
 
        TRACE_ENTRY();
 
-       TRACE_CONN_CLOSE("sess %p (scst_sess %p)", sess, scst_sess);
+       TRACE_CONN_CLOSE_DBG("sess %p (scst_sess %p)", sess, scst_sess);
 
        sess->shutting_down = 1;
        complete_all(&sess->unreg_compl);
@@ -188,6 +189,8 @@ static void close_conn(struct iscsi_conn *conn)
        struct iscsi_session *session = conn->session;
        struct iscsi_target *target = conn->target;
        unsigned long start_waiting = jiffies;
+       unsigned long shut_start_waiting = start_waiting;
+       bool pending_reported = 0, wait_expired = 0, shut_expired = 0;
 
        TRACE_ENTRY();
 
@@ -198,12 +201,12 @@ static void close_conn(struct iscsi_conn *conn)
 
        sBUG_ON(!conn->closing);
 
-       if (conn->force_close) {
-               conn->sock->ops->shutdown(conn->sock,
-                       RCV_SHUTDOWN|SEND_SHUTDOWN);
-       } else {
+       if (conn->active_close) {
                /* We want all our already send operations to complete */
                conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
+       } else {
+               conn->sock->ops->shutdown(conn->sock,
+                       RCV_SHUTDOWN|SEND_SHUTDOWN);
        }
 
        /*
@@ -251,7 +254,7 @@ static void close_conn(struct iscsi_conn *conn)
                        struct list_head *pending_list = &session->pending_list;
                        int req_freed;
 
-                       TRACE_CONN_CLOSE("Disposing pending commands on "
+                       TRACE_CONN_CLOSE_DBG("Disposing pending commands on "
                                "connection %p (conn_ref_cnt=%d)", conn,
                                atomic_read(&conn->conn_ref_cnt));
 
@@ -267,13 +270,13 @@ static void close_conn(struct iscsi_conn *conn)
                                req_freed = 0;
                                list_for_each_entry(cmnd, pending_list,
                                                        pending_list_entry) {
-                                       TRACE_CONN_CLOSE("Pending cmd %p"
+                                       TRACE_CONN_CLOSE_DBG("Pending cmd %p"
                                                "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
                                                cmnd, conn, cmnd->pdu.bhs.sn,
                                                session->exp_cmd_sn);
                                        if ((cmnd->conn == conn) &&
                                            (session->exp_cmd_sn == cmnd->pdu.bhs.sn)) {
-                                               TRACE_CONN_CLOSE("Freeing pending cmd %p",
+                                               TRACE_CONN_CLOSE_DBG("Freeing pending cmd %p",
                                                        cmnd);
 
                                                list_del(&cmnd->pending_list_entry);
@@ -294,13 +297,16 @@ static void close_conn(struct iscsi_conn *conn)
                        spin_unlock(&session->sn_lock);
 
                        if (time_after(jiffies, start_waiting + 10*HZ)) {
-                               TRACE_CONN_CLOSE("%s", "Pending wait time expired");
+                               if (!pending_reported) {
+                                       TRACE_CONN_CLOSE("%s", "Pending wait time expired");
+                                       pending_reported = 1;
+                               }
                                spin_lock(&session->sn_lock);
                                do {
                                        req_freed = 0;
                                        list_for_each_entry(cmnd, pending_list,
                                                        pending_list_entry) {
-                                               TRACE_CONN_CLOSE("Pending cmd %p"
+                                               TRACE_CONN_CLOSE_DBG("Pending cmd %p"
                                                        "(conn %p, cmd_sn %u, exp_cmd_sn %u)",
                                                        cmnd, conn, cmnd->pdu.bhs.sn,
                                                        session->exp_cmd_sn);
@@ -330,22 +336,28 @@ static void close_conn(struct iscsi_conn *conn)
 
                iscsi_make_conn_wr_active(conn);
 
-               if (time_after(jiffies, start_waiting + 7*HZ)) {
+               if (time_after(jiffies, start_waiting + 10*HZ) && !wait_expired) {
                        TRACE_CONN_CLOSE("Wait time expired (conn %p, "
                                "sk_state %d)", conn, conn->sock->sk->sk_state);
                        conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN);
+                       wait_expired = 1;
+                       shut_start_waiting = jiffies;
                }
 
-               if (time_after(jiffies, start_waiting + 15*HZ)) {
-                       TRACE_CONN_CLOSE("Wait time after shutdown expired "
-                               "(conn %p, sk_state %d)", conn,
-                               conn->sock->sk->sk_state);
-                       conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
-               }
-
-               msleep(200);
+               if (conn->deleting) {
+                       if (wait_expired && !shut_expired &&
+                           time_after(jiffies, shut_start_waiting + 10*HZ)) {
+                               TRACE_CONN_CLOSE("Wait time after shutdown expired "
+                                       "(conn %p, sk_state %d)", conn,
+                                       conn->sock->sk->sk_state);
+                               conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
+                               shut_expired = 1;
+                       }
+                       msleep(200);
+               } else
+                       msleep(1000);
 
-               TRACE_CONN_CLOSE("conn %p, conn_ref_cnt %d left, wr_state %d, "
+               TRACE_CONN_CLOSE_DBG("conn %p, conn_ref_cnt %d left, wr_state %d, "
                        "exp_cmd_sn %u", conn, atomic_read(&conn->conn_ref_cnt),
                        conn->wr_state, session->exp_cmd_sn);
 #ifdef DEBUG
@@ -427,8 +439,8 @@ static void close_conn(struct iscsi_conn *conn)
                if (t && (atomic_read(&conn->conn_ref_cnt) == 0))
                        break;
 
-               TRACE_CONN_CLOSE("Waiting for wr thread (conn %p), wr_state %x",
-                       conn, conn->wr_state);
+               TRACE_CONN_CLOSE_DBG("Waiting for wr thread (conn %p), "
+                       "wr_state %x", conn, conn->wr_state);
                msleep(50);
        }
 
index bb8aa75..e3b0500 100644 (file)
@@ -177,8 +177,9 @@ static void iscsi_session_info_show(struct seq_file *seq, struct iscsi_target *t
        struct iscsi_session *session;
 
        list_for_each_entry(session, &target->session_list, session_list_entry) {
-               seq_printf(seq, "\tsid:%llu initiator:%s\n",
-                          (unsigned long long) session->sid, session->initiator_name);
+               seq_printf(seq, "\tsid:%llu initiator:%s shutting down %d\n",
+                          (unsigned long long)session->sid,
+                          session->initiator_name, session->shutting_down);
                conn_info_show(seq, session);
        }
 }
index c0515dc..cd47941 100644 (file)
@@ -240,7 +240,8 @@ void target_del_all(void)
                                                                conn_list_entry) {
                                                        TRACE_MGMT_DBG("Mark conn %p "
                                                                "closing", conn);
-                                                       mark_conn_closed(conn);
+                                                       __mark_conn_closed(conn,
+                                                               ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING);
                                                }
                                        } else {
                                                TRACE_MGMT_DBG("Freeing session %p "
index 85ba096..0f8c2be 100644 (file)
@@ -477,6 +477,13 @@ mode is used.
 For example, "echo "open disk1 /vdisks/disk1" >/proc/scsi_tgt/vdisk/vdisk"
 will open file /vdisks/disk1 as virtual FILEIO disk with name "disk1".
 
+CAUTION: If you partitioned/formatted your device with block size X, *NEVER*
+======== ever try to export and then mount it (even accidentally) with another
+         block size. Otherwise you can *instantly* damage it pretty
+        badly as well as all your data on it. Messages on initiator like:
+        "attempt to access beyond end of device" is the sign of such
+        damage.
+
 IMPORTANT: By default for performance reasons VDISK FILEIO devices use write
 =========  back caching policy. This is generally safe from the consistence of
            journaled file systems, laying over them, point of view, but
@@ -743,13 +750,17 @@ the target abort or reset messages, then your backstorage is too slow
 comparing with your target link speed and amount of simultaneously
 queued commands. On some seek intensive workloads even fast disks or
 RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
-can be as slow as 0.3 MB/s. So, simply processing of one or more
-commands takes too long time, hence initiator decides that they are
-stuck on the target and tries to recover. Particularly, it is known that
-the default amount of simultaneously queued commands (48) is sometimes
-too high if you do intensive writes from VMware on a target disk, which
-uses LVM in the snapshot mode. In this case value like 16 or even 8-10
-depending of your backstorage speed could be more appropriate.
+can be as slow as 0.3 MB/s. Another possible cause for that can be
+MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
+(check the whole thread as well).
+
+Thus, in such situations simply processing of one or more commands takes
+too long time, hence initiator decides that they are stuck on the target
+and tries to recover. Particularly, it is known that the default amount
+of simultaneously queued commands (48) is sometimes too high if you do
+intensive writes from VMware on a target disk, which uses LVM in the
+snapshot mode. In this case value like 16 or even 8-10 depending of your
+backstorage speed could be more appropriate.
 
 Unfortunately, currently SCST lacks dynamic I/O flow control, when the
 queue depth on the target is dynamically decreased/increased based on
index d2eb99f..121fc6a 100644 (file)
@@ -1288,7 +1288,6 @@ struct scst_mgmt_cmd
        unsigned int needs_unblocking:1;
        unsigned int lun_set:1;         /* set, if lun field is valid */
        unsigned int cmd_sn_set:1;      /* set, if cmd_sn field is valid */
-       unsigned int nexus_loss_check_active:1; /* set, if nexus loss check is active */
        unsigned int nexus_loss_check_done:1; /* set, if nexus loss check is done */
 
        /*
index 2f48036..4db63ea 100644 (file)
@@ -3399,7 +3399,7 @@ void scst_done_cmd_mgmt(struct scst_cmd *cmd)
 
                if (mcmd->completed) {
                        sBUG_ON(mcmd->nexus_loss_check_done);
-                       mcmd->nexus_loss_check_active = 1;
+                       mcmd->completed = 0;
                        mcmd->state = SCST_MGMT_CMD_STATE_CHECK_NEXUS_LOSS;
                        TRACE_MGMT_DBG("Adding mgmt cmd %p to active mgmt cmd "
                                "list", mcmd);
@@ -3451,7 +3451,7 @@ static void scst_finish_cmd_mgmt(struct scst_cmd *cmd)
                        continue;
                }
 
-               if (mcmd->completed && !mcmd->nexus_loss_check_active) {
+               if (mcmd->completed) {
                        mcmd->state = SCST_MGMT_CMD_STATE_DONE;
                        TRACE_MGMT_DBG("Adding mgmt cmd %p to active mgmt cmd "
                                "list", mcmd);
@@ -4354,7 +4354,6 @@ static int scst_mgmt_cmd_check_nexus_loss(struct scst_mgmt_cmd *mcmd)
        }
 
        mcmd->nexus_loss_check_done = 1;
-       mcmd->nexus_loss_check_active = 0;
 
        res = scst_set_mcmd_next_state(mcmd);