[MLX4] fix in error flow.
hw/mlx4/kernel/bus/net/catas.c
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx4.h"

enum {
	MLX4_CATAS_POLL_INTERVAL	= 5 * HZ,
};

static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);

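/*
 * mlx4_dispatch_reset_event - deliver a reset event to the registered
 * soft-reset handlers. A handler is called only if it is marked with
 * IB_IVH_RESET_CB, has not been notified yet and has declared itself
 * ready (IB_IVH_NOTIF_READY); otherwise the event is recorded as pending
 * and is delivered later from mlx4_reset_ready().
 */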
void mlx4_dispatch_reset_event(struct ib_device *ibdev, enum ib_event_type type)
{
	unsigned long flags;
	struct ib_event event;
	struct ib_event_handler *handler;

	event.device = ibdev;
	event.event = type;

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);

	list_for_each_entry(handler, &ibdev->event_handler_list, list, struct ib_event_handler)
	{
		// notify only soft-reset handlers
		if ( handler->flags & IB_IVH_RESET_CB )
			// notify only those that have not been notified yet
			if ( !(handler->flags & IB_IVH_NOTIFIED) ) {
				// notify only those that are ready to get the notification
				if ( handler->flags & IB_IVH_NOTIF_READY ) {
					// make sure we do not notify this handler twice
					handler->flags |= IB_IVH_NOTIFIED;
					handler->flags &= ~(IB_IVH_NOTIF_READY |
						IB_IVH_RESET_D_PENDING | IB_IVH_RESET_C_PENDING);
					handler->handler(handler, &event);
				}
				else {
					// the handler is not ready yet - pend the notification
					if (type == IB_EVENT_RESET_DRIVER)
						handler->flags |= IB_IVH_RESET_D_PENDING;
					else
						handler->flags |= IB_IVH_RESET_C_PENDING;
				}
			}
	}

	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
}

/**
 * get_event_handlers - move the reset event handlers of a device to a list
 * @device: the IB device
 * @tlist: the list that receives the handlers
 *
 * get_event_handlers() removes all the reset callback handlers from the
 * device's handler list and puts them on 'tlist'.
 */
static void get_event_handlers(struct ib_device *device, struct list_head *tlist)
{
	unsigned long flags;
	struct ib_event_handler *handler, *thandler;

	spin_lock_irqsave(&device->event_handler_lock, &flags);

	list_for_each_entry_safe(handler, thandler, &device->event_handler_list,
		list, struct ib_event_handler, struct ib_event_handler)
	{
		// take out only the reset callbacks
		if ( handler->flags & IB_IVH_RESET_CB ) {
			list_del( &handler->list );
			list_add_tail( &handler->list, tlist );
		}
	}

	spin_unlock_irqrestore(&device->event_handler_lock, flags);
}

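/* dump the firmware catastrophic-error buffer to the log, one dword per line */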
static void dump_err_buf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	u32 i;

	mlx4_err(dev, "Internal error detected:\n");
	for (i = 0; i < priv->fw.catas_size; ++i)
		mlx4_warn(dev, "  buf[%02x]: %08x\n",
			  i, swab32(readl(priv->catas_err.map + i)));
}

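/*
 * catas_reset_wi - work item, executed at PASSIVE_LEVEL, that handles a
 * detected catastrophic error: resets the HCA once (guarded by
 * dev->reset_pending), bars the device, dumps the error buffer and
 * dispatches the reset events to the clients.
 */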
static void
catas_reset_wi(
	IN				DEVICE_OBJECT*				p_dev_obj,
	IN				struct mlx4_dev *			dev )
{
	NTSTATUS status;
	long do_reset;
	UNUSED_PARAM(p_dev_obj);

	// reset the HCA only once
	do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
	if (do_reset == 0) {
		status = mlx4_reset(dev);
		if ( !NT_SUCCESS( status ) ) {
			mlx4_err(dev, "Failed to reset HCA, aborting (status %#x)\n", status);
		}

		dev->flags |= MLX4_FLAG_RESET_DRIVER;	// bar the device
	}

	dump_err_buf(dev);
	mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
	mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
}

/* poll the catastrophic-error buffer; runs at DISPATCH_LEVEL from the timer DPC */
static void poll_catas(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (readl(priv->catas_err.map)) {
		mlx4_warn(dev, "Detected catastrophic error on mdev %p\n", dev);
		IoQueueWorkItem( priv->catas_err.catas_work, catas_reset_wi, DelayedWorkQueue, dev );
	} else {
		spin_lock_dpc(&catas_lock);
		if (!priv->catas_err.stop) {
			KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval,
				0, &priv->catas_err.timer_dpc );
		}
		spin_unlock_dpc(&catas_lock);
	}
}

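/* timer DPC - just re-runs the poll */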
static void timer_dpc(
	IN struct _KDPC  *Dpc,
	IN PVOID  DeferredContext,
	IN PVOID  SystemArgument1,
	IN PVOID  SystemArgument2
	)
{
	struct mlx4_dev *dev = (struct mlx4_dev *)DeferredContext;
	UNREFERENCED_PARAMETER(Dpc);
	UNREFERENCED_PARAMETER(SystemArgument1);
	UNREFERENCED_PARAMETER(SystemArgument2);
	poll_catas( dev );
}

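/*
 * mlx4_start_catas_poll - map the firmware catastrophic-error buffer and
 * arm the periodic polling timer. Returns 0 on success or a negative
 * error code.
 */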
int mlx4_start_catas_poll(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 addr;
	int err;

	priv->catas_err.map = NULL;

	addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
		priv->fw.catas_offset;

	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
	if (!priv->catas_err.map) {
		mlx4_warn(dev, "Failed to map internal error buffer at 0x%I64x\n",
			  addr);
		err = -ENOMEM;
		goto err_map;
	}

	priv->catas_err.catas_work = IoAllocateWorkItem( dev->pdev->p_self_do );
	if (!priv->catas_err.catas_work) {
		mlx4_warn(dev, "Failed to allocate work item for catastrophic-error polling\n");
		err = -ENOMEM;
		goto err_alloc;
	}

	priv->catas_err.stop = 0;
	spin_lock_init( &catas_lock );
	KeInitializeDpc( &priv->catas_err.timer_dpc, timer_dpc, dev );
	KeInitializeTimer( &priv->catas_err.timer );
	// a negative DueTime means a relative timeout in 100-ns units
	priv->catas_err.interval.QuadPart = (-10) * (__int64)MLX4_CATAS_POLL_INTERVAL;
	KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval,
		0, &priv->catas_err.timer_dpc );
	return 0;

err_alloc:
	iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
	priv->catas_err.map = NULL;	// prevent a second iounmap in mlx4_stop_catas_poll
err_map:
	return err;
}

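/*
 * mlx4_stop_catas_poll - stop the polling timer, wait for a possibly
 * running timer DPC to finish, then release the error-buffer mapping
 * and the work item.
 */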
void mlx4_stop_catas_poll(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	spin_lock_irq(&catas_lock);
	if (priv->catas_err.stop) {
		spin_unlock_irq(&catas_lock);
		return;
	}
	priv->catas_err.stop = 1;
	spin_unlock_irq(&catas_lock);

	KeCancelTimer(&priv->catas_err.timer);
	KeFlushQueuedDpcs();
	if (priv->catas_err.map)
		iounmap(priv->catas_err.map, priv->fw.catas_size * 4);

	if (priv->catas_err.catas_work)
		IoFreeWorkItem( priv->catas_err.catas_work );
}

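/*
 * wait4reset - mark this client as reset-ready and return the number of
 * reset-callback clients that are not ready yet (0 means the reset may
 * proceed).
 */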
static int wait4reset(struct ib_event_handler *event_handler)
{
	int n_not_ready = 0;
	unsigned long flags;
	struct ib_event_handler *handler;
	struct ib_device *ibdev = event_handler->device;

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);

	// mark this handler (= client) as reset-ready
	event_handler->flags |= IB_IVH_RESET_READY;

	// count the clients that are not ready yet
	list_for_each_entry(handler, &ibdev->event_handler_list, list, struct ib_event_handler)
		if ( handler->flags & IB_IVH_RESET_CB )
			if ( !(handler->flags & IB_IVH_RESET_READY) )
				++n_not_ready;

	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);

	return n_not_ready;
}

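/*
 * mlx4_reset_ready - called by a client when it is ready to receive reset
 * notifications; delivers a pending driver or client reset event if one
 * was recorded while the client was not ready.
 */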
int mlx4_reset_ready( struct ib_event_handler *event_handler )
{
	unsigned long flags;
	struct ib_device *ibdev = event_handler->device;

	ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
	event_handler->flags |= IB_IVH_NOTIF_READY;
	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);

	if (event_handler->flags & IB_IVH_RESET_D_PENDING)
		mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_DRIVER);
	else if (event_handler->flags & IB_IVH_RESET_C_PENDING)
		mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
	return 0;
}

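/*
 * mlx4_reset_execute - marks the client ready and, once all reset-callback
 * clients are ready, bars the device, detaches the reset handlers,
 * restarts the HCA and notifies the detached handlers with
 * IB_EVENT_RESET_END or IB_EVENT_RESET_FAILED.
 */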
int mlx4_reset_execute( struct ib_event_handler *event_handler )
{
	int err;
	struct ib_event event;
	struct list_head tlist;
	struct ib_event_handler *handler, *thandler;
	struct ib_device *ibdev = event_handler->device;
	struct pci_dev *pdev = ibdev->dma_device->pdev;

	// mark the client as "ready for reset" and check whether we can reset now
	if (wait4reset(event_handler)) {
		return 0;
	}

	// fully bar the device
	ibdev->dma_device->flags |= MLX4_FLAG_RESET_STARTED;

	// detach the old handler list
	INIT_LIST_HEAD(&tlist);
	get_event_handlers(ibdev, &tlist);

	// restart the device
	mlx4_err(pdev->dev, "Performing HCA restart ...\n");
	err = mlx4_restart_one(pdev);
	if (err || mlx4_is_livefish(pdev->dev)) {
		mlx4_err(pdev->dev, "HCA restart failed.\n");
		event.event = IB_EVENT_RESET_FAILED;
	}
	else {
		// recreate the bus interfaces
		fix_bus_ifc(pdev);
		event.event = IB_EVENT_RESET_END;
		mlx4_err(pdev->dev, "HCA restart finished. Notifying the clients ...\n");
	}

	// notify the clients
	list_for_each_entry_safe(handler, thandler, &tlist,
		list, struct ib_event_handler, struct ib_event_handler)
	{
		// remove the handler from the temporary list;
		// it will re-register itself during the callback
		list_del( &handler->list );
		handler->handler(handler, &event);
	}

	return err;
}

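/*
 * card_reset_wi - work item for a client-requested reset: frees its own
 * work item and dispatches IB_EVENT_RESET_CLIENT to the registered
 * handlers.
 */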
static void
card_reset_wi(
	IN				DEVICE_OBJECT*				p_dev_obj,
	IN				struct ib_event_handler *	event_handler )
{
	struct ib_device *ibdev = event_handler->device;

	UNUSED_PARAM(p_dev_obj);
	IoFreeWorkItem( event_handler->rsrv_ptr );

	// notify the clients
	mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
}

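/*
 * mlx4_reset_request - client-initiated reset request: bars the device
 * with MLX4_FLAG_RESET_CLIENT (unless a reset is already pending) and
 * defers the actual notification to a system work item so that the
 * operations in progress can complete.
 */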
int mlx4_reset_request( struct ib_event_handler *event_handler )
{
	struct ib_device *ibdev;
	struct mlx4_dev *dev;

	unsigned long flags;

	ibdev = event_handler->device;
	if (ibdev == NULL)
		return -EFAULT;

	dev = ibdev->dma_device;
	if (dev == NULL)
		return -EFAULT;

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);

	// set the device to RESET_PENDING mode
	if (!(dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER))) {
		PIO_WORKITEM reset_work;

		// bar the device
		dev->flags |= MLX4_FLAG_RESET_CLIENT;

		// delay the reset to a system thread
		// to allow the operations that are in progress to complete
		reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
		if (!reset_work) {
			spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
			mlx4_err(dev, "mlx4_reset_request: IoAllocateWorkItem failed, the reset will not be propagated\n");
			return -ENOMEM;
		}
		event_handler->rsrv_ptr = reset_work;
		IoQueueWorkItem( reset_work, card_reset_wi, DelayedWorkQueue, event_handler );
	}

	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);

	return 0;
}

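/* register a reset callback; refused with -EBUSY while the device is in reset */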
int mlx4_reset_cb_register( struct ib_event_handler *event_handler )
{
	if (mlx4_is_in_reset(event_handler->device->dma_device))
		return -EBUSY;

	return ib_register_event_handler(event_handler);
}

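/* unregister a reset callback registered with mlx4_reset_cb_register() */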
int mlx4_reset_cb_unregister( struct ib_event_handler *event_handler )
{
	return ib_unregister_event_handler(event_handler);
}