ddc70cdb507c6f974299d64499faa0ffdc5101ef
[mirror/winof/.git] / hw / mlx4 / kernel / bus / net / catas.c
1 /*
2  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include "mlx4.h"
34
35 enum {
36         MLX4_CATAS_POLL_INTERVAL        = 5 * HZ,
37 };
38
39 static DEFINE_SPINLOCK(catas_lock);
40 static LIST_HEAD(catas_list);
41
/**
 * mlx4_dispatch_reset_event - deliver a reset event to registered reset handlers
 * @ibdev: IB device whose event-handler list is walked
 * @type: reset event type (IB_EVENT_RESET_DRIVER or IB_EVENT_RESET_CLIENT)
 *
 * Only handlers flagged IB_IVH_RESET_CB participate.  A handler that has not
 * been notified yet and is ready (IB_IVH_NOTIF_READY) is called exactly once
 * and marked IB_IVH_NOTIFIED; a not-yet-ready handler gets the event latched
 * as pending (IB_IVH_RESET_D_PENDING / IB_IVH_RESET_C_PENDING) so that
 * mlx4_reset_ready() can replay it later.
 *
 * NOTE(review): handler->handler() runs while event_handler_lock is held with
 * interrupts saved — callbacks must not block or re-acquire this lock; verify
 * all registered reset callbacks honor that.
 */
void mlx4_dispatch_reset_event(struct ib_device *ibdev, enum ib_event_type type)
{
	unsigned long flags;
	struct ib_event event;
	struct ib_event_handler *handler;

	event.device = ibdev;
	event.event = type;

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);

	list_for_each_entry(handler, &ibdev->event_handler_list, list, struct ib_event_handler)
	{
		// notify only soft reset handlers
		if ( handler->flags & IB_IVH_RESET_CB )
			// notify only those, that are not yet notified
			if ( !(handler->flags & IB_IVH_NOTIFIED) ) {
				// notify only those that are ready to get the notification
				if ( handler->flags & IB_IVH_NOTIF_READY ) {
					// ensure we never notify this handler twice
					handler->flags |= IB_IVH_NOTIFIED;
					// consume the readiness and any latched pending events
					handler->flags &= ~(IB_IVH_NOTIF_READY | 
						IB_IVH_RESET_D_PENDING | IB_IVH_RESET_C_PENDING);
					handler->handler(handler, &event);
				}
				else {
					// handler not ready yet: latch the event for later replay
					if (type == IB_EVENT_RESET_DRIVER) 
						handler->flags |= IB_IVH_RESET_D_PENDING;
					else 
						handler->flags |= IB_IVH_RESET_C_PENDING;
				}
			}
	}

	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
}
79
80 /**
81  * get_event_handlers - return list of handlers of the device
82  * @device:device
83  * @tlist:list
84  *
85  * get_event_handlers() remove all the device event handlers and put them in 'tlist'
86  */
/**
 * get_event_handlers - return list of handlers of the device
 * @device:device
 * @tlist:list
 *
 * get_event_handlers() removes all the device's reset-callback event handlers
 * (those flagged IB_IVH_RESET_CB) from the device list and puts them in
 * 'tlist'.  Handlers without IB_IVH_RESET_CB stay registered.  Caller owns
 * 'tlist' afterwards; the moved handlers are no longer reachable from the
 * device's handler list.
 */
static void get_event_handlers(struct ib_device *device, struct list_head *tlist)
{
	unsigned long flags;
	struct ib_event_handler *handler, *thandler;

	spin_lock_irqsave(&device->event_handler_lock, &flags);

	// safe-iteration variant is required because entries are unlinked mid-walk
	list_for_each_entry_safe(handler, thandler, &device->event_handler_list, 
		list, struct ib_event_handler, struct ib_event_handler)
	{
		// take out only reset callbacks
		if ( handler->flags & IB_IVH_RESET_CB ) {
			list_del( &handler->list );
			list_add_tail( &handler->list, tlist );
		}
	}

	spin_unlock_irqrestore(&device->event_handler_lock, flags);
}
106
107
108 static void dump_err_buf(struct mlx4_dev *dev)
109 {
110         struct mlx4_priv *priv = mlx4_priv(dev);
111
112         u32 i;
113
114         mlx4_err(dev, "Internal error detected:\n");
115         for (i = 0; i < priv->fw.catas_size; ++i)
116                 mlx4_warn(dev, "  buf[%02x]: %08x\n",
117                          i, swab32(readl(priv->catas_err.map + i)));
118 }
119
/*
 * Work-item routine (runs at PASSIVE_LEVEL) handling a detected catastrophic
 * error: resets the HCA at most once per device (guarded by the interlocked
 * dev->reset_pending flag), bars the device with MLX4_FLAG_RESET_DRIVER,
 * dumps the firmware error buffer and notifies both the low-level event
 * dispatcher and the IB-level reset handlers.
 */
static void
catas_reset_wi(
	IN				DEVICE_OBJECT*				p_dev_obj,
	IN				struct mlx4_dev *			dev )
{
	NTSTATUS status;
	long do_reset;
	UNUSED_PARAM(p_dev_obj);

	// atomically claim the reset: only the first caller observes 0
	do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
	if (do_reset == 0) {
		status = mlx4_reset(dev);
		if ( !NT_SUCCESS( status ) ) {
			// reset failed; device is still barred below
			mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n", status);
		}
		
		dev->flags |= MLX4_FLAG_RESET_DRIVER;	// bar the device
	}

	// NOTE(review): buffer is dumped after the reset attempt — confirm the
	// mapped error buffer still holds meaningful contents post-reset
	dump_err_buf(dev);
	mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
	if (dev->pdev->ib_dev)
		mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
}
144
145 /* polling on DISPATCH_LEVEL */
/* polling on DISPATCH_LEVEL */
/*
 * One polling step: if the first word of the mapped error buffer is non-zero
 * a catastrophic error has occurred and recovery is deferred to a work item
 * (the timer is deliberately NOT re-armed in that case); otherwise the
 * one-shot timer is re-armed unless mlx4_stop_catas_poll() has set 'stop'.
 */
static void poll_catas(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (readl(priv->catas_err.map)) {
		
		mlx4_warn(dev, "Detected catastrophic error on mdev %p\n", dev);
		// drop to PASSIVE_LEVEL for the reset work
		IoQueueWorkItem( priv->catas_err.catas_work, catas_reset_wi, DelayedWorkQueue, dev );
	} else {
		// DPC-level lock: we are already at DISPATCH_LEVEL here
		spin_lock_dpc(&catas_lock);
		if (!priv->catas_err.stop) {
			// re-arm the one-shot poll timer
			KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval, 
				0, &priv->catas_err.timer_dpc );
		}
		spin_unlock_dpc(&catas_lock);
	}
}
163
164 static void  timer_dpc(
165         IN struct _KDPC  *Dpc,
166         IN PVOID  DeferredContext,
167         IN PVOID  SystemArgument1,
168         IN PVOID  SystemArgument2
169         )
170 {
171         struct mlx4_dev *dev = (struct mlx4_dev *)DeferredContext;
172         UNREFERENCED_PARAMETER(Dpc);
173         UNREFERENCED_PARAMETER(SystemArgument1);
174         UNREFERENCED_PARAMETER(SystemArgument2);
175         poll_catas( dev );
176 }
177
/**
 * mlx4_start_catas_poll - map the firmware error buffer and start polling
 * @dev: device to poll
 *
 * Maps the catastrophic-error buffer described by fw.catas_bar/offset/size,
 * allocates the recovery work item and arms the one-shot poll timer
 * (re-armed by poll_catas() after each expiry).
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be mapped, -EFAULT if
 * the work item cannot be allocated.
 */
int mlx4_start_catas_poll(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 addr;
	int err;

	priv->catas_err.map = NULL;

	addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
		priv->fw.catas_offset;

	// catas_size is in 32-bit words, hence the * 4
	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
	if (!priv->catas_err.map) {
		// NOTE(review): addr is u64 but printed with %lx — high 32 bits may
		// be dropped on this platform; confirm the shim's format handling
		mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
			  addr);
		err = -ENOMEM;
		goto err_map;
	}
	
	priv->catas_err.catas_work = IoAllocateWorkItem( dev->pdev->p_self_do );
	if (!priv->catas_err.catas_work) {
		mlx4_warn(dev, "Failed to allocate work item from polling thread\n");
		err = -EFAULT;
		goto err_alloc;
	}

	priv->catas_err.stop = 0;
	// NOTE(review): catas_lock is a file-global created with DEFINE_SPINLOCK;
	// re-initializing it on every device start looks redundant and could race
	// another device already polling — verify the shim's DEFINE_SPINLOCK
	// semantics before removing
	spin_lock_init( &catas_lock );
	KeInitializeDpc(  &priv->catas_err.timer_dpc, timer_dpc, dev );
	KeInitializeTimer( &priv->catas_err.timer );
	// negative QuadPart = relative due time, in 100ns units
	priv->catas_err.interval.QuadPart  = (-10)* (__int64)MLX4_CATAS_POLL_INTERVAL;
	KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval, 
		0, &priv->catas_err.timer_dpc );
	return 0;


err_alloc:
	iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
err_map:
	return err;
}
219
/**
 * mlx4_stop_catas_poll - stop error polling and release its resources
 * @dev: device being stopped
 *
 * Idempotent: a second call returns immediately once 'stop' is set.  Sets
 * 'stop' under catas_lock so poll_catas() will not re-arm the timer, cancels
 * the timer, flushes queued DPCs so no poll is still in flight, then unmaps
 * the error buffer and frees the work item.
 */
void mlx4_stop_catas_poll(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	spin_lock_irq(&catas_lock);
	if (priv->catas_err.stop) {
		// already stopped
		spin_unlock_irq(&catas_lock);
		return;
	}
	priv->catas_err.stop = 1;
	spin_unlock_irq(&catas_lock);

	// order matters: cancel the timer first, then wait out any running DPC
	KeCancelTimer(&priv->catas_err.timer);
	KeFlushQueuedDpcs();
	if (priv->catas_err.map)
		iounmap(priv->catas_err.map, priv->fw.catas_size * 4);

	if (priv->catas_err.catas_work) 
		IoFreeWorkItem( priv->catas_err.catas_work );
}
240
/*
 * Mark this handler (= client) as reset-ready and count how many other
 * reset-callback clients on the same device are still NOT ready.
 *
 * Returns 0 when every reset-callback client is ready (reset may proceed),
 * otherwise the number of clients still pending.
 */
static int wait4reset(struct ib_event_handler *event_handler)
{
	int n_not_ready = 0;
	unsigned long flags;
	struct ib_event_handler *handler;
	struct ib_device *ibdev = event_handler->device;

	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);

	// mark this handler (=client) reset-ready
	event_handler->flags |= IB_IVH_RESET_READY;

	// check the number of still not ready clients
	
	list_for_each_entry(handler, &ibdev->event_handler_list, list, struct ib_event_handler)
		if ( handler->flags & IB_IVH_RESET_CB )
			if ( !(handler->flags & IB_IVH_RESET_READY) ) 
				++n_not_ready;
	
	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);

	return n_not_ready;
}
264
/**
 * mlx4_reset_ready - declare this client ready to receive reset notifications
 * @event_handler: the client's registered handler
 *
 * Sets IB_IVH_NOTIF_READY under the handler lock, then replays any reset
 * event that was latched while the client was not ready (driver-initiated
 * pending takes precedence over client-initiated).  Must be called at
 * PASSIVE_LEVEL.  Always returns 0.
 */
int mlx4_reset_ready( struct ib_event_handler *event_handler )
{
	unsigned long flags;
	struct ib_device *ibdev = event_handler->device;

	ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
	
	spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
	event_handler->flags |= IB_IVH_NOTIF_READY;
	spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
	// NOTE(review): the pending bits are read after the lock is dropped;
	// a concurrent dispatch could clear them in between — confirm callers
	// serialize reset_ready against dispatch, or move the reads under the lock
	if (event_handler->flags & IB_IVH_RESET_D_PENDING)
		mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_DRIVER);
	else
	if (event_handler->flags & IB_IVH_RESET_C_PENDING)
		mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
	return 0;
}
282
283 int mlx4_reset_execute( struct ib_event_handler *event_handler )
284 {
285         int err;
286         struct ib_event event;
287         struct list_head tlist;
288         struct ib_event_handler *handler, *thandler;
289         struct ib_device *ibdev = event_handler->device;
290         struct pci_dev *pdev = ibdev->dma_device->pdev;
291
292         // mark client as "ready for reset" and check whether we can do reset
293         if (wait4reset(event_handler)) {
294                 return 0;
295         }
296
297         // fully bar the device
298         ibdev->dma_device->flags |= MLX4_FLAG_RESET_STARTED;
299         
300         // get old handler list 
301         INIT_LIST_HEAD(&tlist);
302         get_event_handlers(ibdev, &tlist);
303
304         // restart the device
305         mlx4_info(pdev->dev, "\n Performing HCA restart ... \n\n");
306         WriteEventLogEntryData( pdev->p_self_do, (ULONG)EVENT_MLX4_INFO_RESET_START, 0, 0, 0 );
307         err = mlx4_restart_one(pdev);
308         if (err || mlx4_is_livefish(pdev->dev)) {
309                 event.event = IB_EVENT_RESET_FAILED;
310                 mlx4_err(pdev->dev, "\n HCa restart failed. \n\n");
311         }
312         else {
313                 // recreate interfaces
314                 fix_bus_ifc(pdev);
315                 event.event = IB_EVENT_RESET_END;
316                 mlx4_info(pdev->dev, "\n HCA restart finished. Notifying the clients ... \n\n");
317                 WriteEventLogEntryData( pdev->p_self_do, (ULONG)EVENT_MLX4_INFO_RESET_END, 0, 0, 0 );
318         }
319
320         // notify the clients
321         list_for_each_entry_safe(handler, thandler, &tlist, 
322                 list, struct ib_event_handler, struct ib_event_handler)
323         {
324                 // because 'handler' will be re-registered during the next call
325                 list_del( &handler->list );
326                 handler->handler(handler, &event);
327         }
328         
329         return err;
330 }
331
/*
 * Work-item routine (PASSIVE_LEVEL) for a client-requested reset: frees the
 * one-shot work item that carried it (allocated in mlx4_reset_request() and
 * stashed in rsrv_ptr), then notifies all reset-callback clients.
 */
static void
card_reset_wi(
	IN				DEVICE_OBJECT*				p_dev_obj,
	IN				struct ib_event_handler *	event_handler )
{
	struct ib_device *ibdev = event_handler->device;

	UNUSED_PARAM(p_dev_obj);
	// self-free: this work item was allocated solely for this dispatch
	IoFreeWorkItem( event_handler->rsrv_ptr );

	// notify the clients
	mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
}
345
346 int mlx4_reset_request( struct ib_event_handler *event_handler )
347 {
348         struct ib_device *ibdev;
349         struct mlx4_dev *dev;
350
351         unsigned long flags;
352
353         ibdev = event_handler->device;
354         if (ibdev == NULL)
355                 return -EFAULT;
356
357         dev = ibdev->dma_device;
358         if (ibdev == NULL)
359                 return -EFAULT;
360
361         spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
362
363         
364         // set device to RESET_PENDING mode
365         if (!(dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER))) {
366                 PIO_WORKITEM reset_work;
367
368                 // bar the device
369                 dev->flags |= MLX4_FLAG_RESET_CLIENT;
370
371                 // delay reset to a system thread
372                 // to allow for end of operations that are in progress
373                 reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
374                 if (!reset_work) {
375             spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
376                         mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset will not be propagated\n");
377                         return -EFAULT;
378                 }
379                 event_handler->rsrv_ptr = reset_work;
380                 IoQueueWorkItem( reset_work, card_reset_wi, DelayedWorkQueue, event_handler );
381         }
382
383         spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
384
385
386         return 0;
387 }
388
389 int mlx4_reset_cb_register( struct ib_event_handler *event_handler )
390 {
391         if (mlx4_is_in_reset(event_handler->device->dma_device))
392                 return -EBUSY;
393
394         return ib_register_event_handler(event_handler);
395 }
396
/*
 * Unregister a reset-callback event handler; thin pass-through to the
 * generic IB handler unregistration.
 */
int mlx4_reset_cb_unregister( struct ib_event_handler *event_handler )
{
	int err = ib_unregister_event_handler(event_handler);
	return err;
}
401
402