Discussion:
[ofw] [Patch 40/62] Reference implementation of NDv2
Fab Tillier
2013-02-21 02:27:25 UTC
Permalink
Fix locking bug in mcast mgr.

When a device is added, the lock is held very early on, and is not released in case of errors. This patch reduces the scope of the lock to only the insertion into the mcast manager's port list.

I also added comments and moved code around a bit for the callback invocations, where the lock was being released without having been acquired in that function, just to make things clearer.

Signed-off-by: Fab Tillier <***@microsoft.com>

diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c .\core\al\kernel\al_mcast_mgr.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c Thu May 31 11:22:16 2012
+++ .\core\al\kernel\al_mcast_mgr.c Fri Aug 10 00:24:34 2012
@@ -439,8 +439,6 @@ mcast_mgr_add_ca_ports(
return IB_NOT_FOUND;
}

- cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
-
for ( i = 0; i < p_ci_ca->num_ports; i++)
{
mcast_mgr_t* p_mcast_mgr;
@@ -475,13 +473,13 @@ mcast_mgr_add_ca_ports(

p_mcast_mgr->close_pending = FALSE;

+ cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
InsertTailList(&g_mcast_mgr_db.mgr_list, &p_port_info->port_info_entry);
+ cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );

AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_ERROR, ("Add CA port 0x%I64x\n", p_port_info->port_guid) );
}

- cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );
-
return IB_SUCCESS;

error:
@@ -570,6 +568,9 @@ exit:
}


+//
+// This function is called with the global mcast_mgr lock held.
+//
ib_api_status_t
mcast_mgr_process_next_request(
IN mcast_mgr_t* p_mcast_mgr,
@@ -608,13 +609,18 @@ mcast_mgr_process_next_request(
mcast_mgr_print_node_msg(TRACE_LEVEL_ERROR, p_mcast_request, " running join request failed - inform user by callback");

p_mcast_request->req_state = RUNNING_CB;
+ //
+ // Release the lock before invoking the callback. Note that we remove the
+ // request from the list before invoking the callback, so as to allow the
+ // list to change during the callback without worry.
+ //
+ RemoveEntryList(&p_mcast_request->request_entry);
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
mcast_rec.mcast_context = p_join_req->mcast_req.mcast_context;
mcast_rec.status = status;
p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);

- RemoveEntryList(&p_mcast_request->request_entry);
cl_event_signal( &p_mcast_request->cb_finished );
p_mcast_request->req_state = DONE;
p_mcast_node->h_mcast = NULL;
@@ -645,12 +651,17 @@ mcast_mgr_process_next_request(
p_mcast_node->connections++;

p_mcast_request->req_state = RUNNING_CB;
+ //
+ // Release the lock before invoking the callback. Note that we remove the
+ // request from the list before invoking the callback, so as to allow the
+ // list to change during the callback without worry.
+ //
+ RemoveEntryList(&p_mcast_request->request_entry);
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
mcast_rec.mcast_context = p_mcast_request->p_join_req->mcast_req.mcast_context;
p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);

- RemoveEntryList(&p_mcast_request->request_entry);
InsertTailList(&p_mcast_node->connected_req_list, &p_mcast_request->request_entry);
p_mcast_request->req_state = DONE;
cl_event_signal( &p_mcast_request->cb_finished );
@@ -679,15 +690,19 @@ mcast_mgr_process_next_request(
p_mcast_node->connections--;

p_mcast_request->req_state = RUNNING_CB;
+ //
+ // Release the lock before invoking the callback. Note that we remove the
+ // request from the list before invoking the callback, so as to allow the
+ // list to change during the callback without worry.
+ //
+ RemoveEntryList(&p_mcast_request->request_entry);
+ RemoveEntryList(&p_join_request->request_entry);
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
user_context = (void*)p_join_request->p_join_req->mcast_req.mcast_context;
p_join_request->req_dereg_status = IB_SUCCESS;
if (p_mcast_request->p_leave_req->leave_cb)
p_mcast_request->p_leave_req->leave_cb(user_context);
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
-
- RemoveEntryList(&p_mcast_request->request_entry);
- RemoveEntryList(&p_join_request->request_entry);

deref_request(p_join_request);
deref_request(p_mcast_request);

Loading...