[PATCH net] rxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975]

From: David Howells
Date: Wed Nov 16 2022 - 09:08:35 EST


After rxrpc_unbundle_conn() has removed a connection from a bundle, it
checks to see if there are any conns with available channels and, if not,
removes and attempts to destroy the bundle.

Whilst it does check after grabbing client_bundles_lock that there are no
connections attached, this races with rxrpc_look_up_bundle() retrieving the
bundle, but not attaching a connection for the connection to be attached
later.

There is therefore a window in which the bundle can get destroyed before we
manage to attach a new connection to it.

Fix this by adding an "active" counter to struct rxrpc_bundle:

(1) rxrpc_connect_call() obtains an active count by prepping/looking up a
bundle and ditches it before returning.

(2) If, during rxrpc_connect_call(), a connection is added to the bundle,
this obtains an active count, which is held until the connection is
discarded.

(3) rxrpc_deactivate_bundle() is created to drop an active count on a
bundle and destroy it when the active count reaches 0. The active
count is checked inside client_bundles_lock() to prevent a race with
rxrpc_look_up_bundle().

(4) rxrpc_unbundle_conn() then calls rxrpc_deactivate_bundle().

Fixes: 245500d853e9 ("rxrpc: Rewrite the client connection manager")
Reported-by: zdi-disclosures@xxxxxxxxxxxxxx # ZDI-CAN-15975
Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
Tested-by: zdi-disclosures@xxxxxxxxxxxxxx
cc: Marc Dionne <marc.dionne@xxxxxxxxxxxx>
cc: linux-afs@xxxxxxxxxxxxxxxxxxx
---

net/rxrpc/ar-internal.h | 1 +
net/rxrpc/conn_client.c | 38 +++++++++++++++++++++++---------------
2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1ad0ec5afb50..8499ceb7719c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -399,6 +399,7 @@ enum rxrpc_conn_proto_state {
struct rxrpc_bundle {
struct rxrpc_conn_parameters params;
refcount_t ref;
+ atomic_t active; /* Number of active users */
unsigned int debug_id;
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3c9eeb5b750c..bdb335cb2d05 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);

+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
+
/*
* Get a connection ID and epoch for a client connection from the global pool.
* The connection struct pointer is then recorded in the idr radix tree. The
@@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
bundle->params = *cp;
rxrpc_get_peer(bundle->params.peer);
refcount_set(&bundle->ref, 1);
+ atomic_set(&bundle->active, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
}
@@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle)

dead = __refcount_dec_and_test(&bundle->ref, &r);

- _debug("PUT B=%x %d", d, r);
+ _debug("PUT B=%x %d", d, r - 1);
if (dead)
rxrpc_free_bundle(bundle);
}
@@ -338,6 +341,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
rxrpc_free_bundle(candidate);
found_bundle:
rxrpc_get_bundle(bundle);
+ atomic_inc(&bundle->active);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [found]", bundle->debug_id);
return bundle;
@@ -435,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
if (old)
trace_rxrpc_client(old, -1, rxrpc_client_replace);
candidate->bundle_shift = shift;
+ atomic_inc(&bundle->active);
bundle->conns[i] = candidate;
for (j = 0; j < RXRPC_MAXCALLS; j++)
set_bit(shift + j, &bundle->avail_chans);
@@ -725,6 +730,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
smp_rmb();

out_put_bundle:
+ rxrpc_deactivate_bundle(bundle);
rxrpc_put_bundle(bundle);
out:
_leave(" = %d", ret);
@@ -900,9 +906,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
- struct rxrpc_local *local = bundle->params.local;
unsigned int bindex;
- bool need_drop = false, need_put = false;
+ bool need_drop = false;
int i;

_enter("C=%x", conn->debug_id);
@@ -921,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
}
spin_unlock(&bundle->channel_lock);

- /* If there are no more connections, remove the bundle */
- if (!bundle->avail_chans) {
- _debug("maybe unbundle");
- spin_lock(&local->client_bundles_lock);
+ if (need_drop) {
+ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_connection(conn);
+ }
+}

- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
- if (bundle->conns[i])
- break;
- if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) {
+/*
+ * Drop the active count on a bundle.
+ */
+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_local *local = bundle->params.local;
+ bool need_put = false;
+
+ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
+ if (!bundle->params.exclusive) {
_debug("erase bundle");
rb_erase(&bundle->local_node, &local->client_bundles);
need_put = true;
@@ -939,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (need_put)
rxrpc_put_bundle(bundle);
}
-
- if (need_drop)
- rxrpc_put_connection(conn);
- _leave("");
}

/*