The current way that afs_server refs are accounted and cleaned up sometimes
causes rmmod to hang when it is waiting for cell records to be removed. The
problem is that the cell cleanup might occasionally happen before the
server cleanup and then there's nothing that causes the cell to
garbage-collect the remaining servers as they become inactive.
Partially fix this by:
(1) Give each afs_server record its own management timer rather than
relying on the cell manager's central timer to drive each individual
cell's maintenance work item to garbage collect servers.
This timer is set when afs_unuse_server() reduces a server's activity
count to zero and will schedule the server's destroyer work item upon
firing.
(2) Give each afs_server record its own destroyer work item that removes
the record from the cell's database, shuts down the timer, cancels any
pending work for itself and sends an RPC to the server to cancel
outstanding callbacks.
This change, in combination with the timer, obviates the need for the
cell record and a bunch of server records to coordinate closely when
tearing everything down. With this, the cell record is pinned until the
server RCU is complete and namespace/module removal will wait until all
the cell records are removed. A minimal sketch of this pattern appears
after this list.
(3) Now that incoming calls are mapped to servers (and thus cells) using
data attached to an rxrpc_peer, the UUID-to-server mapping tree is
moved from the namespace to the cell (cell->fs_servers). This means
there can no longer be duplicates therein - and that allows the
mapping tree to be simpler as there doesn't need to be a chain of
same-UUID servers that are in different cells.
(4) The lock protecting the UUID mapping tree is switched to an
rw_semaphore on the cell rather than a seqlock on the namespace as
it's now only used during mounting in contexts in which we're allowed
to sleep.
(5) When it comes time for a cell that is being removed to purge its set
of servers, it just needs to iterate over them and wake them up. Once
a server becomes inactive, its destroyer work item will observe the
state of the cell and immediately remove that record.
(6) When a server record is removed, it is marked AFS_SERVER_FL_EXPIRED to
prevent reattempts at removal. The record will be dispatched to RCU
for destruction once its refcount reaches 0.
(7) The AFS_SERVER_FL_UNCREATED/CREATING flags are used to synchronise
simultaneous creation attempts. If one attempt fails, it will abandon
the attempt and allow another to try again.
Note that the record can't just be abandoned when dead as it's bound
into a server list attached to a volume and only subject to
replacement if the server list obtained for the volume from the VLDB
changes.
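
As a rough illustration of (1) and (2), here is a minimal sketch of the
timer-plus-destroyer-work-item pattern; struct server_sketch and the
function names are invented for the example and are not the identifiers
used by the patch:

	#include <linux/timer.h>
	#include <linux/workqueue.h>

	struct server_sketch {
		struct timer_list	timer;		/* set when activity drops to zero */
		struct work_struct	destroyer;	/* removes and cleans up the record */
	};

	/* Timer handler: punt the teardown to process context. */
	static void server_timer_fired(struct timer_list *t)
	{
		struct server_sketch *server =
			container_of(t, struct server_sketch, timer);

		schedule_work(&server->destroyer);
	}

	/* Destroyer work item: stop the timer rearming, then clean up. */
	static void server_destroyer(struct work_struct *work)
	{
		struct server_sketch *server =
			container_of(work, struct server_sketch, destroyer);

		timer_shutdown_sync(&server->timer);
		/* ... unhash from the cell, cancel outstanding callbacks ... */
	}

	static void server_init(struct server_sketch *server)
	{
		timer_setup(&server->timer, server_timer_fired, 0);
		INIT_WORK(&server->destroyer, server_destroyer);
	}

Driving teardown from a per-record timer means an inactive server can be
collected as soon as its own timer fires, without waiting for the cell
manager to sweep the whole cell.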
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20250224234154.2014840-15-dhowells@redhat.com/ # v1
Link: https://lore.kernel.org/r/20250310094206.801057-11-dhowells@redhat.com/ # v4
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver list management.
 *
 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include "internal.h"

void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
{
	int i;

	if (slist && refcount_dec_and_test(&slist->usage)) {
		for (i = 0; i < slist->nr_servers; i++)
			afs_unuse_server(net, slist->servers[i].server,
					 afs_server_trace_unuse_slist);
		kfree_rcu(slist, rcu);
	}
}

/*
 * Build a server list from a VLDB record.
 */
struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
					      struct key *key,
					      struct afs_vldb_entry *vldb)
{
	struct afs_server_list *slist;
	struct afs_server *server;
	unsigned int type_mask = 1 << volume->type;
	bool use_newrepsites = false;
	int ret = -ENOMEM, nr_servers = 0, newrep = 0, i, j, usable = 0;

	/* Work out if we're going to restrict to NEWREPSITE-marked servers or
	 * not. If at least one site is marked as NEWREPSITE, then it's likely
	 * that "vos release" is busy updating RO sites. We cut over from one
	 * to the other when >=50% of the sites have been updated. Sites that
	 * are in the process of being updated are marked DONTUSE.
	 */
	for (i = 0; i < vldb->nr_servers; i++) {
		if (!(vldb->fs_mask[i] & type_mask))
			continue;
		nr_servers++;
		if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
			continue;
		usable++;
		if (vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE)
			newrep++;
	}

	slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
	if (!slist)
		goto error;

	if (newrep) {
		if (newrep < usable / 2) {
			slist->ro_replicating = AFS_RO_REPLICATING_USE_OLD;
		} else {
			slist->ro_replicating = AFS_RO_REPLICATING_USE_NEW;
			use_newrepsites = true;
		}
	}

	refcount_set(&slist->usage, 1);
	rwlock_init(&slist->lock);

	/* Make sure a record exists for each server in the list. */
	for (i = 0; i < vldb->nr_servers; i++) {
		unsigned long se_flags = 0;
		bool newrepsite = vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE;

		if (!(vldb->fs_mask[i] & type_mask))
			continue;
		if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
			__set_bit(AFS_SE_EXCLUDED, &se_flags);
		if (newrep && (newrepsite ^ use_newrepsites))
			__set_bit(AFS_SE_EXCLUDED, &se_flags);

		server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
					   vldb->addr_version[i]);
		if (IS_ERR(server)) {
			ret = PTR_ERR(server);
			if (ret == -ENOENT ||
			    ret == -ENOMEDIUM)
				continue;
			goto error_2;
		}

		/* Insertion-sort by UUID */
		for (j = 0; j < slist->nr_servers; j++)
			if (memcmp(&slist->servers[j].server->uuid,
				   &server->uuid,
				   sizeof(server->uuid)) >= 0)
				break;
		if (j < slist->nr_servers) {
			if (slist->servers[j].server == server) {
				afs_unuse_server_notime(volume->cell->net, server,
							afs_server_trace_unuse_slist_isort);
				continue;
			}

			memmove(slist->servers + j + 1,
				slist->servers + j,
				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
		}

		slist->servers[j].server = server;
		slist->servers[j].volume = volume;
		slist->servers[j].flags = se_flags;
		slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE;
		slist->nr_servers++;
	}

	if (slist->nr_servers == 0) {
		ret = -EDESTADDRREQ;
		goto error_2;
	}

	return slist;

error_2:
	afs_put_serverlist(volume->cell->net, slist);
error:
	return ERR_PTR(ret);
}

/*
 * Copy the annotations from an old server list to its potential replacement.
 */
bool afs_annotate_server_list(struct afs_server_list *new,
			      struct afs_server_list *old)
{
	unsigned long mask = 1UL << AFS_SE_EXCLUDED;
	int i;

	if (old->nr_servers != new->nr_servers ||
	    old->ro_replicating != new->ro_replicating)
		goto changed;

	for (i = 0; i < old->nr_servers; i++) {
		if (old->servers[i].server != new->servers[i].server)
			goto changed;
		if ((old->servers[i].flags & mask) != (new->servers[i].flags & mask))
			goto changed;
	}
	return false;
changed:
	return true;
}

/*
 * Attach a volume to the servers it is going to use.
 */
void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
{
	struct afs_server_entry *se, *pe;
	struct afs_server *server;
	struct list_head *p;
	unsigned int i;

	down_write(&volume->cell->vs_lock);

	for (i = 0; i < slist->nr_servers; i++) {
		se = &slist->servers[i];
		server = se->server;

		list_for_each(p, &server->volumes) {
			pe = list_entry(p, struct afs_server_entry, slink);
			if (volume->vid <= pe->volume->vid)
				break;
		}
		list_add_tail(&se->slink, p);
	}

	slist->attached = true;
	up_write(&volume->cell->vs_lock);
}

/*
 * Reattach a volume to the servers it is going to use when the server list is
 * replaced. We try to switch the attachment points to avoid rewalking the
 * lists.
 */
void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
				    struct afs_server_list *old)
{
	unsigned int n = 0, o = 0;

	down_write(&volume->cell->vs_lock);

	while (n < new->nr_servers || o < old->nr_servers) {
		struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
		struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
		struct afs_server_entry *s;
		struct list_head *p;
		int diff;

		if (pn && po && pn->server == po->server) {
			pn->cb_expires_at = po->cb_expires_at;
			list_replace(&po->slink, &pn->slink);
			n++;
			o++;
			continue;
		}

		if (pn && po)
			diff = memcmp(&pn->server->uuid, &po->server->uuid,
				      sizeof(pn->server->uuid));
		else
			diff = pn ? -1 : 1;

		if (diff < 0) {
			list_for_each(p, &pn->server->volumes) {
				s = list_entry(p, struct afs_server_entry, slink);
				if (volume->vid <= s->volume->vid)
					break;
			}
			list_add_tail(&pn->slink, p);
			n++;
		} else {
			list_del(&po->slink);
			o++;
		}
	}

	up_write(&volume->cell->vs_lock);
}

/*
 * Detach a volume from the servers it has been using.
 */
void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
{
	unsigned int i;

	if (!slist->attached)
		return;

	down_write(&volume->cell->vs_lock);

	for (i = 0; i < slist->nr_servers; i++)
		list_del(&slist->servers[i].slink);

	slist->attached = false;
	up_write(&volume->cell->vs_lock);
}
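
To show how the helpers in this file fit together, here is a hypothetical
caller that swaps in a new server list when a volume's VLDB entry changes;
replace_server_list_sketch and the locking conventions around it are
invented for the example and are not part of this file:

	static int replace_server_list_sketch(struct afs_volume *volume,
					      struct key *key,
					      struct afs_vldb_entry *vldb,
					      struct afs_server_list **_slist)
	{
		struct afs_server_list *new, *old = *_slist;

		/* Build a candidate list from the updated VLDB record. */
		new = afs_alloc_server_list(volume, key, vldb);
		if (IS_ERR(new))
			return PTR_ERR(new);

		if (afs_annotate_server_list(new, old)) {
			/* Membership or flags changed: move the volume's
			 * attachment points across and drop the old list.
			 */
			afs_reattach_volume_to_servers(volume, new, old);
			*_slist = new;
			afs_put_serverlist(volume->cell->net, old);
		} else {
			/* Nothing relevant changed: keep the old list. */
			afs_put_serverlist(volume->cell->net, new);
		}
		return 0;
	}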