From 7e0a0e38fcfea47e74b0ff6da6266f00bcd2af43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 May 2019 10:49:27 -0400 Subject: SUNRPC: Replace the queue timer with a delayed work function The queue timer function, which walks the RPC queue in order to locate candidates for waking up is one of the current constraints against removing the bh-safe queue spin locks. Replace it with a delayed work queue, so that we can do the actual rpc task wake ups from an ordinary process context. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d0e451868f02..7d8db5dcac04 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -183,8 +183,9 @@ struct rpc_task_setup { #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) struct rpc_timer { - struct timer_list timer; struct list_head list; + unsigned long expires; + struct delayed_work dwork; }; /* -- cgit v1.2.3 From 4f8943f8088348ec01456b075d44ad19dce3d698 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 May 2019 16:28:29 -0400 Subject: SUNRPC: Replace direct task wakeups from softirq context Replace the direct task wakeups from inside a softirq context with wakeups from a process context. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 5 +++ net/sunrpc/xprtsock.c | 78 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 77 insertions(+), 6 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index b81d0b3e0799..7638dbe7bc50 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -56,6 +56,7 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct error_worker; struct work_struct recv_worker; struct mutex recv_mutex; struct sockaddr_storage srcaddr; @@ -84,6 +85,10 @@ struct sock_xprt { #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) +#define XPRT_SOCK_WAKE_ERROR (4) +#define XPRT_SOCK_WAKE_WRITE (5) +#define XPRT_SOCK_WAKE_PENDING (6) +#define XPRT_SOCK_WAKE_DISCONNECT (7) #endif /* __KERNEL__ */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 36652352a38c..92af57019b96 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1211,6 +1211,15 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); + clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); +} + +static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) +{ + set_bit(nr, &transport->sock_state); + queue_work(xprtiod_workqueue, &transport->error_worker); } static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) @@ -1231,6 +1240,7 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) */ static void xs_error_report(struct sock *sk) { + struct sock_xprt *transport; struct rpc_xprt *xprt; int err; @@ -1238,13 +1248,14 @@ static void xs_error_report(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; + transport = container_of(xprt, struct sock_xprt, xprt); err = -sk->sk_err; if (err == 0) goto out; dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); - xprt_wake_pending_tasks(xprt, err); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1507,7 +1518,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; - xprt_wake_pending_tasks(xprt, -EAGAIN); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING); } spin_unlock(&xprt->transport_lock); break; @@ -1525,7 +1536,7 @@ static void xs_tcp_state_change(struct sock *sk) /* The server initiated a shutdown of the socket */ xprt->connect_cookie++; clear_bit(XPRT_CONNECTED, &xprt->state); - xs_tcp_force_close(xprt); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); /* fall through */ case TCP_CLOSING: /* @@ -1547,7 +1558,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt_clear_connecting(xprt); clear_bit(XPRT_CLOSING, &xprt->state); /* Trigger the socket release */ - xs_tcp_force_close(xprt); + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); } out: read_unlock_bh(&sk->sk_callback_lock); @@ -1556,6 +1567,7 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { struct socket_wq *wq; + struct sock_xprt *transport; struct rpc_xprt *xprt; if (!sk->sk_socket) @@ -1564,13 +1576,14 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; + transport = container_of(xprt, struct sock_xprt, xprt); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) goto out; - if (xprt_write_space(xprt)) - sk->sk_write_pending--; + xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); + sk->sk_write_pending--; out: rcu_read_unlock(); } @@ -2461,6 +2474,56 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) delay); } +static void xs_wake_disconnect(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state)) + xs_tcp_force_close(&transport->xprt); +} + +static void xs_wake_write(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state)) + xprt_write_space(&transport->xprt); +} + +static void xs_wake_error(struct sock_xprt *transport) +{ + int sockerr; + int sockerr_len = sizeof(sockerr); + + if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) + return; + mutex_lock(&transport->recv_mutex); + if (transport->sock == NULL) + goto out; + if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) + goto out; + if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR, + (char *)&sockerr, &sockerr_len) != 0) + goto out; + if (sockerr < 0) + xprt_wake_pending_tasks(&transport->xprt, sockerr); +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_wake_pending(struct sock_xprt *transport) +{ + if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state)) + xprt_wake_pending_tasks(&transport->xprt, -EAGAIN); +} + +static void xs_error_handle(struct work_struct *work) +{ + struct sock_xprt *transport = container_of(work, + struct sock_xprt, error_worker); + + xs_wake_disconnect(transport); + xs_wake_write(transport); + xs_wake_error(transport); + xs_wake_pending(transport); +} + /** * xs_local_print_stats - display AF_LOCAL socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -2873,6 +2936,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->timeout = &xs_local_default_timeout; INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); switch (sun->sun_family) { @@ -2943,6 +3007,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->timeout = &xs_udp_default_timeout; INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); switch (addr->sa_family) { @@ -3024,6 +3089,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) (xprt->timeout->to_retries + 1); INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); switch (addr->sa_family) { -- cgit v1.2.3 From 21f0ffaff510b0530bfdf77da7133c0b99dee2fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Apr 2017 10:52:42 -0400 Subject: SUNRPC: Add basic load balancing to the transport switch For now, just count the queue length. It is less accurate than counting number of bytes queued, but easier to implement. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + include/linux/sunrpc/xprtmultipath.h | 2 ++ net/sunrpc/clnt.c | 40 +++++++++++++++++++++++++++++++++--- net/sunrpc/xprtmultipath.c | 20 +++++++++++++++++- 4 files changed, 59 insertions(+), 4 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a6d9fce7f20e..15322c1d9c8c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -238,6 +238,7 @@ struct rpc_xprt { /* * Send stuff */ + atomic_long_t queuelen; spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ spinlock_t queue_lock; /* send/receive queue lock */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index af1257c030d2..c6cce3fbf29d 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -15,6 +15,8 @@ struct rpc_xprt_switch { struct kref xps_kref; unsigned int xps_nxprts; + unsigned int xps_nactive; + atomic_long_t xps_queuelen; struct list_head xps_xprt_list; struct net * xps_net; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b03bfa055c08..976eab68bb5d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -968,13 +968,47 @@ out: } EXPORT_SYMBOL_GPL(rpc_bind_new_program); +static struct rpc_xprt * +rpc_task_get_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt_switch *xps; + struct rpc_xprt *xprt= xprt_iter_get_next(&clnt->cl_xpi); + + if (!xprt) + return NULL; + rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + atomic_long_inc(&xps->xps_queuelen); + rcu_read_unlock(); + atomic_long_inc(&xprt->queuelen); + + return xprt; +} + +static void +rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + struct rpc_xprt_switch *xps; + + atomic_long_dec(&xprt->queuelen); + rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + atomic_long_dec(&xps->xps_queuelen); + rcu_read_unlock(); + + xprt_put(xprt); +} + void rpc_task_release_transport(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; if (xprt) { task->tk_xprt = NULL; - xprt_put(xprt); + if (task->tk_client) + rpc_task_release_xprt(task->tk_client, xprt); + else + xprt_put(xprt); } } EXPORT_SYMBOL_GPL(rpc_task_release_transport); @@ -983,6 +1017,7 @@ void rpc_task_release_client(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + rpc_task_release_transport(task); if (clnt != NULL) { /* Remove from client task list */ spin_lock(&clnt->cl_lock); @@ -992,14 +1027,13 @@ void rpc_task_release_client(struct rpc_task *task) rpc_release_client(clnt); } - rpc_task_release_transport(task); } static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { if (!task->tk_xprt) - task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); + task->tk_xprt = rpc_task_get_xprt(clnt); } static diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 8394124126f8..394e427533be 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -36,6 +36,7 @@ static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, if (xps->xps_nxprts == 0) xps->xps_net = xprt->xprt_net; xps->xps_nxprts++; + xps->xps_nactive++; } /** @@ -62,6 +63,7 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, { if (unlikely(xprt == NULL)) return; + xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) xps->xps_net = NULL; @@ -317,8 +319,24 @@ struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head, static struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) { - return xprt_iter_next_entry_multiple(xpi, + struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); + struct rpc_xprt *xprt; + unsigned long xprt_queuelen; + unsigned long xps_queuelen; + unsigned long xps_avglen; + + do { + xprt = xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_roundrobin); + if (xprt == NULL) + break; + xprt_queuelen = atomic_long_read(&xprt->queuelen); + if (xprt_queuelen <= 2) + break; + xps_queuelen = atomic_long_read(&xps->xps_queuelen); + xps_avglen = DIV_ROUND_UP(xps_queuelen, xps->xps_nactive); + } while (xprt_queuelen > xps_avglen); + return xprt; } static -- cgit v1.2.3 From 612b41f808a98a124b23d72229693c3181733291 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Apr 2017 08:50:51 -0400 Subject: SUNRPC: Allow creation of RPC clients with multiple connections Add an argument to struct rpc_create_args that allows the specification of how many transport connections you want to set up to the server. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 17 ++++++++++++++++- net/sunrpc/xprtmultipath.c | 3 +-- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6e8073140a5d..4619098affa3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ struct rpc_create_args { u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; + u32 nconnect; unsigned long flags; char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 976eab68bb5d..b6aca8cb5ae6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -528,6 +528,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .bc_xprt = args->bc_xprt, }; char servername[48]; + struct rpc_clnt *clnt; + int i; if (args->bc_xprt) { WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); @@ -590,7 +592,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - return rpc_create_xprt(args, xprt); + clnt = rpc_create_xprt(args, xprt); + if (IS_ERR(clnt) || args->nconnect <= 1) + return clnt; + + for (i = 0; i < args->nconnect - 1; i++) { + if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0) + break; + } + return clnt; } EXPORT_SYMBOL_GPL(rpc_create); @@ -2730,6 +2740,10 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, return -ENOMEM; data->xps = xprt_switch_get(xps); data->xprt = xprt_get(xprt); + if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) { + rpc_cb_add_xprt_release(data); + goto success; + } task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, @@ -2737,6 +2751,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); +success: return 1; } EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 394e427533be..9d66ce53355d 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -52,8 +52,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, if (xprt == NULL) return; spin_lock(&xps->xps_lock); - if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) && - !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) + if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); } -- cgit v1.2.3 From 5a0c257f8e0f4c4b3c33dff545317c21a921303e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 30 May 2019 10:41:28 +1000 Subject: NFS: send state management on a single connection. With NFSv4.1, different network connections need to be explicitly bound to a session. During session startup, this is not possible so only a single connection must be used for session startup. So add a task flag to disable the default round-robin choice of connections (when nconnect > 1) and force the use of a single connection. Then use that flag on all requests for session management - for consistence, include NFSv4.0 management (SETCLIENTID) and session destruction Reported-by: Chuck Lever Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 22 +++++++++++++--------- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 24 +++++++++++++++++++++++- 3 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26626ea1f197..d115d9973efc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5992,7 +5992,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_message = &msg, .callback_ops = &nfs4_setclientid_ops, .callback_data = &setclientid, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; int status; @@ -6058,7 +6058,8 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; @@ -7639,7 +7640,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("NFS reply secinfo: %d\n", status); put_cred(cred); @@ -7977,7 +7978,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, .rpc_client = clp->cl_rpcclient, .callback_ops = &nfs4_exchange_id_call_ops, .rpc_message = &msg, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; struct nfs41_exchange_id_data *calldata; int status; @@ -8202,7 +8203,8 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, }; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " @@ -8481,7 +8483,8 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, nfs4_init_channel_attrs(&args, clp->cl_rpcclient); args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_create_session(clp, status); switch (status) { @@ -8557,7 +8560,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) return 0; - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_session(session->clp, status); if (status) @@ -8811,7 +8815,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs4_reclaim_complete_call_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN, }; int status = -ENOMEM; @@ -9330,7 +9334,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("--> %s\n", __func__); status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("<-- %s status=%d\n", __func__, status); put_cred(cred); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d0e451868f02..11424bdf09e6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -126,6 +126,7 @@ struct rpc_task_setup { #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ +#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b6aca8cb5ae6..d599fab8adcb 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -995,6 +995,24 @@ rpc_task_get_xprt(struct rpc_clnt *clnt) return xprt; } +static struct rpc_xprt * +rpc_task_get_first_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt_switch *xps; + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + if (xprt) { + atomic_long_inc(&xprt->queuelen); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + atomic_long_inc(&xps->xps_queuelen); + } + rcu_read_unlock(); + + return xprt; +} + static void rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { @@ -1042,7 +1060,11 @@ void rpc_task_release_client(struct rpc_task *task) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (!task->tk_xprt) + if (task->tk_xprt) + return; + if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) + task->tk_xprt = rpc_task_get_first_xprt(clnt); + else task->tk_xprt = rpc_task_get_xprt(clnt); } -- cgit v1.2.3 From a332518fda4731c07394164b3edcbb6efaf4c4d7 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 23 May 2019 16:13:50 -0400 Subject: SUNRPC: Count ops completing with tk_status < 0 We often see various error conditions with NFS4.x that show up with a very high operation count all completing with tk_status < 0 in a short period of time. Add a count to rpc_iostats to record on a per-op basis the ops that complete in this manner, which will enable lower overhead diagnostics. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/linux/sunrpc/metrics.h | 7 ++++++- net/sunrpc/stats.c | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1b3751327575..0ee3f7052846 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -30,7 +30,7 @@ #include #include -#define RPC_IOSTATS_VERS "1.0" +#define RPC_IOSTATS_VERS "1.1" struct rpc_iostats { spinlock_t om_lock; @@ -66,6 +66,11 @@ struct rpc_iostats { ktime_t om_queue, /* queued for xmit */ om_rtt, /* RPC RTT */ om_execute; /* RPC execution */ + /* + * The count of operations that complete with tk_status < 0. + * These statuses usually indicate error conditions. + */ + unsigned long om_error_status; } ____cacheline_aligned; struct rpc_task; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 2f7bde82450b..48ea776364f8 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, execute = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); + if (task->tk_status < 0) + op_metrics->om_error_status++; spin_unlock(&op_metrics->om_lock); @@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) a->om_queue = ktime_add(a->om_queue, b->om_queue); a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); a->om_execute = ktime_add(a->om_execute, b->om_execute); + a->om_error_status += b->om_error_status; } static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, int op, const struct rpc_procinfo *procs) { _print_name(seq, op, procs); - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", stats->om_ops, stats->om_ntrans, stats->om_timeouts, @@ -233,7 +236,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, stats->om_bytes_recv, ktime_to_ms(stats->om_queue), ktime_to_ms(stats->om_rtt), - ktime_to_ms(stats->om_execute)); + ktime_to_ms(stats->om_execute), + stats->om_error_status); } void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) -- cgit v1.2.3 From 675dd90ad0932f2c03912a5252458d792bd7033a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:33:42 -0400 Subject: xprtrdma: Modernize ops->connect Adapt and apply changes that were made to the TCP socket connect code. See the following commits for details on the purpose of these changes: Commit 7196dbb02ea0 ("SUNRPC: Allow changing of the TCP timeout parameters on the fly") Commit 3851f1cdb2b8 ("SUNRPC: Limit the reconnect backoff timer to the max RPC message timeout") Commit 02910177aede ("SUNRPC: Fix reconnection timeouts") Some common transport code is moved to xprt.c to satisfy the code duplication police. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 3 ++ include/trace/events/rpcrdma.h | 31 +++++++++++++++++++ net/sunrpc/sched.c | 1 + net/sunrpc/xprt.c | 32 ++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 66 +++++++++++++++++++++++++++++++---------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + net/sunrpc/xprtsock.c | 23 ++------------ 7 files changed, 121 insertions(+), 36 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a6d9fce7f20e..cc78fd38ea7d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -334,6 +334,9 @@ struct xprt_class { */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); +unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt); +void xprt_reconnect_backoff(struct rpc_xprt *xprt, + unsigned long init_to); void xprt_reserve(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 98023d91a72d..f6a4eaa85a3e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -375,6 +375,37 @@ DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_connect); +TRACE_EVENT(xprtrdma_op_set_cto, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned long connect, + unsigned long reconnect + ), + + TP_ARGS(r_xprt, connect, reconnect), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned long, connect) + __field(unsigned long, reconnect) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->connect = connect; + __entry->reconnect = reconnect; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->connect / HZ, __entry->reconnect / HZ + ) +); + TRACE_EVENT(xprtrdma_qp_event, TP_PROTO( const struct rpcrdma_xprt *r_xprt, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index bb04ae52803a..5ad5dead7bfc 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -58,6 +58,7 @@ static struct rpc_wait_queue delay_queue; */ struct workqueue_struct *rpciod_workqueue __read_mostly; struct workqueue_struct *xprtiod_workqueue __read_mostly; +EXPORT_SYMBOL_GPL(xprtiod_workqueue); unsigned long rpc_task_timeout(const struct rpc_task *task) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ad21880d5601..b1f54b7ccc0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -850,6 +850,38 @@ void xprt_connect(struct rpc_task *task) xprt_release_write(xprt, task); } +/** + * xprt_reconnect_delay - compute the wait before scheduling a connect + * @xprt: transport instance + * + */ +unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt) +{ + unsigned long start, now = jiffies; + + start = xprt->stat.connect_start + xprt->reestablish_timeout; + if (time_after(start, now)) + return start - now; + return 0; +} +EXPORT_SYMBOL_GPL(xprt_reconnect_delay); + +/** + * xprt_reconnect_backoff - compute the new re-establish timeout + * @xprt: transport instance + * @init_to: initial reestablish timeout + * + */ +void xprt_reconnect_backoff(struct rpc_xprt *xprt, unsigned long init_to) +{ + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > xprt->max_reconnect_timeout) + xprt->reestablish_timeout = xprt->max_reconnect_timeout; + if (xprt->reestablish_timeout < init_to) + xprt->reestablish_timeout = init_to; +} +EXPORT_SYMBOL_GPL(xprt_reconnect_backoff); + enum xprt_xid_rb_cmp { XID_RB_EQUAL, XID_RB_LEFT, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 3688e0782587..4993aa49ecbe 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -298,6 +298,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) module_put(THIS_MODULE); } +/* 60 second timeout, no retries */ static const struct rpc_timeout xprt_rdma_default_timeout = { .to_initval = 60 * HZ, .to_maxval = 60 * HZ, @@ -323,8 +324,9 @@ xprt_setup_rdma(struct xprt_create *args) if (!xprt) return ERR_PTR(-ENOMEM); - /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; + xprt->connect_timeout = xprt->timeout->to_initval; + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; xprt->bind_timeout = RPCRDMA_BIND_TO; xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; @@ -487,31 +489,64 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) } /** - * xprt_rdma_connect - try to establish a transport connection + * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection + * @xprt: controlling transport instance + * @connect_timeout: reconnect timeout after client disconnects + * @reconnect_timeout: reconnect timeout after server disconnects + * + */ +static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout); + + spin_lock(&xprt->transport_lock); + + if (connect_timeout < xprt->connect_timeout) { + struct rpc_timeout to; + unsigned long initval; + + to = *xprt->timeout; + initval = connect_timeout; + if (initval < RPCRDMA_INIT_REEST_TO << 1) + initval = RPCRDMA_INIT_REEST_TO << 1; + to.to_initval = initval; + to.to_maxval = initval; + r_xprt->rx_timeout = to; + xprt->timeout = &r_xprt->rx_timeout; + xprt->connect_timeout = connect_timeout; + } + + if (reconnect_timeout < xprt->max_reconnect_timeout) + xprt->max_reconnect_timeout = reconnect_timeout; + + spin_unlock(&xprt->transport_lock); +} + +/** + * xprt_rdma_connect - schedule an attempt to reconnect * @xprt: transport state - * @task: RPC scheduler context + * @task: RPC scheduler context (unused) * */ static void xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + unsigned long delay; trace_xprtrdma_op_connect(r_xprt); + + delay = 0; if (r_xprt->rx_ep.rep_connected != 0) { - /* Reconnect */ - schedule_delayed_work(&r_xprt->rx_connect_worker, - xprt->reestablish_timeout); - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) - xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; - else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) - xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; - } else { - schedule_delayed_work(&r_xprt->rx_connect_worker, 0); - if (!RPC_IS_ASYNC(task)) - flush_delayed_work(&r_xprt->rx_connect_worker); + delay = xprt_reconnect_delay(xprt); + xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } + queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, + delay); } /** @@ -769,6 +804,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, + .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout, .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 117e32816e4f..8378f45d2da7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -432,6 +432,7 @@ struct rpcrdma_xprt { struct rpcrdma_ep rx_ep; struct rpcrdma_buffer rx_buf; struct delayed_work rx_connect_worker; + struct rpc_timeout rx_timeout; struct rpcrdma_stats rx_stats; }; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c69951ed2ebc..b154600085d6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2402,25 +2402,6 @@ out: xprt_wake_pending_tasks(xprt, status); } -static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt) -{ - unsigned long start, now = jiffies; - - start = xprt->stat.connect_start + xprt->reestablish_timeout; - if (time_after(start, now)) - return start - now; - return 0; -} - -static void xs_reconnect_backoff(struct rpc_xprt *xprt) -{ - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > xprt->max_reconnect_timeout) - xprt->reestablish_timeout = xprt->max_reconnect_timeout; - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; -} - /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2450,8 +2431,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) /* Start by resetting any existing state */ xs_reset_transport(transport); - delay = xs_reconnect_delay(xprt); - xs_reconnect_backoff(xprt); + delay = xprt_reconnect_delay(xprt); + xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); -- cgit v1.2.3 From a101b043c44dfcb63bed7f29a675e9fa0259005e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Jul 2019 16:33:12 -0400 Subject: SUNRPC: Fix transport accounting when caller specifies an rpc_xprt Ensure that we do the required accounting for the round robin queue when the caller to rpc_init_task() has passed in a transport to be used. Reported-by: Olga Kornievskaia Reported-by: Neil Brown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 42 ++++++++++++++++++++---------------------- net/sunrpc/sched.c | 3 ++- 3 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4619098affa3..4e070e00c143 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -164,6 +164,8 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_transport(struct rpc_task *); void rpc_task_release_client(struct rpc_task *); +struct rpc_xprt *rpc_task_get_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt); int rpcb_create_local(struct net *); void rpcb_put_local(struct net *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d599fab8adcb..383555d2b522 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -978,11 +978,10 @@ out: } EXPORT_SYMBOL_GPL(rpc_bind_new_program); -static struct rpc_xprt * -rpc_task_get_xprt(struct rpc_clnt *clnt) +struct rpc_xprt * +rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { struct rpc_xprt_switch *xps; - struct rpc_xprt *xprt= xprt_iter_get_next(&clnt->cl_xpi); if (!xprt) return NULL; @@ -995,24 +994,6 @@ rpc_task_get_xprt(struct rpc_clnt *clnt) return xprt; } -static struct rpc_xprt * -rpc_task_get_first_xprt(struct rpc_clnt *clnt) -{ - struct rpc_xprt_switch *xps; - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); - if (xprt) { - atomic_long_inc(&xprt->queuelen); - xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); - atomic_long_inc(&xps->xps_queuelen); - } - rcu_read_unlock(); - - return xprt; -} - static void rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { @@ -1057,6 +1038,23 @@ void rpc_task_release_client(struct rpc_task *task) } } +static struct rpc_xprt * +rpc_task_get_first_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + return rpc_task_get_xprt(clnt, xprt); +} + +static struct rpc_xprt * +rpc_task_get_next_xprt(struct rpc_clnt *clnt) +{ + return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi)); +} + static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { @@ -1065,7 +1063,7 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else - task->tk_xprt = rpc_task_get_xprt(clnt); + task->tk_xprt = rpc_task_get_next_xprt(clnt); } static diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8a0779e963f9..1f275aba786f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1092,7 +1092,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize workqueue for async tasks */ task->tk_workqueue = task_setup_data->workqueue; - task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); + task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client, + xprt_get(task_setup_data->rpc_xprt)); task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred); -- cgit v1.2.3 From 7402a4fedc2bc448100c2d086406c708451b16dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Jul 2019 13:51:29 -0400 Subject: SUNRPC: Fix up backchannel slot table accounting Add a per-transport maximum limit in the socket case, and add helpers to allow the NFSv4 code to discover that limit. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ include/linux/sunrpc/bc_xprt.h | 1 + include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 6 ++++-- net/sunrpc/backchannel_rqst.c | 40 +++++++++++++++++++++------------------ net/sunrpc/clnt.c | 13 +++++++++++++ net/sunrpc/svc.c | 2 +- net/sunrpc/xprtrdma/backchannel.c | 7 +++++++ net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + net/sunrpc/xprtsock.c | 1 + 11 files changed, 55 insertions(+), 21 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 52de7245a2ee..39896afc6edf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8380,6 +8380,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, { unsigned int max_rqst_sz, max_resp_sz; unsigned int max_bc_payload = rpc_max_bc_payload(clnt); + unsigned int max_bc_slots = rpc_num_bc_slots(clnt); max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; @@ -8402,6 +8403,8 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1); + if (args->bc_attrs.max_reqs > max_bc_slots) + args->bc_attrs.max_reqs = max_bc_slots; dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d4229a78524a..87d27e13d885 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -43,6 +43,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); void xprt_free_bc_rqst(struct rpc_rqst *req); +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); /* * Determine if a shared backchannel is in use diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4e070e00c143..abc63bd1be2b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -194,6 +194,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *); +unsigned int rpc_num_bc_slots(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ed76e5fb36c1..13e108bcc9eb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ struct rpc_xprt_ops { int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); size_t (*bc_maxpayload)(struct rpc_xprt *xprt); + unsigned int (*bc_num_slots)(struct rpc_xprt *xprt); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); @@ -251,8 +252,9 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - int bc_alloc_count; /* Total number of preallocs */ - atomic_t bc_free_slots; + unsigned int bc_alloc_max; + unsigned int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_slot_count; /* Number of allocated slots */ spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index c47d82622fd1..339e8c077c2d 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -31,25 +31,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_TRANS #endif +#define BC_MAX_SLOTS 64U + +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt) +{ + return BC_MAX_SLOTS; +} + /* * Helper routines that track the number of preallocation elements * on the transport. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); -} - -static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) -{ - atomic_add(n, &xprt->bc_free_slots); - xprt->bc_alloc_count += n; -} - -static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) -{ - atomic_sub(n, &xprt->bc_free_slots); - return xprt->bc_alloc_count -= n; + return xprt->bc_alloc_count < xprt->bc_alloc_max; } /* @@ -145,6 +140,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) dprintk("RPC: setup backchannel transport\n"); + if (min_reqs > BC_MAX_SLOTS) + min_reqs = BC_MAX_SLOTS; + /* * We use a temporary list to keep track of the preallocated * buffers. Once we're done building the list we splice it @@ -172,7 +170,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) */ spin_lock(&xprt->bc_pa_lock); list_splice(&tmp_list, &xprt->bc_pa_list); - xprt_inc_alloc_count(xprt, min_reqs); + xprt->bc_alloc_count += min_reqs; + xprt->bc_alloc_max += min_reqs; + atomic_add(min_reqs, &xprt->bc_slot_count); spin_unlock(&xprt->bc_pa_lock); dprintk("RPC: setup backchannel transport done\n"); @@ -220,11 +220,13 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) goto out; spin_lock_bh(&xprt->bc_pa_lock); - xprt_dec_alloc_count(xprt, max_reqs); + xprt->bc_alloc_max -= max_reqs; list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { dprintk("RPC: req=%p\n", req); list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); + xprt->bc_alloc_count--; + atomic_dec(&xprt->bc_slot_count); if (--max_reqs == 0) break; } @@ -241,13 +243,14 @@ static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid, struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (atomic_read(&xprt->bc_free_slots) <= 0) - goto not_found; if (list_empty(&xprt->bc_pa_list)) { if (!new) goto not_found; + if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS) + goto not_found; list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; + atomic_inc(&xprt->bc_slot_count); } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -291,6 +294,7 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) if (xprt_need_to_requeue(xprt)) { list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; + atomic_inc(&xprt->bc_slot_count); req = NULL; } spin_unlock_bh(&xprt->bc_pa_lock); @@ -357,7 +361,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); - xprt_dec_alloc_count(xprt, 1); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 383555d2b522..79c849391cb9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1526,6 +1526,19 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_max_bc_payload); +unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + unsigned int ret; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + ret = xprt->ops->bc_num_slots(xprt); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_num_bc_slots); + /** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e15cb704453e..220b79988000 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1595,7 +1595,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* Parse and execute the bc call */ proc_error = svc_process_common(rqstp, argv, resv); - atomic_inc(&req->rq_xprt->bc_free_slots); + atomic_dec(&req->rq_xprt->bc_slot_count); if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ce986591f213..59e624b1d7a0 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -52,6 +52,13 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) return maxmsg - RPCRDMA_HDRLEN_MIN; } +unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + return r_xprt->rx_buf.rb_bc_srv_max_requests; +} + static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 4993aa49ecbe..52abddac19e5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -812,6 +812,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = { #if defined(CONFIG_SUNRPC_BACKCHANNEL) .bc_setup = xprt_rdma_bc_setup, .bc_maxpayload = xprt_rdma_bc_maxpayload, + .bc_num_slots = xprt_rdma_bc_max_slots, .bc_free_rqst = xprt_rdma_bc_free_rqst, .bc_destroy = xprt_rdma_bc_destroy, #endif diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8378f45d2da7..92ce09fcea74 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -605,6 +605,7 @@ void xprt_rdma_cleanup(void); #if defined(CONFIG_SUNRPC_BACKCHANNEL) int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); +unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3c2cc96afcaa..6b1fca51028a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2788,6 +2788,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { #ifdef CONFIG_SUNRPC_BACKCHANNEL .bc_setup = xprt_setup_bc, .bc_maxpayload = xs_tcp_bc_maxpayload, + .bc_num_slots = xprt_bc_max_slots, .bc_free_rqst = xprt_free_bc_rqst, .bc_destroy = xprt_destroy_bc, #endif -- cgit v1.2.3