From 42d42a5b0cd263757f8e519debbc744fdaefdaf7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 May 2016 09:24:55 -0400 Subject: SUNRPC: Small optimisation of client receive Do not queue the client receive work if we're still processing. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 0ece4ba06f06..bef3fb0abb8f 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -80,6 +80,7 @@ struct sock_xprt { #define TCP_RPC_REPLY (1UL << 6) #define XPRT_SOCK_CONNECTING 1U +#define XPRT_SOCK_DATA_READY (2) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 40a5f1b19bacb2de7a051be952dee85e38c9e5f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 May 2016 10:39:50 -0400 Subject: SUNRPC: RPC transport queue must be low latency rpciod can easily get congested due to the long list of queued rpc_tasks. Having the receive queue wait in turn for those tasks to complete can therefore be a bottleneck. Address the problem by separating the workqueues into: - rpciod: manages rpc_tasks - xprtiod: manages transport related work. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/sched.c | 24 ++++++++++++++++++++---- net/sunrpc/xprt.c | 8 ++++---- net/sunrpc/xprtsock.c | 6 +++--- 4 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 05a1809c44d9..ef780b3b5e31 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -247,6 +247,7 @@ void rpc_show_tasks(struct net *); int rpc_init_mempool(void); void rpc_destroy_mempool(void); extern struct workqueue_struct *rpciod_workqueue; +extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); static inline int rpc_wait_for_completion_task(struct rpc_task *task) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fcfd48d263f6..a9f786247ffb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -54,7 +54,8 @@ static struct rpc_wait_queue delay_queue; /* * rpciod-related stuff */ -struct workqueue_struct *rpciod_workqueue; +struct workqueue_struct *rpciod_workqueue __read_mostly; +struct workqueue_struct *xprtiod_workqueue __read_mostly; /* * Disable the timer for a given RPC task. Should be called with @@ -1071,10 +1072,22 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - /* Note: highpri because network receive is latency sensitive */ - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); + if (!wq) + goto out_failed; rpciod_workqueue = wq; - return rpciod_workqueue != NULL; + /* Note: highpri because network receive is latency sensitive */ + wq = alloc_workqueue("xprtiod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!wq) + goto free_rpciod; + xprtiod_workqueue = wq; + return 1; +free_rpciod: + wq = rpciod_workqueue; + rpciod_workqueue = NULL; + destroy_workqueue(wq); +out_failed: + return 0; } static void rpciod_stop(void) @@ -1088,6 +1101,9 @@ static void rpciod_stop(void) wq = rpciod_workqueue; rpciod_workqueue = NULL; destroy_workqueue(wq); + wq = xprtiod_workqueue; + xprtiod_workqueue = NULL; + destroy_workqueue(wq); } void diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 216a1385718a..71df082b84a9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -220,7 +220,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_atomic(); } else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + queue_work(xprtiod_workqueue, &xprt->task_cleanup); } /* @@ -645,7 +645,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) set_bit(XPRT_CLOSE_WAIT, &xprt->state); /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(rpciod_workqueue, &xprt->task_cleanup); + queue_work(xprtiod_workqueue, &xprt->task_cleanup); xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } @@ -672,7 +672,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) set_bit(XPRT_CLOSE_WAIT, &xprt->state); /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(rpciod_workqueue, &xprt->task_cleanup); + queue_work(xprtiod_workqueue, &xprt->task_cleanup); xprt_wake_pending_tasks(xprt, -EAGAIN); out: spin_unlock_bh(&xprt->transport_lock); @@ -689,7 +689,7 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - queue_work(rpciod_workqueue, &xprt->task_cleanup); + queue_work(xprtiod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62b4f5a2a331..646170d0cb86 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1095,7 +1095,7 @@ static void xs_data_ready(struct sock *sk) if (xprt->reestablish_timeout) xprt->reestablish_timeout = 0; if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) - queue_work(rpciod_workqueue, &transport->recv_worker); + queue_work(xprtiod_workqueue, &transport->recv_worker); } read_unlock_bh(&sk->sk_callback_lock); } @@ -2378,7 +2378,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) /* Start by resetting any existing state */ xs_reset_transport(transport); - queue_delayed_work(rpciod_workqueue, + queue_delayed_work(xprtiod_workqueue, &transport->connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; @@ -2388,7 +2388,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - queue_delayed_work(rpciod_workqueue, + queue_delayed_work(xprtiod_workqueue, &transport->connect_worker, 0); } } -- cgit v1.2.3 From f1dc237c60a5fdecc83062a28a702193f881cb19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 May 2016 12:59:33 -0400 Subject: SUNRPC: Reduce latency when send queue is congested Use the low latency transport workqueue to process the task that is next in line on the xprt->sending queue. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 4 ++++ net/sunrpc/sched.c | 43 +++++++++++++++++++++++++++++++++---------- net/sunrpc/xprt.c | 6 ++++-- 3 files changed, 41 insertions(+), 12 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ef780b3b5e31..817af0b4385e 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -230,6 +230,10 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, + struct rpc_wait_queue *, + bool (*)(struct rpc_task *, void *), + void *); struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, bool (*)(struct rpc_task *, void *), void *); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a9f786247ffb..9ae588511aaf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -330,7 +330,8 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); * lockless RPC_IS_QUEUED() test) before we've had a chance to test * the RPC_TASK_RUNNING flag. */ -static void rpc_make_runnable(struct rpc_task *task) +static void rpc_make_runnable(struct workqueue_struct *wq, + struct rpc_task *task) { bool need_wakeup = !rpc_test_and_set_running(task); @@ -339,7 +340,7 @@ static void rpc_make_runnable(struct rpc_task *task) return; if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); - queue_work(rpciod_workqueue, &task->u.tk_work); + queue_work(wq, &task->u.tk_work); } else wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } @@ -408,13 +409,16 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); /** - * __rpc_do_wake_up_task - wake up a single rpc_task + * __rpc_do_wake_up_task_on_wq - wake up a single rpc_task + * @wq: workqueue on which to run task * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. */ -static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, + struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -429,7 +433,7 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task __rpc_remove_wait_queue(queue, task); - rpc_make_runnable(task); + rpc_make_runnable(wq, task); dprintk("RPC: __rpc_wake_up_task done\n"); } @@ -437,15 +441,24 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task /* * Wake up a queued task while the queue lock is being held */ -static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) +static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, struct rpc_task *task) { if (RPC_IS_QUEUED(task)) { smp_rmb(); if (task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + __rpc_do_wake_up_task_on_wq(wq, queue, task); } } +/* + * Wake up a queued task while the queue lock is being held + */ +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) +{ + rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task); +} + /* * Wake up a task on a specific queue */ @@ -519,7 +532,8 @@ static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) /* * Wake up the first task on the wait queue. */ -struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, +struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, bool (*func)(struct rpc_task *, void *), void *data) { struct rpc_task *task = NULL; @@ -530,7 +544,7 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, task = __rpc_find_next_queued(queue); if (task != NULL) { if (func(task, data)) - rpc_wake_up_task_queue_locked(queue, task); + rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); else task = NULL; } @@ -538,6 +552,15 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, return task; } + +/* + * Wake up the first task on the wait queue. + */ +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, + bool (*func)(struct rpc_task *, void *), void *data) +{ + return rpc_wake_up_first_on_wq(rpciod_workqueue, queue, func, data); +} EXPORT_SYMBOL_GPL(rpc_wake_up_first); static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) @@ -815,7 +838,7 @@ void rpc_execute(struct rpc_task *task) bool is_async = RPC_IS_ASYNC(task); rpc_set_active(task); - rpc_make_runnable(task); + rpc_make_runnable(rpciod_workqueue, task); if (!is_async) __rpc_execute(task); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 71df082b84a9..8313960cac52 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -295,7 +295,8 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) + if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, + __xprt_lock_write_func, xprt)) return; xprt_clear_locked(xprt); } @@ -324,7 +325,8 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) return; if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; - if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) + if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending, + __xprt_lock_write_cong_func, xprt)) return; out_unlock: xprt_clear_locked(xprt); -- cgit v1.2.3 From 65b80179f9b8171b74625febf3457f41e792fa23 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Jun 2016 13:55:06 -0400 Subject: xprtrdma: No direct data placement with krb5i and krb5p Direct data placement is not allowed when using flavors that guarantee integrity or privacy. When such security flavors are in effect, don't allow the use of Read and Write chunks for moving individual data items. All messages larger than the inline threshold are sent via Long Call or Long Reply. On my systems (CX-3 Pro on FDR), for small I/O operations, the use of Long messages adds only around 5 usecs of latency in each direction. Note that when integrity or encryption is used, the host CPU touches every byte in these messages. Even if it could be used, data movement offload doesn't buy much in this case. Signed-off-by: Chuck Lever Tested-by: Steve Wise Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 3 +++ include/linux/sunrpc/gss_api.h | 2 ++ net/sunrpc/auth_gss/auth_gss.c | 2 ++ net/sunrpc/auth_gss/gss_krb5_mech.c | 2 ++ net/sunrpc/auth_gss/gss_mech_switch.c | 12 ++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 12 ++++++++++-- 6 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 899791573a40..3a40287b4d27 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -107,6 +107,9 @@ struct rpc_auth { /* per-flavor data */ }; +/* rpc_auth au_flags */ +#define RPCAUTH_AUTH_DATATOUCH 0x00000002 + struct rpc_auth_create_args { rpc_authflavor_t pseudoflavor; const char *target_name; diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 1f911ccb2a75..68ec78c1aa48 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -73,6 +73,7 @@ u32 gss_delete_sec_context( rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 qop, u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); +bool gss_pseudoflavor_to_datatouch(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { @@ -81,6 +82,7 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + bool datatouch; }; /* Different mechanisms (e.g., krb5 or spkm3) may implement gss-api, and diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e64ae93d5b4f..bca3537efffd 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1017,6 +1017,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth->au_rslack = GSS_VERF_SLACK >> 2; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; + if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) + auth->au_flags |= RPCAUTH_AUTH_DATATOUCH; atomic_set(&auth->au_count, 1); kref_init(&gss_auth->kref); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 65427492b1c9..60595835317a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -745,12 +745,14 @@ static struct pf_desc gss_kerberos_pfs[] = { .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", + .datatouch = true, }, [2] = { .pseudoflavor = RPC_AUTH_GSS_KRB5P, .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_PRIVACY, .name = "krb5p", + .datatouch = true, }, }; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 7063d856a598..5fec3abbe19b 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -361,6 +361,18 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) } EXPORT_SYMBOL(gss_pseudoflavor_to_service); +bool +gss_pseudoflavor_to_datatouch(struct gss_api_mech *gm, u32 pseudoflavor) +{ + int i; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) + return gm->gm_pfs[i].datatouch; + } + return false; +} + char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index dac2990ae2f7..a47f170b20ef 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -570,6 +570,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) struct rpcrdma_req *req = rpcr_to_rdmar(rqst); enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; + bool ddp_allowed; ssize_t hdrlen; size_t rpclen; __be32 *iptr; @@ -586,6 +587,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); headerp->rm_type = rdma_msg; + /* When the ULP employs a GSS flavor that guarantees integrity + * or privacy, direct data placement of individual data items + * is not allowed. + */ + ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags & + RPCAUTH_AUTH_DATATOUCH); + /* * Chunks needed for results? * @@ -597,7 +605,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) */ if (rpcrdma_results_inline(r_xprt, rqst)) wtype = rpcrdma_noch; - else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) + else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) wtype = rpcrdma_writech; else wtype = rpcrdma_replych; @@ -620,7 +628,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) rtype = rpcrdma_noch; rpcrdma_inline_pullup(rqst); rpclen = rqst->rq_svec[0].iov_len; - } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { + } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { rtype = rpcrdma_readch; rpclen = rqst->rq_svec[0].iov_len; rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); -- cgit v1.2.3 From d8d29138b17c9965484427b34cf8046601aef8c4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Jun 2016 16:31:03 +1000 Subject: sunrpc: remove 'inuse' flag from struct cache_detail. This field is not currently in use. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 -- net/sunrpc/cache.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ed03c9f7f908..62a60eeacb0a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -78,8 +78,6 @@ struct cache_detail { struct hlist_head * hash_table; rwlock_t hash_lock; - atomic_t inuse; /* active user-space update or lookup */ - char *name; void (*cache_put)(struct kref *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 553bf95f7003..4d8e11f94a35 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -362,7 +362,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) cache_purge(cd); spin_lock(&cache_list_lock); write_lock(&cd->hash_lock); - if (cd->entries || atomic_read(&cd->inuse)) { + if (cd->entries) { write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); goto out; -- cgit v1.2.3 From ff3ac5c3dc2323ba54c3d9ef30ef4942a71b251d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Jun 2016 10:55:50 -0400 Subject: SUNRPC: Add a server side per-connection limit Allow the user to limit the number of requests serviced through a single connection, to help prevent faster clients from starving slower clients. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- Documentation/kernel-parameters.txt | 6 ++++++ include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 39 ++++++++++++++++++++++++++++++++++--- 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..48ba6d2e670a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3832,6 +3832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. using these two parameters to set the minimum and maximum port values. + sunrpc.svc_rpc_per_connection_limit= + [NFS,SUNRPC] + Limit the number of requests that the server will + process in parallel from a single connection. + The default value is 0 (no limit). + sunrpc.pool_mode= [NFS] Control how the NFS server code allocates CPUs to diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7ca44fb5b675..7321ae933867 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -268,6 +268,7 @@ struct svc_rqst { * cache pages */ #define RQ_VICTIM (5) /* about to be shut down */ #define RQ_BUSY (6) /* request is busy */ +#define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ void * rq_argp; /* decoded arguments */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 79ba50856707..ad899ffed3be 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -69,6 +69,7 @@ struct svc_xprt { struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ + atomic_t xpt_nr_rqsts; /* Number of requests */ struct mutex xpt_mutex; /* to serialize sending data */ spinlock_t xpt_lock; /* protects sk_deferred * and xpt_auth_cache */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e7082a4aeb56..2adc8db6aaf5 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -21,6 +21,10 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static unsigned int svc_rpc_per_connection_limit __read_mostly; +module_param(svc_rpc_per_connection_limit, uint, 0644); + + static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -329,12 +333,41 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) } EXPORT_SYMBOL_GPL(svc_print_addr); +static bool svc_xprt_slots_in_range(struct svc_xprt *xprt) +{ + unsigned int limit = svc_rpc_per_connection_limit; + int nrqsts = atomic_read(&xprt->xpt_nr_rqsts); + + return limit == 0 || (nrqsts >= 0 && nrqsts < limit); +} + +static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt) +{ + if (!test_bit(RQ_DATA, &rqstp->rq_flags)) { + if (!svc_xprt_slots_in_range(xprt)) + return false; + atomic_inc(&xprt->xpt_nr_rqsts); + set_bit(RQ_DATA, &rqstp->rq_flags); + } + return true; +} + +static void svc_xprt_release_slot(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; + if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) { + atomic_dec(&xprt->xpt_nr_rqsts); + svc_xprt_enqueue(xprt); + } +} + static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) { if (xprt->xpt_flags & ((1<xpt_flags & ((1<xpt_ops->xpo_has_wspace(xprt)) + if (xprt->xpt_ops->xpo_has_wspace(xprt) && + svc_xprt_slots_in_range(xprt)) return true; trace_svc_xprt_no_write_space(xprt); return false; @@ -516,8 +549,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp) rqstp->rq_res.head[0].iov_len = 0; svc_reserve(rqstp, 0); + svc_xprt_release_slot(rqstp); rqstp->rq_xprt = NULL; - svc_xprt_put(xprt); } @@ -785,7 +818,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) svc_add_new_temp_xprt(serv, newxpt); else module_put(xprt->xpt_class->xcl_owner); - } else { + } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, rqstp->rq_pool->sp_id, xprt, -- cgit v1.2.3 From f4a4906e563522aa2eab61cf080460d13b85725c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Jun 2016 10:55:52 -0400 Subject: SUNRPC: Remove unused callback xpo_adjust_wspace() Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svc_xprt.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ad899ffed3be..ab02a457da1f 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,7 +25,6 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); - void (*xpo_adjust_wspace)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2adc8db6aaf5..c3f652395a80 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -517,8 +517,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space) atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; - if (xprt->xpt_ops->xpo_adjust_wspace) - xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } -- cgit v1.2.3 From ce52914eb76efd62aa48d738cf845b37852bf920 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 7 Jun 2016 15:14:48 -0400 Subject: sunrpc: move NO_CRKEY_TIMEOUT to the auth->au_flags A generic_cred can be used to look up a unx_cred or a gss_cred, so it's not really safe to use the the generic_cred->acred->ac_flags to store the NO_CRKEY_TIMEOUT flag. A lookup for a unx_cred triggered while the KEY_EXPIRE_SOON flag is already set will cause both NO_CRKEY_TIMEOUT and KEY_EXPIRE_SOON to be set in the ac_flags, leaving the user associated with the auth_cred to be in a state where they're perpetually doing 4K NFS_FILE_SYNC writes. This can be reproduced as follows: 1. Mount two NFS filesystems, one with sec=krb5 and one with sec=sys. They do not need to be the same export, nor do they even need to be from the same NFS server. Also, v3 is fine. $ sudo mount -o v3,sec=krb5 server1:/export /mnt/krb5 $ sudo mount -o v3,sec=sys server2:/export /mnt/sys 2. As the normal user, before accessing the kerberized mount, kinit with a short lifetime (but not so short that renewing the ticket would leave you within the 4-minute window again by the time the original ticket expires), e.g. $ kinit -l 10m -r 60m 3. Do some I/O to the kerberized mount and verify that the writes are wsize, UNSTABLE: $ dd if=/dev/zero of=/mnt/krb5/file bs=1M count=1 4. Wait until you're within 4 minutes of key expiry, then do some more I/O to the kerberized mount to ensure that RPC_CRED_KEY_EXPIRE_SOON gets set. Verify that the writes are 4K, FILE_SYNC: $ dd if=/dev/zero of=/mnt/krb5/file bs=1M count=1 5. Now do some I/O to the sec=sys mount. This will cause RPC_CRED_NO_CRKEY_TIMEOUT to be set: $ dd if=/dev/zero of=/mnt/sys/file bs=1M count=1 6. Writes for that user will now be permanently 4K, FILE_SYNC for that user, regardless of which mount is being written to, until you reboot the client. Renewing the kerberos ticket (assuming it hasn't already expired) will have no effect. Grabbing a new kerberos ticket at this point will have no effect either. Move the flag to the auth->au_flags field (which is currently unused) and rename it slightly to reflect that it's no longer associated with the auth_cred->ac_flags. Add the rpc_auth to the arg list of rpcauth_cred_key_to_expire and check the au_flags there too. Finally, add the inode to the arg list of nfs_ctx_key_to_expire so we can determine the rpc_auth to pass to rpcauth_cred_key_to_expire. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ++-- fs/nfs/internal.h | 2 +- fs/nfs/write.c | 6 ++++-- include/linux/sunrpc/auth.h | 6 ++++-- net/sunrpc/auth.c | 4 +++- net/sunrpc/auth_generic.c | 9 +-------- net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_null.c | 1 + net/sunrpc/auth_unix.c | 1 + 9 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 717a8d6af52d..6bcd8913e8a9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -432,7 +432,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, return status; NFS_I(mapping->host)->write_io += copied; - if (nfs_ctx_key_to_expire(ctx)) { + if (nfs_ctx_key_to_expire(ctx, mapping->host)) { status = nfs_wb_all(mapping->host); if (status < 0) return status; @@ -645,7 +645,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode) ctx = nfs_file_open_context(filp); if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || - nfs_ctx_key_to_expire(ctx)) + nfs_ctx_key_to_expire(ctx, inode)) return 1; return 0; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fa88609f85e3..d2260e67334f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -497,7 +497,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); -bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode); void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); #ifdef CONFIG_MIGRATION diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1c74d3db64d..0b949a06b297 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1195,9 +1195,11 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) /* * Test if the open context credential key is marked to expire soon. */ -bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx) +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode) { - return rpcauth_cred_key_to_expire(ctx->cred); + struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + + return rpcauth_cred_key_to_expire(auth, ctx->cred); } /* diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 899791573a40..f890a295a7ff 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -37,7 +37,6 @@ struct rpcsec_gss_info; /* auth_cred ac_flags bits */ enum { - RPC_CRED_NO_CRKEY_TIMEOUT = 0, /* underlying cred has no key timeout */ RPC_CRED_KEY_EXPIRE_SOON = 1, /* underlying cred key will expire soon */ RPC_CRED_NOTIFY_TIMEOUT = 2, /* nofity generic cred when underlying key will expire soon */ @@ -82,6 +81,9 @@ struct rpc_cred { #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 +/* rpc_auth au_flags */ +#define RPCAUTH_AUTH_NO_CRKEY_TIMEOUT 0x0001 /* underlying cred has no key timeout */ + /* * Client authentication handle */ @@ -196,7 +198,7 @@ void rpcauth_destroy_credcache(struct rpc_auth *); void rpcauth_clear_credcache(struct rpc_cred_cache *); int rpcauth_key_timeout_notify(struct rpc_auth *, struct rpc_cred *); -bool rpcauth_cred_key_to_expire(struct rpc_cred *); +bool rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *); char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 040ff627c18a..696eb39fc1cb 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -359,8 +359,10 @@ rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred) EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify); bool -rpcauth_cred_key_to_expire(struct rpc_cred *cred) +rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred) { + if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) + return false; if (!cred->cr_ops->crkey_to_expire) return false; return cred->cr_ops->crkey_to_expire(cred); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 54dd3fdead54..168219535a34 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -224,7 +224,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) /* Fast track for non crkey_timeout (no key) underlying credentials */ - if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags)) + if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) return 0; /* Fast track for the normal case */ @@ -236,12 +236,6 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) if (IS_ERR(tcred)) return -EACCES; - if (!tcred->cr_ops->crkey_timeout) { - set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags); - ret = 0; - goto out_put; - } - /* Test for the almost error case */ ret = tcred->cr_ops->crkey_timeout(tcred); if (ret != 0) { @@ -257,7 +251,6 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); } -out_put: put_rpccred(tcred); return ret; } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e64ae93d5b4f..813a3cdfb573 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1015,6 +1015,7 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; + auth->au_flags = 0; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 8d9eb4d5ddd8..4d17376b2acb 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -115,6 +115,7 @@ static struct rpc_auth null_auth = { .au_cslack = NUL_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = ATOMIC_INIT(0), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 9f65452b7cbc..a99278c984e8 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -228,6 +228,7 @@ static struct rpc_auth unix_auth = { .au_cslack = UNX_CALLSLACK, .au_rslack = NUL_REPLYSLACK, + .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), -- cgit v1.2.3