Skip to content

Commit 787f774

Browse files
Jack Morgenstein authored and gregkh committed
IB/core: Add an unbound WQ type to the new CQ API
commit f794809 upstream.

The upstream kernel commit cited below modified the workqueue in the new CQ API to be bound to a specific CPU (instead of being unbound). This caused ALL users of the new CQ API to use the same bound WQ.

Specifically, MAD handling was severely delayed when the CPU bound to the WQ was busy handling (higher priority) interrupts. This caused a delay in the MAD "heartbeat" response handling, which resulted in ports being incorrectly classified as "down".

To fix this, add a new "unbound" WQ type to the new CQ API, so that users have the option to choose either a bound WQ or an unbound WQ. For MADs, choose the new "unbound" WQ.

Fixes: b7363e6 ("IB/device: Convert ib-comp-wq to be CPU-bound")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 6b48ef1 commit 787f774

4 files changed

Lines changed: 27 additions & 7 deletions

File tree

drivers/infiniband/core/cq.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,12 +102,12 @@ static void ib_cq_poll_work(struct work_struct *work)
102102
completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
103103
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
104104
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
105-
queue_work(ib_comp_wq, &cq->work);
105+
queue_work(cq->comp_wq, &cq->work);
106106
}
107107

108108
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
109109
{
110-
queue_work(ib_comp_wq, &cq->work);
110+
queue_work(cq->comp_wq, &cq->work);
111111
}
112112

113113
/**
@@ -159,9 +159,12 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
159159
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
160160
break;
161161
case IB_POLL_WORKQUEUE:
162+
case IB_POLL_UNBOUND_WORKQUEUE:
162163
cq->comp_handler = ib_cq_completion_workqueue;
163164
INIT_WORK(&cq->work, ib_cq_poll_work);
164165
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
166+
cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
167+
ib_comp_wq : ib_comp_unbound_wq;
165168
break;
166169
default:
167170
ret = -EINVAL;
@@ -196,6 +199,7 @@ void ib_free_cq(struct ib_cq *cq)
196199
irq_poll_disable(&cq->iop);
197200
break;
198201
case IB_POLL_WORKQUEUE:
202+
case IB_POLL_UNBOUND_WORKQUEUE:
199203
cancel_work_sync(&cq->work);
200204
break;
201205
default:

drivers/infiniband/core/device.c

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ struct ib_client_data {
5959
};
6060

6161
struct workqueue_struct *ib_comp_wq;
62+
struct workqueue_struct *ib_comp_unbound_wq;
6263
struct workqueue_struct *ib_wq;
6364
EXPORT_SYMBOL_GPL(ib_wq);
6465

@@ -1005,10 +1006,19 @@ static int __init ib_core_init(void)
10051006
goto err;
10061007
}
10071008

1009+
ib_comp_unbound_wq =
1010+
alloc_workqueue("ib-comp-unb-wq",
1011+
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
1012+
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
1013+
if (!ib_comp_unbound_wq) {
1014+
ret = -ENOMEM;
1015+
goto err_comp;
1016+
}
1017+
10081018
ret = class_register(&ib_class);
10091019
if (ret) {
10101020
pr_warn("Couldn't create InfiniBand device class\n");
1011-
goto err_comp;
1021+
goto err_comp_unbound;
10121022
}
10131023

10141024
ret = ibnl_init();
@@ -1055,6 +1065,8 @@ static int __init ib_core_init(void)
10551065
ibnl_cleanup();
10561066
err_sysfs:
10571067
class_unregister(&ib_class);
1068+
err_comp_unbound:
1069+
destroy_workqueue(ib_comp_unbound_wq);
10581070
err_comp:
10591071
destroy_workqueue(ib_comp_wq);
10601072
err:
@@ -1071,6 +1083,7 @@ static void __exit ib_core_cleanup(void)
10711083
addr_cleanup();
10721084
ibnl_cleanup();
10731085
class_unregister(&ib_class);
1086+
destroy_workqueue(ib_comp_unbound_wq);
10741087
destroy_workqueue(ib_comp_wq);
10751088
/* Make sure that any pending umem accounting work is done. */
10761089
destroy_workqueue(ib_wq);

drivers/infiniband/core/mad.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3163,7 +3163,7 @@ static int ib_mad_port_open(struct ib_device *device,
31633163
}
31643164

31653165
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
3166-
IB_POLL_WORKQUEUE);
3166+
IB_POLL_UNBOUND_WORKQUEUE);
31673167
if (IS_ERR(port_priv->cq)) {
31683168
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
31693169
ret = PTR_ERR(port_priv->cq);

include/rdma/ib_verbs.h

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363

6464
extern struct workqueue_struct *ib_wq;
6565
extern struct workqueue_struct *ib_comp_wq;
66+
extern struct workqueue_struct *ib_comp_unbound_wq;
6667

6768
union ib_gid {
6869
u8 raw[16];
@@ -1415,9 +1416,10 @@ struct ib_ah {
14151416
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
14161417

14171418
enum ib_poll_context {
1418-
IB_POLL_DIRECT, /* caller context, no hw completions */
1419-
IB_POLL_SOFTIRQ, /* poll from softirq context */
1420-
IB_POLL_WORKQUEUE, /* poll from workqueue */
1419+
IB_POLL_DIRECT, /* caller context, no hw completions */
1420+
IB_POLL_SOFTIRQ, /* poll from softirq context */
1421+
IB_POLL_WORKQUEUE, /* poll from workqueue */
1422+
IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
14211423
};
14221424

14231425
struct ib_cq {
@@ -1434,6 +1436,7 @@ struct ib_cq {
14341436
struct irq_poll iop;
14351437
struct work_struct work;
14361438
};
1439+
struct workqueue_struct *comp_wq;
14371440
};
14381441

14391442
struct ib_srq {

0 commit comments

Comments
 (0)