From 9447082ae666fbf3adbe9c9152117daa31a8b737 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 16 Nov 2016 12:21:19 -0200 Subject: [media] smiapp: Implement power-on and power-off sequences without runtime PM Power on the sensor when the module is loaded and power it off when it is removed. Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/smiapp/smiapp-core.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index 59872b31f832..620f8ce8185e 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -2741,8 +2741,6 @@ static const struct v4l2_subdev_internal_ops smiapp_internal_ops = { * I2C Driver */ -#ifdef CONFIG_PM - static int smiapp_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -2783,13 +2781,6 @@ static int smiapp_resume(struct device *dev) return rval; } -#else - -#define smiapp_suspend NULL -#define smiapp_resume NULL - -#endif /* CONFIG_PM */ - static struct smiapp_hwconfig *smiapp_get_hwconfig(struct device *dev) { struct smiapp_hwconfig *hwcfg; @@ -2913,13 +2904,9 @@ static int smiapp_probe(struct i2c_client *client, if (IS_ERR(sensor->xshutdown)) return PTR_ERR(sensor->xshutdown); - pm_runtime_enable(&client->dev); - - rval = pm_runtime_get_sync(&client->dev); - if (rval < 0) { - rval = -ENODEV; - goto out_power_off; - } + rval = smiapp_power_on(&client->dev); + if (rval < 0) + return rval; rval = smiapp_identify_module(sensor); if (rval) { @@ -3100,6 +3087,9 @@ static int smiapp_probe(struct i2c_client *client, if (rval < 0) goto out_media_entity_cleanup; + pm_runtime_set_active(&client->dev); + pm_runtime_get_noresume(&client->dev); + pm_runtime_enable(&client->dev); pm_runtime_set_autosuspend_delay(&client->dev, 1000); pm_runtime_use_autosuspend(&client->dev); pm_runtime_put_autosuspend(&client->dev); @@ -3113,8 +3103,7 @@ out_cleanup: smiapp_cleanup(sensor); out_power_off: - pm_runtime_put(&client->dev); - pm_runtime_disable(&client->dev); + smiapp_power_off(&client->dev); return rval; } @@ -3127,8 +3116,10 @@ static int smiapp_remove(struct i2c_client *client) v4l2_async_unregister_subdev(subdev); - pm_runtime_suspend(&client->dev); pm_runtime_disable(&client->dev); + if (!pm_runtime_status_suspended(&client->dev)) + smiapp_power_off(&client->dev); + pm_runtime_set_suspended(&client->dev); for (i = 0; i < sensor->ssds_used; i++) { v4l2_device_unregister_subdev(&sensor->ssds[i].sd); -- cgit From 4bfb934b0067b7f6a24470682c5f7254fd4d8282 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sat, 19 Nov 2016 19:50:10 -0200 Subject: [media] smiapp: Make suspend and resume functions __maybe_unused The smiapp_suspend() and smiapp_resume() functions will end up being unused if CONFIG_PM is enabled but CONFIG_PM_SLEEP is disabled, causing a compiler warning from both of the function definitions. Fix this by marking the functions with __maybe_unused. Suggested-by: Arnd Bergmann Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/smiapp/smiapp-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index 620f8ce8185e..f4e92bdfe192 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -2741,7 +2741,7 @@ static const struct v4l2_subdev_internal_ops smiapp_internal_ops = { * I2C Driver */ -static int smiapp_suspend(struct device *dev) +static int __maybe_unused smiapp_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct v4l2_subdev *subdev = i2c_get_clientdata(client); @@ -2766,7 +2766,7 @@ static int smiapp_suspend(struct device *dev) return 0; } -static int smiapp_resume(struct device *dev) +static int __maybe_unused smiapp_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct v4l2_subdev *subdev = i2c_get_clientdata(client); -- cgit From f3854973f196baad5be6b62d8f5ea24b0346b63f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 6 Dec 2016 11:17:12 -0200 Subject: [media] cec: fix report_current_latency In the (very) small print of the REPORT_CURRENT_LATENCY message there is a line that says that the last byte of the message (audio out delay) is only present if the 'audio out compensated' value is 3. I missed this, and so if this message was sent with a total length of 6 (i.e. without the audio out delay byte), then it was rejected by the framework since a minimum length of 7 was expected. Fix this minimum length check and update the wrappers in cec-funcs.h to do the right thing based on the message length. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 2 +- include/uapi/linux/cec-funcs.h | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 0ea4efb3de66..f15f6ffd75b2 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -851,7 +851,7 @@ static const u8 cec_msg_size[256] = { [CEC_MSG_REQUEST_ARC_TERMINATION] = 2 | DIRECTED, [CEC_MSG_TERMINATE_ARC] = 2 | DIRECTED, [CEC_MSG_REQUEST_CURRENT_LATENCY] = 4 | BCAST, - [CEC_MSG_REPORT_CURRENT_LATENCY] = 7 | BCAST, + [CEC_MSG_REPORT_CURRENT_LATENCY] = 6 | BCAST, [CEC_MSG_CDC_MESSAGE] = 2 | BCAST, }; diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index 3cbc327801d6..c451eec42a83 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -1665,14 +1665,15 @@ static inline void cec_msg_report_current_latency(struct cec_msg *msg, __u8 audio_out_compensated, __u8 audio_out_delay) { - msg->len = 7; + msg->len = 6; msg->msg[0] |= 0xf; /* broadcast */ msg->msg[1] = CEC_MSG_REPORT_CURRENT_LATENCY; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; msg->msg[4] = video_latency; msg->msg[5] = (low_latency_mode << 2) | audio_out_compensated; - msg->msg[6] = audio_out_delay; + if (audio_out_compensated == 3) + msg->msg[msg->len++] = audio_out_delay; } static inline void cec_ops_report_current_latency(const struct cec_msg *msg, @@ -1686,7 +1687,10 @@ static inline void cec_ops_report_current_latency(const struct cec_msg *msg, *video_latency = msg->msg[4]; *low_latency_mode = (msg->msg[5] >> 2) & 1; *audio_out_compensated = msg->msg[5] & 3; - *audio_out_delay = msg->msg[6]; + if (*audio_out_compensated == 3 && msg->len >= 7) + *audio_out_delay = msg->msg[6]; + else + *audio_out_delay = 0; } static inline void cec_msg_request_current_latency(struct cec_msg *msg, -- cgit From 120476123646ba3619c90db7bcbc6f8eea53c990 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 11:14:32 -0200 Subject: [media] cec: when canceling a message, don't overwrite old status info When a pending message was canceled (e.g. due to a timeout), then the old tx_status info was overwritten instead of ORed. The same happened with the tx_error_cnt field. So just modify them instead of overwriting them. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index f15f6ffd75b2..3191c0c5eae1 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -288,10 +288,10 @@ static void cec_data_cancel(struct cec_data *data) /* Mark it as an error */ data->msg.tx_ts = ktime_get_ns(); - data->msg.tx_status = CEC_TX_STATUS_ERROR | - CEC_TX_STATUS_MAX_RETRIES; + data->msg.tx_status |= CEC_TX_STATUS_ERROR | + CEC_TX_STATUS_MAX_RETRIES; + data->msg.tx_error_cnt++; data->attempts = 0; - data->msg.tx_error_cnt = 1; /* Queue transmitted message for monitoring purposes */ cec_queue_msg_monitor(data->adap, &data->msg, 1); -- cgit From a24f56d47930492c94ef6875bf45adf7607ca1a4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 11:28:19 -0200 Subject: [media] cec: CEC_MSG_GIVE_FEATURES should abort for CEC version < 2 This is a 2.0 only message, so it should return Feature Abort if the adapter is configured for CEC version 1.4. Right now it does nothing, which means that the sender will time out. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 3191c0c5eae1..c05956fdd500 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1777,9 +1777,9 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, } case CEC_MSG_GIVE_FEATURES: - if (adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0) - return cec_report_features(adap, la_idx); - return 0; + if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0) + return cec_feature_abort(adap, msg); + return cec_report_features(adap, la_idx); default: /* -- cgit From 7af26f889eb67db272021a939f7d4a57e96dd961 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 11:54:06 -0200 Subject: [media] cec: update log_addr[] before finishing configuration The loop that sets the unused logical addresses to INVALID should be done before 'configured' is set to true. This ensures that cec_log_addrs is consistent before it will be used. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index c05956fdd500..f3fef487f28d 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1250,6 +1250,8 @@ configured: for (i = 1; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; } + for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; adap->is_configured = true; adap->is_configuring = false; cec_post_state_event(adap); @@ -1268,8 +1270,6 @@ configured: cec_report_features(adap, i); cec_report_phys_addr(adap, i); } - for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) - las->log_addr[i] = CEC_LOG_ADDR_INVALID; mutex_lock(&adap->lock); adap->kthread_config = NULL; mutex_unlock(&adap->lock); -- cgit From 52bc30fda9622f492427d484bd4dd8ee42cc4667 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 11:36:03 -0200 Subject: [media] cec: replace cec_report_features by cec_fill_msg_report_features The fill function just fills in the cec_msg struct, it doesn't transmit the message. This is now done explicitly. This makes it possible to switch to transmitting this message with adap->lock held. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 48 ++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index f3fef487f28d..2b668510ca36 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -30,8 +30,10 @@ #include "cec-priv.h" -static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx); static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx); +static void cec_fill_msg_report_features(struct cec_adapter *adap, + struct cec_msg *msg, + unsigned int la_idx); /* * 400 ms is the time it takes for one 16 byte message to be @@ -1258,16 +1260,21 @@ configured: mutex_unlock(&adap->lock); for (i = 0; i < las->num_log_addrs; i++) { + struct cec_msg msg = {}; + if (las->log_addr[i] == CEC_LOG_ADDR_INVALID || (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY)) continue; - /* - * Report Features must come first according - * to CEC 2.0 - */ - if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED) - cec_report_features(adap, i); + msg.msg[0] = (las->log_addr[i] << 4) | 0x0f; + + /* Report Features must come first according to CEC 2.0 */ + if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED && + adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0) { + cec_fill_msg_report_features(adap, &msg, i); + cec_transmit_msg(adap, &msg, false); + } + cec_report_phys_addr(adap, i); } mutex_lock(&adap->lock); @@ -1526,36 +1533,32 @@ EXPORT_SYMBOL_GPL(cec_s_log_addrs); /* High-level core CEC message handling */ -/* Transmit the Report Features message */ -static int cec_report_features(struct cec_adapter *adap, unsigned int la_idx) +/* Fill in the Report Features message */ +static void cec_fill_msg_report_features(struct cec_adapter *adap, + struct cec_msg *msg, + unsigned int la_idx) { - struct cec_msg msg = { }; const struct cec_log_addrs *las = &adap->log_addrs; const u8 *features = las->features[la_idx]; bool op_is_dev_features = false; unsigned int idx; - /* This is 2.0 and up only */ - if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0) - return 0; - /* Report Features */ - msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f; - msg.len = 4; - msg.msg[1] = CEC_MSG_REPORT_FEATURES; - msg.msg[2] = adap->log_addrs.cec_version; - msg.msg[3] = las->all_device_types[la_idx]; + msg->msg[0] = (las->log_addr[la_idx] << 4) | 0x0f; + msg->len = 4; + msg->msg[1] = CEC_MSG_REPORT_FEATURES; + msg->msg[2] = adap->log_addrs.cec_version; + msg->msg[3] = las->all_device_types[la_idx]; /* Write RC Profiles first, then Device Features */ for (idx = 0; idx < ARRAY_SIZE(las->features[0]); idx++) { - msg.msg[msg.len++] = features[idx]; + msg->msg[msg->len++] = features[idx]; if ((features[idx] & CEC_OP_FEAT_EXT) == 0) { if (op_is_dev_features) break; op_is_dev_features = true; } } - return cec_transmit_msg(adap, &msg, false); } /* Transmit the Report Physical Address message */ @@ -1779,7 +1782,8 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, case CEC_MSG_GIVE_FEATURES: if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0) return cec_feature_abort(adap, msg); - return cec_report_features(adap, la_idx); + cec_fill_msg_report_features(adap, &tx_cec_msg, la_idx); + return cec_transmit_msg(adap, &tx_cec_msg, false); default: /* -- cgit From d3d64bc7408f1ff0b0ff8354056e2a48eda5886d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 11:48:34 -0200 Subject: [media] cec: move cec_report_phys_addr into cec_config_thread_func It's only a small function and this makes it easier to switch to transmitting the message with adap->lock held in the next patch. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 2b668510ca36..f3d495654a53 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -30,7 +30,6 @@ #include "cec-priv.h" -static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx); static void cec_fill_msg_report_features(struct cec_adapter *adap, struct cec_msg *msg, unsigned int la_idx); @@ -1275,7 +1274,13 @@ configured: cec_transmit_msg(adap, &msg, false); } - cec_report_phys_addr(adap, i); + /* Report Physical Address */ + cec_msg_report_physical_addr(&msg, adap->phys_addr, + las->primary_device_type[i]); + dprintk(2, "config: la %d pa %x.%x.%x.%x\n", + las->log_addr[i], + cec_phys_addr_exp(adap->phys_addr)); + cec_transmit_msg(adap, &msg, false); } mutex_lock(&adap->lock); adap->kthread_config = NULL; @@ -1561,22 +1566,6 @@ static void cec_fill_msg_report_features(struct cec_adapter *adap, } } -/* Transmit the Report Physical Address message */ -static int cec_report_phys_addr(struct cec_adapter *adap, unsigned int la_idx) -{ - const struct cec_log_addrs *las = &adap->log_addrs; - struct cec_msg msg = { }; - - /* Report Physical Address */ - msg.msg[0] = (las->log_addr[la_idx] << 4) | 0x0f; - cec_msg_report_physical_addr(&msg, adap->phys_addr, - las->primary_device_type[la_idx]); - dprintk(2, "config: la %d pa %x.%x.%x.%x\n", - las->log_addr[la_idx], - cec_phys_addr_exp(adap->phys_addr)); - return cec_transmit_msg(adap, &msg, false); -} - /* Transmit the Feature Abort message */ static int cec_feature_abort_reason(struct cec_adapter *adap, struct cec_msg *msg, u8 reason) -- cgit From f60f35609f89ef4fee73776bc1ef697923251995 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Dec 2016 12:00:49 -0200 Subject: [media] cec: fix race between configuring and unconfiguring This race was discovered by running cec-compliance -A with the cec module debug parameter set to 2: suddenly the test would fail. It turns out that this happens when the test configures the adapter in non-blocking mode, then it waits for the CEC_EVENT_STATE_CHANGE event and once the event is received it unconfigures the adapter. What happened was that the unconfigure was executed while the configure was still transmitting the Report Features and Report Physical Address messages. This messed up the internal state of the cec_adapter. The fix is to transmit those messages with the adap->lock mutex held (this will just queue them up in the internal transmit queue, and not actually transmit anything yet). Only unlock the mutex once everything is done. The main thread will dequeue the messages from the internal transmit queue and transmit them one by one, unless an unconfigure was done, and in that case any messages are just dropped. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index f3d495654a53..ebb5e391b800 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1256,8 +1256,17 @@ configured: adap->is_configured = true; adap->is_configuring = false; cec_post_state_event(adap); - mutex_unlock(&adap->lock); + /* + * Now post the Report Features and Report Physical Address broadcast + * messages. Note that these are non-blocking transmits, meaning that + * they are just queued up and once adap->lock is unlocked the main + * thread will kick in and start transmitting these. + * + * If after this function is done (but before one or more of these + * messages are actually transmitted) the CEC adapter is unconfigured, + * then any remaining messages will be dropped by the main thread. + */ for (i = 0; i < las->num_log_addrs; i++) { struct cec_msg msg = {}; @@ -1271,7 +1280,7 @@ configured: if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED && adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0) { cec_fill_msg_report_features(adap, &msg, i); - cec_transmit_msg(adap, &msg, false); + cec_transmit_msg_fh(adap, &msg, NULL, false); } /* Report Physical Address */ @@ -1280,12 +1289,11 @@ configured: dprintk(2, "config: la %d pa %x.%x.%x.%x\n", las->log_addr[i], cec_phys_addr_exp(adap->phys_addr)); - cec_transmit_msg(adap, &msg, false); + cec_transmit_msg_fh(adap, &msg, NULL, false); } - mutex_lock(&adap->lock); adap->kthread_config = NULL; - mutex_unlock(&adap->lock); complete(&adap->config_completion); + mutex_unlock(&adap->lock); return 0; unconfigure: -- cgit From 78ccbf9ff89bd7a20d36be039cb3eab71081648c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 10:31:28 -0300 Subject: [media] media/cobalt: use pci_irq_allocate_vectors Simply the interrupt setup by using the new PCI layer helpers. Despite using pci_enable_msi_range, this driver was only requesting a single MSI vector anyway. Signed-off-by: Christoph Hellwig Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/cobalt/cobalt-driver.c | 8 ++------ drivers/media/pci/cobalt/cobalt-driver.h | 2 -- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/media/pci/cobalt/cobalt-driver.c b/drivers/media/pci/cobalt/cobalt-driver.c index 979634000597..d5c911c09e2b 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.c +++ b/drivers/media/pci/cobalt/cobalt-driver.c @@ -308,9 +308,7 @@ static void cobalt_pci_iounmap(struct cobalt *cobalt, struct pci_dev *pci_dev) static void cobalt_free_msi(struct cobalt *cobalt, struct pci_dev *pci_dev) { free_irq(pci_dev->irq, (void *)cobalt); - - if (cobalt->msi_enabled) - pci_disable_msi(pci_dev); + pci_free_irq_vectors(pci_dev); } static int cobalt_setup_pci(struct cobalt *cobalt, struct pci_dev *pci_dev, @@ -387,14 +385,12 @@ static int cobalt_setup_pci(struct cobalt *cobalt, struct pci_dev *pci_dev, from being generated. */ cobalt_set_interrupt(cobalt, false); - if (pci_enable_msi_range(pci_dev, 1, 1) < 1) { + if (pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_MSI) < 1) { cobalt_err("Could not enable MSI\n"); - cobalt->msi_enabled = false; ret = -EIO; goto err_release; } msi_config_show(cobalt, pci_dev); - cobalt->msi_enabled = true; /* Register IRQ */ if (request_irq(pci_dev->irq, cobalt_irq_handler, IRQF_SHARED, diff --git a/drivers/media/pci/cobalt/cobalt-driver.h b/drivers/media/pci/cobalt/cobalt-driver.h index ed00dc9d9399..00f773ec359a 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.h +++ b/drivers/media/pci/cobalt/cobalt-driver.h @@ -287,8 +287,6 @@ struct cobalt { u32 irq_none; u32 irq_full_fifo; - bool msi_enabled; - /* omnitek dma */ int dma_channels; int first_fifo_channel; -- cgit From 48775cb73c2e26b7ca9d679875a6e570c8b8e124 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 15 Dec 2016 19:51:07 -0200 Subject: [media] pctv452e: move buffer to heap, no mutex commit 73d5c5c864f4 ("[media] pctv452e: don't do DMA on stack") caused a NULL pointer dereference which occurs when dvb_usb_init() calls dvb_usb_device_power_ctrl() for the first time, before the frontend has been attached. It also caused a recursive deadlock because tt3650_ci_msg_locked() has already locked the mutex. So, partially revert it, but move the buffer to the heap (DMA capable), not to the stack (may not be DMA capable). Instead of sharing one buffer which needs mutex protection, do a new heap allocation for each call. Fixes: commit 73d5c5c864f4 ("[media] pctv452e: don't do DMA on stack") Cc: stable@vger.kernel.org # For Kernel 4.9 Signed-off-by: Max Kellermann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/pctv452e.c | 133 +++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index 07fa08be9e99..d54ebe7e0215 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ b/drivers/media/usb/dvb-usb/pctv452e.c @@ -97,14 +97,13 @@ struct pctv452e_state { u8 c; /* transaction counter, wraps around... */ u8 initialized; /* set to 1 if 0x15 has been sent */ u16 last_rc_key; - - unsigned char data[80]; }; static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; unsigned int rlen; int ret; @@ -114,36 +113,39 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, return -EIO; } - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = cmd; - state->data[3] = write_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = cmd; + buf[3] = write_len; - memcpy(state->data + 4, data, write_len); + memcpy(buf + 4, data, write_len); rlen = (read_len > 0) ? 64 : 0; - ret = dvb_usb_generic_rw(d, state->data, 4 + write_len, - state->data, rlen, /* delay_ms */ 0); + ret = dvb_usb_generic_rw(d, buf, 4 + write_len, + buf, rlen, /* delay_ms */ 0); if (0 != ret) goto failed; ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; - memcpy(data, state->data + 4, read_len); + memcpy(data, buf + 4, read_len); - mutex_unlock(&state->ca_mutex); + kfree(buf); return 0; failed: err("CI error %d; %02X %02X %02X -> %*ph.", - ret, SYNC_BYTE_OUT, id, cmd, 3, state->data); + ret, SYNC_BYTE_OUT, id, cmd, 3, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -410,53 +412,57 @@ static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *rcv_buf, u8 rcv_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; int ret; - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; ret = -EINVAL; if (snd_len > 64 - 7 || rcv_len > 64 - 7) goto failed; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_I2C; - state->data[3] = snd_len + 3; - state->data[4] = addr << 1; - state->data[5] = snd_len; - state->data[6] = rcv_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = PCTV_CMD_I2C; + buf[3] = snd_len + 3; + buf[4] = addr << 1; + buf[5] = snd_len; + buf[6] = rcv_len; - memcpy(state->data + 7, snd_buf, snd_len); + memcpy(buf + 7, snd_buf, snd_len); - ret = dvb_usb_generic_rw(d, state->data, 7 + snd_len, - state->data, /* rcv_len */ 64, + ret = dvb_usb_generic_rw(d, buf, 7 + snd_len, + buf, /* rcv_len */ 64, /* delay_ms */ 0); if (ret < 0) goto failed; /* TT USB protocol error. */ ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; /* I2C device didn't respond as expected. */ ret = -EREMOTEIO; - if (state->data[5] < snd_len || state->data[6] < rcv_len) + if (buf[5] < snd_len || buf[6] < rcv_len) goto failed; - memcpy(rcv_buf, state->data + 7, rcv_len); - mutex_unlock(&state->ca_mutex); + memcpy(rcv_buf, buf + 7, rcv_len); + kfree(buf); return rcv_len; failed: err("I2C error %d; %02X %02X %02X %02X %02X -> %*ph", ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len, - 7, state->data); + 7, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -505,7 +511,7 @@ static u32 pctv452e_i2c_func(struct i2c_adapter *adapter) static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 *rx; + u8 *b0, *rx; int ret; info("%s: %d\n", __func__, i); @@ -516,11 +522,12 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) if (state->initialized) return 0; - rx = kmalloc(PCTV_ANSWER_LEN, GFP_KERNEL); - if (!rx) + b0 = kmalloc(5 + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b0) return -ENOMEM; - mutex_lock(&state->ca_mutex); + rx = b0 + 5; + /* hmm where shoud this should go? */ ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE); if (ret != 0) @@ -528,66 +535,70 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) __func__, ret); /* this is a one-time initialization, dont know where to put */ - state->data[0] = 0xaa; - state->data[1] = state->c++; - state->data[2] = PCTV_CMD_RESET; - state->data[3] = 1; - state->data[4] = 0; + b0[0] = 0xaa; + b0[1] = state->c++; + b0[2] = PCTV_CMD_RESET; + b0[3] = 1; + b0[4] = 0; /* reset board */ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; - state->data[1] = state->c++; - state->data[4] = 1; + b0[1] = state->c++; + b0[4] = 1; /* reset board (again?) */ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; state->initialized = 1; ret: - mutex_unlock(&state->ca_mutex); - kfree(rx); + kfree(b0); return ret; } static int pctv452e_rc_query(struct dvb_usb_device *d) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *b, *rx; int ret, i; u8 id; - mutex_lock(&state->ca_mutex); + b = kmalloc(CMD_BUFFER_SIZE + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b) + return -ENOMEM; + + rx = b + CMD_BUFFER_SIZE; + id = state->c++; /* prepare command header */ - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_IR; - state->data[3] = 0; + b[0] = SYNC_BYTE_OUT; + b[1] = id; + b[2] = PCTV_CMD_IR; + b[3] = 0; /* send ir request */ - ret = dvb_usb_generic_rw(d, state->data, 4, - state->data, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0); if (ret != 0) goto ret; if (debug > 3) { - info("%s: read: %2d: %*ph: ", __func__, ret, 3, state->data); - for (i = 0; (i < state->data[3]) && ((i + 3) < PCTV_ANSWER_LEN); i++) - info(" %02x", state->data[i + 3]); + info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx); + for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++) + info(" %02x", rx[i+3]); info("\n"); } - if ((state->data[3] == 9) && (state->data[12] & 0x01)) { + if ((rx[3] == 9) && (rx[12] & 0x01)) { /* got a "press" event */ - state->last_rc_key = RC_SCANCODE_RC5(state->data[7], state->data[6]); + state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]); if (debug > 2) info("%s: cmd=0x%02x sys=0x%02x\n", - __func__, state->data[6], state->data[7]); + __func__, rx[6], rx[7]); rc_keydown(d->rc_dev, RC_TYPE_RC5, state->last_rc_key, 0); } else if (state->last_rc_key) { @@ -595,7 +606,7 @@ static int pctv452e_rc_query(struct dvb_usb_device *d) state->last_rc_key = 0; } ret: - mutex_unlock(&state->ca_mutex); + kfree(b); return ret; } -- cgit From aff808e813fc2d311137754165cf53d4ee6ddcc2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:17 -0200 Subject: [media] v4l: tvp5150: Reset device at probe time, not in get/set format handlers The tvp5150 doesn't support format setting through the subdev pad API and thus implements the set format handler as a get format operation. The single handler, tvp5150_fill_fmt(), resets the device by calling tvp5150_reset(). This causes malfunction as the device can be reset at will, possibly from userspace when the subdev userspace API is enabled. The reset call was added in commit ec2c4f3f93cb ("[media] media: tvp5150: Add mbus_fmt callbacks"), probably as an attempt to set the device to a known state before detecting the current TV standard. However, the get format handler doesn't access the hardware to get the TV standard since commit 963ddc63e20d ("[media] media: tvp5150: Add cropping support"). There is thus no need to reset the device when getting the format. However, removing the tvp5150_reset() from the get/set format handlers results in the function not being called at all if the bridge driver doesn't use the .reset() operation. The operation is nowadays abused and shouldn't be used, so shouldn't expect bridge drivers to call it. To make sure the device is properly initialize, move the reset call from the format handlers to the probe function. Cc: stable@vger.kernel.org # For Kernel 4.5 and upper Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/tvp5150.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 3a0fe8cc64e9..a30bfcb4eec6 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -861,8 +861,6 @@ static int tvp5150_fill_fmt(struct v4l2_subdev *sd, f = &format->format; - tvp5150_reset(sd, 0); - f->width = decoder->rect.width; f->height = decoder->rect.height / 2; @@ -1524,7 +1522,6 @@ static int tvp5150_probe(struct i2c_client *c, res = core->hdl.error; goto err; } - v4l2_ctrl_handler_setup(&core->hdl); /* Default is no cropping */ core->rect.top = 0; @@ -1535,6 +1532,8 @@ static int tvp5150_probe(struct i2c_client *c, core->rect.left = 0; core->rect.width = TVP5150_H_MAX; + tvp5150_reset(sd, 0); /* Calls v4l2_ctrl_handler_setup() */ + res = v4l2_async_register_subdev(sd); if (res < 0) goto err; -- cgit From b4b2de386bbb6589d81596999d4a924928dc119b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:18 -0200 Subject: [media] v4l: tvp5150: Fix comment regarding output pin muxing The FID/GLCO/VLK/HVLK and INTREQ/GPCL/VBLK pins are muxed differently depending on whether the input is an S-Video or composite signal. The comment that explains the logic doesn't reflect the code. It appears that the comment is incorrect, as disabling the output data bus in composite mode makes no sense. Update the comment to match the code. While at it define macros for the MISC_CTL register bits, the code is too confusing with numerical values. Cc: stable@vger.kernel.org # For Kernel 4.5 and upper Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/tvp5150.c | 24 +++++++++++++++++------- drivers/media/i2c/tvp5150_reg.h | 9 +++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index a30bfcb4eec6..8852fa8c957b 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -291,8 +291,12 @@ static void tvp5150_selmux(struct v4l2_subdev *sd) tvp5150_write(sd, TVP5150_OP_MODE_CTL, opmode); tvp5150_write(sd, TVP5150_VD_IN_SRC_SEL_1, input); - /* Svideo should enable YCrCb output and disable GPCL output - * For Composite and TV, it should be the reverse + /* + * Setup the FID/GLCO/VLK/HVLK and INTREQ/GPCL/VBLK output signals. For + * S-Video we output the vertical lock (VLK) signal on FID/GLCO/VLK/HVLK + * and set INTREQ/GPCL/VBLK to logic 0. For composite we output the + * field indicator (FID) signal on FID/GLCO/VLK/HVLK and set + * INTREQ/GPCL/VBLK to logic 1. */ val = tvp5150_read(sd, TVP5150_MISC_CTL); if (val < 0) { @@ -301,9 +305,9 @@ static void tvp5150_selmux(struct v4l2_subdev *sd) } if (decoder->input == TVP5150_SVIDEO) - val = (val & ~0x40) | 0x10; + val = (val & ~TVP5150_MISC_CTL_GPCL) | TVP5150_MISC_CTL_HVLK; else - val = (val & ~0x10) | 0x40; + val = (val & ~TVP5150_MISC_CTL_HVLK) | TVP5150_MISC_CTL_GPCL; tvp5150_write(sd, TVP5150_MISC_CTL, val); }; @@ -455,7 +459,12 @@ static const struct i2c_reg_value tvp5150_init_enable[] = { },{ /* Automatic offset and AGC enabled */ TVP5150_ANAL_CHL_CTL, 0x15 },{ /* Activate YCrCb output 0x9 or 0xd ? */ - TVP5150_MISC_CTL, 0x6f + TVP5150_MISC_CTL, TVP5150_MISC_CTL_GPCL | + TVP5150_MISC_CTL_INTREQ_OE | + TVP5150_MISC_CTL_YCBCR_OE | + TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_VBLANK | + TVP5150_MISC_CTL_CLOCK_OE, },{ /* Activates video std autodetection for all standards */ TVP5150_AUTOSW_MSK, 0x0 },{ /* Default format: 0x47. For 4:2:2: 0x40 */ @@ -1050,11 +1059,12 @@ static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable) { struct tvp5150 *decoder = to_tvp5150(sd); /* Output format: 8-bit ITU-R BT.656 with embedded syncs */ - int val = 0x09; + int val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; /* Output format: 8-bit 4:2:2 YUV with discrete sync */ if (decoder->mbus_type == V4L2_MBUS_PARALLEL) - val = 0x0d; + val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE + | TVP5150_MISC_CTL_CLOCK_OE; /* Initializes TVP5150 to its default values */ /* # set PCLK (27MHz) */ diff --git a/drivers/media/i2c/tvp5150_reg.h b/drivers/media/i2c/tvp5150_reg.h index 25a994944918..30a48c28d05a 100644 --- a/drivers/media/i2c/tvp5150_reg.h +++ b/drivers/media/i2c/tvp5150_reg.h @@ -9,6 +9,15 @@ #define TVP5150_ANAL_CHL_CTL 0x01 /* Analog channel controls */ #define TVP5150_OP_MODE_CTL 0x02 /* Operation mode controls */ #define TVP5150_MISC_CTL 0x03 /* Miscellaneous controls */ +#define TVP5150_MISC_CTL_VBLK_GPCL BIT(7) +#define TVP5150_MISC_CTL_GPCL BIT(6) +#define TVP5150_MISC_CTL_INTREQ_OE BIT(5) +#define TVP5150_MISC_CTL_HVLK BIT(4) +#define TVP5150_MISC_CTL_YCBCR_OE BIT(3) +#define TVP5150_MISC_CTL_SYNC_OE BIT(2) +#define TVP5150_MISC_CTL_VBLANK BIT(1) +#define TVP5150_MISC_CTL_CLOCK_OE BIT(0) + #define TVP5150_AUTOSW_MSK 0x04 /* Autoswitch mask: TVP5150A / TVP5150AM */ /* Reserved 05h */ -- cgit From 79d6205a3f741c9fb89cfc47dfa0eddb1526726d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:19 -0200 Subject: [media] v4l: tvp5150: Don't override output pinmuxing at stream on/off time The s_stream() handler incorrectly writes the whole MISC_CTL register to enable or disable the outputs, overriding the output pinmuxing configuration. Fix it to only touch the output enable bits. The CONF_SHARED_PIN register is also written by the same function, resulting in muxing the INTREQ signal instead of the VBLK/GPCL signal on the INTREQ/GPCL/VBLK pin. As the driver doesn't support interrupts this is obviously incorrect, and breaks operation on other devices. Fix it by removing the write. Cc: stable@vger.kernel.org # For Kernel 4.5 and upper Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/tvp5150.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 8852fa8c957b..48646a7f3fb0 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -1058,22 +1058,27 @@ static const struct media_entity_operations tvp5150_sd_media_ops = { static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable) { struct tvp5150 *decoder = to_tvp5150(sd); - /* Output format: 8-bit ITU-R BT.656 with embedded syncs */ - int val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; - - /* Output format: 8-bit 4:2:2 YUV with discrete sync */ - if (decoder->mbus_type == V4L2_MBUS_PARALLEL) - val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE - | TVP5150_MISC_CTL_CLOCK_OE; + int val; - /* Initializes TVP5150 to its default values */ - /* # set PCLK (27MHz) */ - tvp5150_write(sd, TVP5150_CONF_SHARED_PIN, 0x00); + /* Enable or disable the video output signals. */ + val = tvp5150_read(sd, TVP5150_MISC_CTL); + if (val < 0) + return val; + + val &= ~(TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_CLOCK_OE); + + if (enable) { + /* + * Enable the YCbCr and clock outputs. In discrete sync mode + * (non-BT.656) additionally enable the the sync outputs. + */ + val |= TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; + if (decoder->mbus_type == V4L2_MBUS_PARALLEL) + val |= TVP5150_MISC_CTL_SYNC_OE; + } - if (enable) - tvp5150_write(sd, TVP5150_MISC_CTL, val); - else - tvp5150_write(sd, TVP5150_MISC_CTL, 0x00); + tvp5150_write(sd, TVP5150_MISC_CTL, val); return 0; } -- cgit From 4dd19196c5539c377beaa9850fac30c18318c7a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Dec 2016 09:36:29 -0200 Subject: [media] dvb: avoid warning in dvb_net With gcc-5 or higher on x86, we can get a bogus warning in the dvb-net code: drivers/media/dvb-core/dvb_net.c: In function 'dvb_net_ule': arch/x86/include/asm/string_32.h:78:22: error: '*((void *)&dest_addr+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem here is that gcc doesn't track all of the conditions to prove it can't end up copying uninitialized data. This changes the logic around so we zero out the destination address earlier when we determine that it is not set here. This allows the compiler to figure it out. Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_net.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index bd833b0824c6..eb60cb1442f2 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -719,6 +719,9 @@ static void dvb_net_ule_check_crc(struct dvb_net_ule_handle *h, skb_copy_from_linear_data(h->priv->ule_skb, dest_addr, ETH_ALEN); skb_pull(h->priv->ule_skb, ETH_ALEN); + } else { + /* dest_addr buffer is only valid if h->priv->ule_dbit == 0 */ + eth_zero_addr(dest_addr); } /* Handle ULE Extension Headers. */ @@ -750,16 +753,8 @@ static void dvb_net_ule_check_crc(struct dvb_net_ule_handle *h, if (!h->priv->ule_bridged) { skb_push(h->priv->ule_skb, ETH_HLEN); h->ethh = (struct ethhdr *)h->priv->ule_skb->data; - if (!h->priv->ule_dbit) { - /* - * dest_addr buffer is only valid if - * h->priv->ule_dbit == 0 - */ - memcpy(h->ethh->h_dest, dest_addr, ETH_ALEN); - eth_zero_addr(h->ethh->h_source); - } else /* zeroize source and dest */ - memset(h->ethh, 0, ETH_ALEN * 2); - + memcpy(h->ethh->h_dest, dest_addr, ETH_ALEN); + eth_zero_addr(h->ethh->h_source); h->ethh->h_proto = htons(h->priv->ule_sndu_type); } /* else: skb is in correct state; nothing to do. */ -- cgit From c739c0a7c3c2472d7562b8f802cdce44d2597c8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Dec 2016 09:41:29 -0200 Subject: [media] s5k4ecgx: select CRC32 helper A rare randconfig build failure shows up in this driver when the CRC32 helper is not there: drivers/media/built-in.o: In function `s5k4ecgx_s_power': s5k4ecgx.c:(.text+0x9eb4): undefined reference to `crc32_le' This adds the 'select' that all other users of this function have. Fixes: 8b99312b7214 ("[media] Add v4l2 subdev driver for S5K4ECGX sensor") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index b31fa6fae009..b979ea148251 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -655,6 +655,7 @@ config VIDEO_S5K6A3 config VIDEO_S5K4ECGX tristate "Samsung S5K4ECGX sensor support" depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + select CRC32 ---help--- This is a V4L2 sensor-level driver for Samsung S5K4ECGX 5M camera sensor with an embedded SoC image signal processor. -- cgit From 4bbc84ffd137fe43d68aa633d317b0a96de8a828 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 19 Dec 2016 19:17:08 -0800 Subject: sparc: use symbolic names for tsb indexing Use symbolic names MM_TSB_BASE and MM_TSB_HUGE instead of numeric values 0 and 1 in __tsb_context_switch. Code cleanup only, no functional change. Suggested-by: Sam Ravnborg Signed-off-by: Mike Kravetz Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index b84be675e507..d0317993e947 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -35,15 +35,15 @@ void __tsb_context_switch(unsigned long pgd_pa, static inline void tsb_context_switch(struct mm_struct *mm) { __tsb_context_switch(__pa(mm->pgd), - &mm->context.tsb_block[0], + &mm->context.tsb_block[MM_TSB_BASE], #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - (mm->context.tsb_block[1].tsb ? - &mm->context.tsb_block[1] : + (mm->context.tsb_block[MM_TSB_HUGE].tsb ? + &mm->context.tsb_block[MM_TSB_HUGE] : NULL) #else NULL #endif - , __pa(&mm->context.tsb_descr[0])); + , __pa(&mm->context.tsb_descr[MM_TSB_BASE])); } void tsb_grow(struct mm_struct *mm, -- cgit From 2f884e6e688a0deb69e6c9552e51aef8b7e3f5f1 Mon Sep 17 00:00:00 2001 From: "Strashko, Grygorii" Date: Thu, 8 Dec 2016 17:33:10 -0600 Subject: irqchip/keystone: Fix "scheduling while atomic" on rt The below call chain generates "scheduling while atomic" backtrace and causes system crash when Keystone 2 IRQ chip driver is used with RT-kernel: gic_handle_irq() |-__handle_domain_irq() |-generic_handle_irq() |-keystone_irq_handler() |-regmap_read() |-regmap_lock_spinlock() |-rt_spin_lock() The reason is that Keystone driver dispatches IRQ using chained IRQ handler and accesses I/O memory through syscon->regmap(mmio) which is implemented as fast_io regmap and uses regular spinlocks for synchronization, but spinlocks transformed to rt_mutexes on RT. Hence, convert Keystone 2 IRQ driver to use generic irq handler instead of chained IRQ handler. This way it will be compatible with RT kernel where it will be forced thread IRQ handler while in non-RT kernel it still will be executed in HW IRQ context. Cc: Suman Anna Signed-off-by: Grygorii Strashko Tested-by: Suman Anna Link: https://lkml.kernel.org/r/20161208233310.10329-1-grygorii.strashko@ti.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-keystone.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index 54a5e870a8f5..efbcf8435185 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -19,9 +19,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -39,6 +39,7 @@ struct keystone_irq_device { struct irq_domain *irqd; struct regmap *devctrl_regs; u32 devctrl_offset; + raw_spinlock_t wa_lock; }; static inline u32 keystone_irq_readl(struct keystone_irq_device *kirq) @@ -83,17 +84,15 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(struct irq_desc *desc) +static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq) { - unsigned int irq = irq_desc_get_irq(desc); - struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); + struct keystone_irq_device *kirq = keystone_irq; + unsigned long wa_lock_flags; unsigned long pending; int src, virq; dev_dbg(kirq->dev, "start irq %d\n", irq); - chained_irq_enter(irq_desc_get_chip(desc), desc); - pending = keystone_irq_readl(kirq); keystone_irq_writel(kirq, pending); @@ -111,13 +110,15 @@ static void keystone_irq_handler(struct irq_desc *desc) if (!virq) dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n", src, virq); + raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags); generic_handle_irq(virq); + raw_spin_unlock_irqrestore(&kirq->wa_lock, + wa_lock_flags); } } - chained_irq_exit(irq_desc_get_chip(desc), desc); - dev_dbg(kirq->dev, "end irq %d\n", irq); + return IRQ_HANDLED; } static int keystone_irq_map(struct irq_domain *h, unsigned int virq, @@ -182,9 +183,16 @@ static int keystone_irq_probe(struct platform_device *pdev) return -ENODEV; } + raw_spin_lock_init(&kirq->wa_lock); + platform_set_drvdata(pdev, kirq); - irq_set_chained_handler_and_data(kirq->irq, keystone_irq_handler, kirq); + ret = request_irq(kirq->irq, keystone_irq_handler, + 0, dev_name(dev), kirq); + if (ret) { + irq_domain_remove(kirq->irqd); + return ret; + } /* clear all source bits */ keystone_irq_writel(kirq, ~0x0); @@ -199,6 +207,8 @@ static int keystone_irq_remove(struct platform_device *pdev) struct keystone_irq_device *kirq = platform_get_drvdata(pdev); int hwirq; + free_irq(kirq->irq, kirq); + for (hwirq = 0; hwirq < KEYSTONE_N_IRQ; hwirq++) irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq)); -- cgit From 88e20c74ee020f9e0c99dfce0dd9aa61c3f0cca0 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 27 Dec 2016 18:29:57 +0000 Subject: irqchip/mxs: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND The ICOLL controller doesn't provide any facility to configure the wakeup sources. That's the reason why this implementation lacks the irq_set_wake implementation. But this prevent us from properly entering power management states like "suspend to idle". So enable the flags IRQCHIP_SKIP_SET_WAKE and IRQCHIP_MASK_ON_SUSPEND to let the irqchip core allows and handles the power management. Signed-off-by: Stefan Wahren Reviewed-by: Fabio Estevam Link: https://lkml.kernel.org/r/1482863397-11400-1-git-send-email-stefan.wahren@i2se.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-mxs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 17304705f2cf..05fa9f7af53c 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -131,12 +131,16 @@ static struct irq_chip mxs_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = icoll_mask_irq, .irq_unmask = icoll_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; static struct irq_chip asm9260_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = asm9260_mask_irq, .irq_unmask = asm9260_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs) -- cgit From 4ea33ef0f9e95b69db9131d7afd98563713e81b0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 27 Dec 2016 08:51:17 +0100 Subject: batman-adv: Decrease hardif refcnt on fragmentation send error An error before the hardif is found has to free the skb. But every error after that has to free the skb + put the hard interface. Fixes: 8def0be82dd1 ("batman-adv: Consume skb in batadv_frag_send_packet") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 9c561e683f4b..0854ebd8613e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -474,7 +474,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) { ret = -EINVAL; - goto put_primary_if; + goto free_skb; } /* Create one header to be copied to all fragments */ @@ -502,7 +502,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) { ret = -ENOMEM; - goto free_skb; + goto put_primary_if; } batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); @@ -511,7 +511,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, ret = batadv_send_unicast_skb(skb_fragment, neigh_node); if (ret != NET_XMIT_SUCCESS) { ret = NET_XMIT_DROP; - goto free_skb; + goto put_primary_if; } frag_header.no++; @@ -519,7 +519,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* The initial check in this function should cover this case */ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { ret = -EINVAL; - goto free_skb; + goto put_primary_if; } } @@ -527,7 +527,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, if (batadv_skb_head_push(skb, header_size) < 0 || pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { ret = -ENOMEM; - goto free_skb; + goto put_primary_if; } memcpy(skb->data, &frag_header, header_size); -- cgit From e0edc8c546463f268d41d064d855bcff994c52fa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2017 11:48:50 -0500 Subject: libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marko reports that CX1-JB512-HP shows the same timeout issues as CX1-JB256-HP. Let's apply MAX_SEC_128 to all devices in the series. Signed-off-by: Tejun Heo Reported-by: Marko Koski-Vähälä Cc: stable@vger.kernel.org # v3.19+ --- drivers/ata/libata-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 9cd0a2d41816..a2bd5acc8a89 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4356,10 +4356,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, /* - * Device times out with higher max sects. + * These devices time out with higher max sects. * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ - { "LITEON CX1-JB256-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ -- cgit From fd7c99142d77dc4a851879a66715abf12a3193fb Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 6 Jan 2017 17:54:51 +0000 Subject: tile/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Cc: stable@vger.kernel.org Signed-off-by: Dave Martin Signed-off-by: Chris Metcalf --- arch/tile/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index d89b7011667c..e279572824b1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -111,7 +111,7 @@ static int tile_gpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - struct pt_regs regs; + struct pt_regs regs = *task_pt_regs(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, sizeof(regs)); -- cgit From 2dae99558e86894e9e5dbf097477baaa5eb70134 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 19 Dec 2016 10:17:40 +0900 Subject: libata: Fix ATA request sense For an ATA device supporting the sense data reporting feature set, a failed command will trigger the execution of ata_eh_request_sense if the result task file of the failed command has the ATA_SENSE bit set (sense data available bit). ata_eh_request_sense executes the REQUEST SENSE DATA EXT command to retrieve the sense data of the failed command. On success of REQUEST SENSE DATA EXT, the ATA_SENSE bit will NOT be set (the command succeeded) but ata_eh_request_sense nevertheless tests the availability of sense data by testing that bit presence in the result tf of the REQUEST SENSE DATA EXT command. This leads us to falsely assume that request sense data failed and to the warning message: atax.xx: request sense failed stat 50 emask 0 Upon success of REQUEST SENSE DATA EXT, set the ATA_SENSE bit in the result task file command so that sense data can be returned by ata_eh_request_sense. Signed-off-by: Damien Le Moal Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index a2bd5acc8a89..c2d3785ec227 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1702,6 +1702,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; + } else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) { + qc->result_tf.command |= ATA_SENSE; } /* finish up */ -- cgit From 064c3db9c564cc5be514ac21fb4aa26cc33db746 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 12 Dec 2016 23:13:27 +0530 Subject: ata: sata_mv:- Handle return value of devm_ioremap. Here, If devm_ioremap will fail. It will return NULL. Then hpriv->base = NULL - 0x20000; Kernel can run into a NULL-pointer dereference. This error check will avoid NULL pointer dereference. Signed-off-by: Arvind Yadav Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/sata_mv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 823e938c9a78..2f32782cea6d 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -4132,6 +4132,9 @@ static int mv_platform_probe(struct platform_device *pdev) host->iomap = NULL; hpriv->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!hpriv->base) + return -ENOMEM; + hpriv->base -= SATAHC0_REG_BASE; hpriv->clk = clk_get(&pdev->dev, NULL); -- cgit From 3e70c5d6ea510e38f612d07fa0fd7487277b7087 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 26 Dec 2016 14:52:23 +0100 Subject: drm/i915/gvt: verify functions types in new_mmio_info() The current prototype of new_mmio_info() uses void* for parameters read and write, which are functions with precise calling conventions (argument types and return type). Write down these conventions in new_mmio_info() definition. This has been reported by the following warnings when clang is used to build the kernel: drivers/gpu/drm/i915/gvt/handlers.c:124:21: error: pointer type mismatch ('void *' and 'int (*)(struct intel_vgpu *, unsigned int, void *, unsigned int)') [-Werror,-Wpointer-type-mismatch] info->read = read ? read : intel_vgpu_default_mmio_read; ^ ~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gvt/handlers.c:125:23: error: pointer type mismatch ('void *' and 'int (*)(struct intel_vgpu *, unsigned int, void *, unsigned int)') [-Werror,-Wpointer-type-mismatch] info->write = write ? write : intel_vgpu_default_mmio_write; ^ ~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This allows the compiler to detect that sbi_ctl_mmio_write() returns a "bool" value instead of an expected "int" one. Fix this. Signed-off-by: Nicolas Iooss Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 522809710312..052e57124c0a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -93,7 +93,8 @@ static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset, static int new_mmio_info(struct intel_gvt *gvt, u32 offset, u32 flags, u32 size, u32 addr_mask, u32 ro_mask, u32 device, - void *read, void *write) + int (*read)(struct intel_vgpu *, unsigned int, void *, unsigned int), + int (*write)(struct intel_vgpu *, unsigned int, void *, unsigned int)) { struct intel_gvt_mmio_info *info, *p; u32 start, end, i; @@ -974,7 +975,7 @@ static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -static bool sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, +static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 data; -- cgit From a12010534d0984f91bc5bdcf9e27bd55e20d82da Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Dec 2016 13:24:52 +0800 Subject: drm/i915/gvt: fix error handing of tlb_control emulation Return ealier for a invalid access, else it would false set tlb flag for RCS. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 052e57124c0a..e06d5f37bb92 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1367,7 +1367,6 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - int rc = 0; unsigned int id = 0; write_vreg(vgpu, offset, p_data, bytes); @@ -1390,12 +1389,11 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, id = VECS; break; default: - rc = -EINVAL; - break; + return -EINVAL; } set_bit(id, (void *)vgpu->tlb_handle_pending); - return rc; + return 0; } static int ring_reset_ctl_write(struct intel_vgpu *vgpu, -- cgit From 39762ad437f1149b904e6baeaf28824da34a89c1 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 27 Dec 2016 13:25:06 +0800 Subject: drm/i915/gvt: fix return value in mul_force_wake_write All mmio handlers should return a negetive value for failure, not 1. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e06d5f37bb92..8cbaf1c83720 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -220,7 +220,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, default: /*should not hit here*/ gvt_err("invalid forcewake offset 0x%x\n", offset); - return 1; + return -EINVAL; } } else { ack_reg_offset = FORCEWAKE_ACK_HSW_REG; -- cgit From 905a5035ebe79e89712cda0bed1887c73aa8e9bb Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 30 Dec 2016 14:10:53 +0800 Subject: drm/i915/gvt: always use readq and writeq The readq and writeq are already offered by drm_os_linux.h. So we can use them directly whithout dectecting their presence. This patch removed the duplicated code. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6c5fdf5b2ce2..a32e59de0eff 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -240,15 +240,8 @@ static inline int get_pse_type(int type) static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) { void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; - u64 pte; -#ifdef readq - pte = readq(addr); -#else - pte = ioread32(addr); - pte |= (u64)ioread32(addr + 4) << 32; -#endif - return pte; + return readq(addr); } static void write_pte64(struct drm_i915_private *dev_priv, @@ -256,12 +249,8 @@ static void write_pte64(struct drm_i915_private *dev_priv, { void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; -#ifdef writeq writeq(pte, addr); -#else - iowrite32((u32)pte, addr); - iowrite32(pte >> 32, addr + 4); -#endif + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); } -- cgit From 901a14b721feef1b37cfe6362ee103e135133677 Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Wed, 4 Jan 2017 22:32:23 +0800 Subject: drm/i915/gvt: print correct value for untracked mmio In function intel_vgpu_emulate_mmio_read, the untracked mmio register is dumped through kernel log, but the register value is not correct. This patch fixes this issue. V2: fix the fromat warning from checkpatch.pl. Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 09c9450a1946..e60701397ac2 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -125,25 +125,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (WARN_ON(!reg_is_mmio(gvt, offset + bytes - 1))) goto err; - mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); - if (!mmio && !vgpu->mmio.disable_warn_untrack) { - gvt_err("vgpu%d: read untracked MMIO %x len %d val %x\n", - vgpu->id, offset, bytes, *(u32 *)p_data); - - if (offset == 0x206c) { - gvt_err("------------------------------------------\n"); - gvt_err("vgpu%d: likely triggers a gfx reset\n", - vgpu->id); - gvt_err("------------------------------------------\n"); - vgpu->mmio.disable_warn_untrack = true; - } - } - if (!intel_gvt_mmio_is_unalign(gvt, offset)) { if (WARN_ON(!IS_ALIGNED(offset, bytes))) goto err; } + mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); if (mmio) { if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) { if (WARN_ON(offset + bytes > mmio->offset + mmio->size)) @@ -152,9 +139,23 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, goto err; } ret = mmio->read(vgpu, offset, p_data, bytes); - } else + } else { ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); + if (!vgpu->mmio.disable_warn_untrack) { + gvt_err("vgpu%d: read untracked MMIO %x(%dB) val %x\n", + vgpu->id, offset, bytes, *(u32 *)p_data); + + if (offset == 0x206c) { + gvt_err("------------------------------------------\n"); + gvt_err("vgpu%d: likely triggers a gfx reset\n", + vgpu->id); + gvt_err("------------------------------------------\n"); + vgpu->mmio.disable_warn_untrack = true; + } + } + } + if (ret) goto err; -- cgit From 888530b57f88f2bc856f181479df732c9622fa22 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 5 Jan 2017 10:26:13 +0800 Subject: drm/i915/gvt: adjust high memory size for default vGPU type Previous high mem size initialized for vGPU type was too small which caused failure for some VMs. This trys to take minimal value of 384MB for each VM and enlarge default high mem size to make guest driver happy. Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/vgpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 536d2b9d5777..398abb98dd0a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -177,7 +177,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) if (low_avail / min_low == 0) break; gvt->types[i].low_gm_size = min_low; - gvt->types[i].high_gm_size = 3 * gvt->types[i].low_gm_size; + gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U)); gvt->types[i].fence = 4; gvt->types[i].max_instance = low_avail / min_low; gvt->types[i].avail_instance = gvt->types[i].max_instance; @@ -217,7 +217,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) */ low_gm_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE - gvt->gm.vgpu_allocated_low_gm_size; - high_gm_avail = MB_TO_BYTES(256) * 3 - HOST_HIGH_GM_SIZE - + high_gm_avail = MB_TO_BYTES(256) * 8UL - HOST_HIGH_GM_SIZE - gvt->gm.vgpu_allocated_high_gm_size; fence_avail = gvt_fence_sz(gvt) - HOST_FENCE - gvt->fence.vgpu_allocated_fence_num; -- cgit From 2fcdb66364ee467d69228a3d2ea074498c177211 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 5 Jan 2017 10:26:24 +0800 Subject: drm/i915/gvt: remove duplicated definition Remove duplicated definition for resource size in aperture_gm.c which are already defined in gvt.h. Need only one to take effect. Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 0d41ebc4aea6..65200313515c 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -37,13 +37,6 @@ #include "i915_drv.h" #include "gvt.h" -#define MB_TO_BYTES(mb) ((mb) << 20ULL) -#define BYTES_TO_MB(b) ((b) >> 20ULL) - -#define HOST_LOW_GM_SIZE MB_TO_BYTES(128) -#define HOST_HIGH_GM_SIZE MB_TO_BYTES(384) -#define HOST_FENCE 4 - static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) { struct intel_gvt *gvt = vgpu->gvt; -- cgit From 2e51ef32b0d66fcd5fe45c437cf7c6aef8350746 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 5 Jan 2017 13:28:05 +0800 Subject: drm/i915/gvt: fix use after free for workload In the function workload_thread(), we invoke complete_current_workload() to cleanup the just processed workload (workload will be freed there). So we cannot access workload->req after that. This patch move complete_current_workload() afterward. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 4db242250235..c694dd039f3b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -459,11 +459,11 @@ complete: gvt_dbg_sched("will complete workload %p\n, status: %d\n", workload, workload->status); - complete_current_workload(gvt, ring_id); - if (workload->req) i915_gem_request_put(fetch_and_zero(&workload->req)); + complete_current_workload(gvt, ring_id); + if (need_force_wake) intel_uncore_forcewake_put(gvt->dev_priv, FORCEWAKE_ALL); -- cgit From 440a9b9fae37dfd7e4c7d76db34fada57f9afd92 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 5 Jan 2017 16:49:03 +0800 Subject: drm/i915/gvt: dec vgpu->running_workload_num after the workload is really done The vgpu->running_workload_num is used to determine whether a vgpu has any workload running or not. So we should make sure the workload is really done before we dec running_workload_num. Function complete_current_workload is not the right place to do it, since this function is still processing the workload. This patch move the dec op afterward. v2: move dec op before wake_up(&scheduler->workload_complete_wq) (Min He) Signed-off-by: Changbin Du Reviewed-by: Min He Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c694dd039f3b..e91885dffeff 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -350,13 +350,15 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload; + struct intel_vgpu *vgpu; int event; mutex_lock(&gvt->lock); workload = scheduler->current_workload[ring_id]; + vgpu = workload->vgpu; - if (!workload->status && !workload->vgpu->resetting) { + if (!workload->status && !vgpu->resetting) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -364,8 +366,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) for_each_set_bit(event, workload->pending_events, INTEL_GVT_EVENT_MAX) - intel_vgpu_trigger_virtual_event(workload->vgpu, - event); + intel_vgpu_trigger_virtual_event(vgpu, event); } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -373,11 +374,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id] = NULL; - atomic_dec(&workload->vgpu->running_workload_num); - list_del_init(&workload->list); workload->complete(workload); + atomic_dec(&vgpu->running_workload_num); wake_up(&scheduler->workload_complete_wq); mutex_unlock(&gvt->lock); } -- cgit From 59c0573dfbd5f66e3aa54c2ce0bebcb0953d4db4 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Fri, 6 Jan 2017 15:16:21 +0800 Subject: drm/i915/gvt: init/destroy vgpu_idr properly An idr should be initialized before use and destroyed afterwards. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 398877c3d2fd..e6bf5c533fbe 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -201,6 +201,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); intel_gvt_clean_vgpu_types(gvt); + idr_destroy(&gvt->vgpu_idr); + kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -237,6 +239,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) gvt_dbg_core("init gvt device\n"); + idr_init(&gvt->vgpu_idr); + mutex_init(&gvt->lock); gvt->dev_priv = dev_priv; @@ -244,7 +248,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) ret = intel_gvt_setup_mmio_info(gvt); if (ret) - return ret; + goto out_clean_idr; ret = intel_gvt_load_firmware(gvt); if (ret) @@ -313,6 +317,8 @@ out_free_firmware: intel_gvt_free_firmware(gvt); out_clean_mmio_info: intel_gvt_clean_mmio_info(gvt); +out_clean_idr: + idr_destroy(&gvt->vgpu_idr); kfree(gvt); return ret; } -- cgit From 4e5378918b5b96e6b93fcadf1ab84a8486ca60a1 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Fri, 6 Jan 2017 15:16:22 +0800 Subject: drm/i915/gvt: destroy the allocated idr on vgpu creating failures Once idr_alloc gets called data is allocated within the idr list, if any error occurs afterwards, we should undo that by idr_remove on the error path. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/vgpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 398abb98dd0a..f0e86123e45b 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -304,7 +304,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, ret = setup_vgpu_mmio(vgpu); if (ret) - goto out_free_vgpu; + goto out_clean_idr; ret = intel_vgpu_alloc_resource(vgpu, param); if (ret) @@ -355,6 +355,8 @@ out_clean_vgpu_resource: intel_vgpu_free_resource(vgpu); out_clean_vgpu_mmio: clean_vgpu_mmio(vgpu); +out_clean_idr: + idr_remove(&gvt->vgpu_idr, vgpu->id); out_free_vgpu: vfree(vgpu); mutex_unlock(&gvt->lock); -- cgit From 03551e971f6e52c8dedd5741bf48631e65675759 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Fri, 6 Jan 2017 15:16:23 +0800 Subject: drm/i915/gvt: cleanup opregion memory allocation code According to the spec, ACPI OpRegion must be placed at a physical address below 4G. That is, for a vGPU it must be associated with a GPA below 4G, but on host side, it doesn't matter where the backing pages actually are. So when allocating pages from host, the GFP_DMA32 flag is unnecessary. Also the allocation is from a sleepable context, so GFP_ATOMIC is also unnecessary. This patch also removes INTEL_GVT_OPREGION_PORDER and use get_order() instead. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/opregion.c | 8 ++++---- drivers/gpu/drm/i915/gvt/reg.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 81cd921770c6..d9fb41ab7119 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -36,9 +36,9 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) vgpu->id)) return -EINVAL; - vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_ATOMIC | - GFP_DMA32 | __GFP_ZERO, - INTEL_GVT_OPREGION_PORDER); + vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | + __GFP_ZERO, + get_order(INTEL_GVT_OPREGION_SIZE)); if (!vgpu_opregion(vgpu)->va) return -ENOMEM; @@ -97,7 +97,7 @@ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { map_vgpu_opregion(vgpu, false); free_pages((unsigned long)vgpu_opregion(vgpu)->va, - INTEL_GVT_OPREGION_PORDER); + get_order(INTEL_GVT_OPREGION_SIZE)); vgpu_opregion(vgpu)->va = NULL; } diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 0dfe789d8f02..fbd023a16f18 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -50,8 +50,7 @@ #define INTEL_GVT_OPREGION_PARM 0x204 #define INTEL_GVT_OPREGION_PAGES 2 -#define INTEL_GVT_OPREGION_PORDER 1 -#define INTEL_GVT_OPREGION_SIZE (2 * 4096) +#define INTEL_GVT_OPREGION_SIZE (INTEL_GVT_OPREGION_PAGES * PAGE_SIZE) #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) -- cgit From 5753394b64a07dd502cb288a5fd52e71fb01fc5d Mon Sep 17 00:00:00 2001 From: Jike Song Date: Fri, 6 Jan 2017 15:16:20 +0800 Subject: drm/i915/gvt/kvmgt: return meaningful error for vgpu creating failure The vgpu_create() routine we called returns meaningful errors to indicate failures, so we'd better to pass it to our caller, the mdev framework, whereby the sysfs is able to tell userspace what happened. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index faaae07ae487..0c9234a87a20 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -398,6 +398,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) struct intel_vgpu_type *type; struct device *pdev; void *gvt; + int ret; pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; @@ -406,13 +407,15 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) if (!type) { gvt_err("failed to find type %s to create\n", kobject_name(kobj)); - return -EINVAL; + ret = -EINVAL; + goto out; } vgpu = intel_gvt_ops->vgpu_create(gvt, type); if (IS_ERR_OR_NULL(vgpu)) { - gvt_err("create intel vgpu failed\n"); - return -EINVAL; + ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu); + gvt_err("failed to create intel vgpu: %d\n", ret); + goto out; } INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work); @@ -422,7 +425,10 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", dev_name(mdev_dev(mdev))); - return 0; + ret = 0; + +out: + return ret; } static int intel_vgpu_remove(struct mdev_device *mdev) -- cgit From f0a8b49c03d22a511a601dc54b2a3425a41e35fa Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 30 Dec 2016 10:57:46 +0100 Subject: drm/bridge: analogix dp: Fix runtime PM state on driver bind Analogix_dp_bind() can be called from component framework, which doesn't guarantee proper runtime PM state of the device during bind operation, so ensure that device is runtime active before doing any register access. This ensures that the power domain, to which DP module belongs, is turned on. While at it, also fix the unbalanced call to phy_power_on() in analogix_dp_bind() function. This patch solves the following kernel oops on Samsung Exynos5250 Snow board: Unhandled fault: imprecise external abort (0x406) at 0x00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: : 406 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 75 Comm: kworker/0:2 Not tainted 4.9.0 #1046 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) Workqueue: events deferred_probe_work_func task: ee272300 task.stack: ee312000 PC is at analogix_dp_enable_sw_function+0x18/0x2c LR is at analogix_dp_init_dp+0x2c/0x50 ... [] (analogix_dp_enable_sw_function) from [] (analogix_dp_init_dp+0x2c/0x50) [] (analogix_dp_init_dp) from [] (analogix_dp_bind+0x184/0x42c) [] (analogix_dp_bind) from [] (component_bind_all+0xf0/0x218) [] (component_bind_all) from [] (exynos_drm_load+0x134/0x200) [] (exynos_drm_load) from [] (drm_dev_register+0xa0/0xd0) [] (drm_dev_register) from [] (drm_platform_init+0x58/0xb0) [] (drm_platform_init) from [] (try_to_bring_up_master+0x14c/0x188) [] (try_to_bring_up_master) from [] (component_add+0x88/0x138) [] (component_add) from [] (platform_drv_probe+0x50/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x1f0/0x2a8) [] (driver_probe_device) from [] (bus_for_each_drv+0x44/0x8c) [] (bus_for_each_drv) from [] (__device_attach+0x9c/0x100) [] (__device_attach) from [] (bus_probe_device+0x84/0x8c) [] (bus_probe_device) from [] (deferred_probe_work_func+0x60/0x8c) [] (deferred_probe_work_func) from [] (process_one_work+0x120/0x318) [] (process_one_work) from [] (process_scheduled_works+0x28/0x38) [] (process_scheduled_works) from [] (worker_thread+0x204/0x4ac) [] (worker_thread) from [] (kthread+0xd8/0xf4) [] (kthread) from [] (ret_from_fork+0x14/0x3c) Code: e59035f0 e5935018 f57ff04f e3c55001 (f57ff04e) ---[ end trace 3d1d0d87796de344 ]--- Reviewed-by: Sean Paul Signed-off-by: Marek Szyprowski Signed-off-by: Archit Taneja Link: http://patchwork.freedesktop.org/patch/msgid/1483091866-1088-1-git-send-email-m.szyprowski@samsung.com --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index eb9bf8786c24..18eefdcbf1ba 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1382,6 +1382,7 @@ int analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, pm_runtime_enable(dev); + pm_runtime_get_sync(dev); phy_power_on(dp->phy); analogix_dp_init_dp(dp); @@ -1414,9 +1415,15 @@ int analogix_dp_bind(struct device *dev, struct drm_device *drm_dev, goto err_disable_pm_runtime; } + phy_power_off(dp->phy); + pm_runtime_put(dev); + return 0; err_disable_pm_runtime: + + phy_power_off(dp->phy); + pm_runtime_put(dev); pm_runtime_disable(dev); return ret; -- cgit From 9631739f8196ec80b5d9bf955f79b711490c0205 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Mon, 9 Jan 2017 15:38:38 +0800 Subject: drm/i915/gvt: cleanup GFP flags In gvt, almost all memory allocations are in sleepable contexts. It's fault-prone to use GFP_ATOMIC everywhere. Replace it with GFP_KERNEL wherever possible. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index a32e59de0eff..3cf0df0bb391 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1369,8 +1369,7 @@ static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm) info->gtt_entry_size; mem = kzalloc(mm->has_shadow_page_table ? mm->page_table_entry_size * 2 - : mm->page_table_entry_size, - GFP_ATOMIC); + : mm->page_table_entry_size, GFP_KERNEL); if (!mem) return -ENOMEM; mm->virtual_page_table = mem; @@ -1521,7 +1520,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm; int ret; - mm = kzalloc(sizeof(*mm), GFP_ATOMIC); + mm = kzalloc(sizeof(*mm), GFP_KERNEL); if (!mm) { ret = -ENOMEM; goto fail; @@ -1875,30 +1874,27 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; int page_entry_num = GTT_PAGE_SIZE >> vgpu->gvt->device_info.gtt_entry_size_shift; - struct page *scratch_pt; + void *scratch_pt; unsigned long mfn; int i; - void *p; if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) return -EINVAL; - scratch_pt = alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO); + scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); if (!scratch_pt) { gvt_err("fail to allocate scratch page\n"); return -ENOMEM; } - p = kmap_atomic(scratch_pt); - mfn = intel_gvt_hypervisor_virt_to_mfn(p); + mfn = intel_gvt_hypervisor_virt_to_mfn(scratch_pt); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to translate vaddr:0x%llx\n", (u64)p); - kunmap_atomic(p); - __free_page(scratch_pt); + gvt_err("fail to translate vaddr:0x%lx\n", (unsigned long)scratch_pt); + free_page((unsigned long)scratch_pt); return -EFAULT; } gtt->scratch_pt[type].page_mfn = mfn; - gtt->scratch_pt[type].page = scratch_pt; + gtt->scratch_pt[type].page = virt_to_page(scratch_pt); gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", vgpu->id, type, mfn); @@ -1907,7 +1903,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, * scratch_pt[type] indicate the scratch pt/scratch page used by the * 'type' pt. * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by - * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scatch_pt it self + * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. */ if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { @@ -1925,11 +1921,9 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, se.val64 |= PPAT_CACHED_INDEX; for (i = 0; i < page_entry_num; i++) - ops->set_entry(p, &se, i, false, 0, vgpu); + ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); } - kunmap_atomic(p); - return 0; } @@ -2197,7 +2191,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int intel_gvt_init_gtt(struct intel_gvt *gvt) { int ret; - void *page_addr; + void *page; gvt_dbg_core("init gtt\n"); @@ -2210,17 +2204,14 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) return -ENODEV; } - gvt->gtt.scratch_ggtt_page = - alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO); - if (!gvt->gtt.scratch_ggtt_page) { + page = (void *)get_zeroed_page(GFP_KERNEL); + if (!page) { gvt_err("fail to allocate scratch ggtt page\n"); return -ENOMEM; } + gvt->gtt.scratch_ggtt_page = virt_to_page(page); - page_addr = page_address(gvt->gtt.scratch_ggtt_page); - - gvt->gtt.scratch_ggtt_mfn = - intel_gvt_hypervisor_virt_to_mfn(page_addr); + gvt->gtt.scratch_ggtt_mfn = intel_gvt_hypervisor_virt_to_mfn(page); if (gvt->gtt.scratch_ggtt_mfn == INTEL_GVT_INVALID_ADDR) { gvt_err("fail to translate scratch ggtt page\n"); __free_page(gvt->gtt.scratch_ggtt_page); -- cgit From cc31d43b4154ad5a7d8aa5543255a93b7e89edc2 Mon Sep 17 00:00:00 2001 From: Pau Espin Pedrol Date: Fri, 6 Jan 2017 20:33:27 +0100 Subject: netfilter: use fwmark_reflect in nf_send_reset Otherwise, RST packets generated by ipt_REJECT always have mark 0 when the routing is checked later in the same code path. Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies") Cc: Lorenzo Colitti Signed-off-by: Pau Espin Pedrol Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 2 ++ net/ipv6/netfilter/nf_reject_ipv6.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fd8220213afc..146d86105183 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); + skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, ip4_dst_hoplimit(skb_dst(nskb))); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 10090400c72f..eedee5d108d9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); + fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { @@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) skb_dst_set(nskb, dst); + nskb->mark = fl6.flowi6_mark; + skb_reserve(nskb, hh_len + dst->header_len); ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); -- cgit From bf99b4ded5f8a4767dbb9d180626f06c51f9881f Mon Sep 17 00:00:00 2001 From: Pau Espin Pedrol Date: Fri, 6 Jan 2017 20:33:28 +0100 Subject: tcp: fix mark propagation with fwmark_reflect enabled Otherwise, RST packets generated by the TCP stack for non-existing sockets always have mark 0. The mark from the original packet is assigned to the netns_ipv4/6 socket used to send the response so that it can get copied into the response skb when the socket sends it. Fixes: e110861f8609 ("net: add a sysctl to reflect the fwmark on replies") Cc: Lorenzo Colitti Signed-off-by: Pau Espin Pedrol Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_output.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fac275c48108..b67719f45953 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1629,6 +1629,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 73bc8fc68acd..2b20622a5824 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -840,6 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); + ctl_sk->sk_mark = fl6.flowi6_mark; ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) -- cgit From 68f458eec7069d618a6c884ca007426e0cea411b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 9 Jan 2017 16:31:58 +0200 Subject: drm: Schedule the output_poll_work with 1s delay if we have delayed event Instead of scheduling the work to handle the initial delayed event, use 1s delay. This delay should not be needed, but Optimus/nouveau will fail in a mysterious way if the delayed event is handled as soon as possible like it is done in drm_helper_probe_single_connector_modes() in case the poll was enabled before. Reverting 339fd36238dd would give back the 10 sec (!) delay to handle the delayed event. Adding 1sec delay to the poll_work is enough to work around the issue in Optimus setups and gives shorter response on handling the initial delayed event. Fixes: 339fd36238dd ("drm: drm_probe_helper: Fix output_poll_work scheduling") Cc: stable@vger.kernel.org # v4.9 Signed-off-by: Peter Ujfalusi [danvet: Add FIXME to the comment to make it stick out more.] Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170109143158.21917-1-peter.ujfalusi@ti.com --- drivers/gpu/drm/drm_probe_helper.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index ac953f037be7..cf8f0128c161 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -143,8 +143,18 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) } if (dev->mode_config.delayed_event) { + /* + * FIXME: + * + * Use short (1s) delay to handle the initial delayed event. + * This delay should not be needed, but Optimus/nouveau will + * fail in a mysterious way if the delayed event is handled as + * soon as possible like it is done in + * drm_helper_probe_single_connector_modes() in case the poll + * was enabled before. + */ poll = true; - delay = 0; + delay = HZ; } if (poll) -- cgit From 71d3f6ef7f5af38dea2975ec5715c88bae92e92d Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 28 Nov 2016 08:52:20 +0100 Subject: drm/virtio: fix framebuffer sparse warning virtio uses normal ram as backing storage for the framebuffer, so we should assign the address to new screen_buffer (added by commit 17a7b0b4d9749f80d365d7baff5dec2f54b0e992) instead of screen_base. Reported-by: Michael S. Tsirkin Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c index dd21f950e129..cde9f3758106 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fb.c +++ b/drivers/gpu/drm/virtio/virtgpu_fb.c @@ -331,7 +331,7 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper, info->fbops = &virtio_gpufb_ops; info->pixmap.flags = FB_PIXMAP_SYSTEM; - info->screen_base = obj->vmap; + info->screen_buffer = obj->vmap; info->screen_size = obj->gem_base.size; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth); drm_fb_helper_fill_var(info, &vfbdev->helper, -- cgit From 0c19f97f12bbb1c2370cb62e31d0f749642937ee Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 21 Nov 2016 19:00:30 +0100 Subject: drm: update MAINTAINERS for qemu drivers (bochs, cirrus, qxl, virtio-gpu) Changes: * add myself as maintainer, so patches land in my inbox. * add virtualization@lists.linux-foundation.org mailing list. * add drm-qemu git repo. * flip bochs and qxl status to "Maintained". Signed-off-by: Gerd Hoffmann --- MAINTAINERS | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5f0420a0da5b..15eadc8c58c9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4106,11 +4106,16 @@ F: drivers/gpu/drm/bridge/ DRM DRIVER FOR BOCHS VIRTUAL GPU M: Gerd Hoffmann -S: Odd Fixes +L: virtualization@lists.linux-foundation.org +T: git git://git.kraxel.org/linux drm-qemu +S: Maintained F: drivers/gpu/drm/bochs/ DRM DRIVER FOR QEMU'S CIRRUS DEVICE M: Dave Airlie +M: Gerd Hoffmann +L: virtualization@lists.linux-foundation.org +T: git git://git.kraxel.org/linux drm-qemu S: Odd Fixes F: drivers/gpu/drm/cirrus/ @@ -4304,7 +4309,10 @@ F: Documentation/devicetree/bindings/display/renesas,du.txt DRM DRIVER FOR QXL VIRTUAL GPU M: Dave Airlie -S: Odd Fixes +M: Gerd Hoffmann +L: virtualization@lists.linux-foundation.org +T: git git://git.kraxel.org/linux drm-qemu +S: Maintained F: drivers/gpu/drm/qxl/ F: include/uapi/drm/qxl_drm.h @@ -13085,6 +13093,7 @@ M: David Airlie M: Gerd Hoffmann L: dri-devel@lists.freedesktop.org L: virtualization@lists.linux-foundation.org +T: git git://git.kraxel.org/linux drm-qemu S: Maintained F: drivers/gpu/drm/virtio/ F: include/uapi/linux/virtio_gpu.h -- cgit From af3076e67c31ceb3e314933dd61cb68a1d5120cf Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 21 Nov 2016 19:32:06 +0100 Subject: drm: flip cirrus driver status to "obsolete". Also update Kconfig help text, explaining things: Cirrus is obsolete, the hardware was designed in the 90ies and can't keep up with todays needs. More background: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ Better alternatives are: - stdvga (DRM_BOCHS, qemu -vga std, default in qemu 2.2+) - qxl (DRM_QXL, qemu -vga qxl, works best with spice) - virtio (VIRTIO_GPU), qemu -vga virtio) Signed-off-by: Gerd Hoffmann --- MAINTAINERS | 3 ++- drivers/gpu/drm/cirrus/Kconfig | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 15eadc8c58c9..b8e2bbe621a3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4116,7 +4116,8 @@ M: Dave Airlie M: Gerd Hoffmann L: virtualization@lists.linux-foundation.org T: git git://git.kraxel.org/linux drm-qemu -S: Odd Fixes +S: Obsolete +W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ F: drivers/gpu/drm/cirrus/ RADEON and AMDGPU DRM DRIVERS diff --git a/drivers/gpu/drm/cirrus/Kconfig b/drivers/gpu/drm/cirrus/Kconfig index 04b3c161dfae..7f4cc6e172ab 100644 --- a/drivers/gpu/drm/cirrus/Kconfig +++ b/drivers/gpu/drm/cirrus/Kconfig @@ -7,3 +7,12 @@ config DRM_CIRRUS_QEMU This is a KMS driver for emulated cirrus device in qemu. It is *NOT* intended for real cirrus devices. This requires the modesetting userspace X.org driver. + + Cirrus is obsolete, the hardware was designed in the 90ies + and can't keep up with todays needs. More background: + https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ + + Better alternatives are: + - stdvga (DRM_BOCHS, qemu -vga std, default in qemu 2.2+) + - qxl (DRM_QXL, qemu -vga qxl, works best with spice) + - virtio (DRM_VIRTIO_GPU), qemu -vga virtio) -- cgit From 4fe7c2962e110dfd58e61888514726aac419562f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 22 Dec 2016 07:04:59 -0800 Subject: iw_cxgb4: refactor sq/rq drain logic With the addition of the IB/Core drain API, iw_cxgb4 supported drain by watching the CQs when the QP was out of RTS and signalling "drain complete" when the last CQE is polled. This, however, doesn't fully support the drain semantics. Namely, the drain logic is supposed to signal "drain complete" only when the application has _processed_ the last CQE, not just removed them from the CQ. Thus a small timing hole exists that can cause touch after free type bugs in applications using the drain API (nvmf, iSER, for example). So iw_cxgb4 needs a better solution. The iWARP Verbs spec mandates that "_at some point_ after the QP is moved to ERROR", the iWARP driver MUST synchronously fail post_send and post_recv calls. iw_cxgb4 was currently not allowing any posts once the QP is in ERROR. This was in part due to the fact that the HW queues for the QP in ERROR state are disabled at this point, so there wasn't much else to do but fail the post operation synchronously. This restriction is what drove the first drain implementation in iw_cxgb4 that has the above mentioned flaw. This patch changes iw_cxgb4 to allow post_send and post_recv WRs after the QP is moved to ERROR state for kernel mode users, thus still adhering to the Verbs spec for user mode users, but allowing flush WRs for kernel users. Since the HW queues are disabled, we just synthesize a CQE for this post, queue it to the SW CQ, and then call the CQ event handler. This enables proper drain operations for the various storage applications. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 21 ++++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 6 +- drivers/infiniband/hw/cxgb4/provider.c | 2 - drivers/infiniband/hw/cxgb4/qp.c | 112 ++++++++++++++++++++------------- drivers/infiniband/hw/cxgb4/t4.h | 2 + 5 files changed, 85 insertions(+), 58 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 19c6477af19f..bec82a600d77 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -504,6 +504,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, goto skip_cqe; } + /* + * Special cqe for drain WR completions... + */ + if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { + *cookie = CQE_DRAIN_COOKIE(hw_cqe); + *cqe = *hw_cqe; + goto skip_cqe; + } + /* * Gotta tweak READ completions: * 1) the cqe doesn't contain the sq_wptr from the wr. @@ -753,6 +762,9 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) c4iw_invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; + case C4IW_DRAIN_OPCODE: + wc->opcode = IB_WC_SEND; + break; default: printk(KERN_ERR MOD "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", @@ -817,15 +829,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) { - if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) { - if (t4_sq_empty(wq)) - complete(&qhp->sq_drained); - if (t4_rq_empty(wq)) - complete(&qhp->rq_drained); - } + if (wq) spin_unlock(&qhp->lock); - } return ret; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4788e1a46fde..7b1e465b2a5e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -480,8 +480,6 @@ struct c4iw_qp { wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; - struct completion rq_drained; - struct completion sq_drained; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -615,6 +613,8 @@ static inline int to_ib_qp_state(int c4iw_qp_state) return IB_QPS_ERR; } +#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN + static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | @@ -997,8 +997,6 @@ extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; -void c4iw_drain_rq(struct ib_qp *qp); -void c4iw_drain_sq(struct ib_qp *qp); void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 49b51b7e0fd7..c156413515b1 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -607,8 +607,6 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.get_dev_fw_str = get_dev_fw_str; - dev->ibdev.drain_sq = c4iw_drain_sq; - dev->ibdev.drain_rq = c4iw_drain_rq; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cda5542e13a2..31ab4512f827 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -776,6 +776,64 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; } +static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *schp; + unsigned long flag; + struct t4_cq *cq; + + schp = to_c4iw_cq(qhp->ibqp.send_cq); + cq = &schp->cq; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_TYPE_V(1) | + CQE_SWCQE_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&schp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&schp->lock, flag); + + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); +} + +static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + struct t4_cqe cqe = {}; + struct c4iw_cq *rchp; + unsigned long flag; + struct t4_cq *cq; + + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + cq = &rchp->cq; + + cqe.u.drain_cookie = wr->wr_id; + cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | + CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_TYPE_V(0) | + CQE_SWCQE_V(1) | + CQE_QPID_V(qhp->wq.sq.qid)); + + spin_lock_irqsave(&rchp->lock, flag); + cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); + spin_unlock_irqrestore(&rchp->lock, flag); + + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -794,8 +852,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); - *bad_wr = wr; - return -EINVAL; + complete_sq_drain_wr(qhp, wr); + return err; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { @@ -937,8 +995,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); - *bad_wr = wr; - return -EINVAL; + complete_rq_drain_wr(qhp, wr); + return err; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { @@ -1550,7 +1608,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_CLOSING: - if (!internal) { + + /* + * Allow kernel users to move to ERROR for qp draining. + */ + if (!internal && (qhp->ibqp.uobject || attrs->next_state != + C4IW_QP_STATE_ERROR)) { ret = -EINVAL; goto out; } @@ -1763,8 +1826,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); - init_completion(&qhp->sq_drained); - init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); kref_init(&qhp->kref); @@ -1958,40 +2019,3 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } - -static void move_qp_to_err(struct c4iw_qp *qp) -{ - struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR }; - - (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); -} - -void c4iw_drain_sq(struct ib_qp *ibqp) -{ - struct c4iw_qp *qp = to_c4iw_qp(ibqp); - unsigned long flag; - bool need_to_wait; - - move_qp_to_err(qp); - spin_lock_irqsave(&qp->lock, flag); - need_to_wait = !t4_sq_empty(&qp->wq); - spin_unlock_irqrestore(&qp->lock, flag); - - if (need_to_wait) - wait_for_completion(&qp->sq_drained); -} - -void c4iw_drain_rq(struct ib_qp *ibqp) -{ - struct c4iw_qp *qp = to_c4iw_qp(ibqp); - unsigned long flag; - bool need_to_wait; - - move_qp_to_err(qp); - spin_lock_irqsave(&qp->lock, flag); - need_to_wait = !t4_rq_empty(&qp->wq); - spin_unlock_irqrestore(&qp->lock, flag); - - if (need_to_wait) - wait_for_completion(&qp->rq_drained); -} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 862381aa83c8..640d22148a3e 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -179,6 +179,7 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + u64 drain_cookie; } u; __be64 reserved; __be64 bits_type_ts; @@ -238,6 +239,7 @@ struct t4_cqe { /* generic accessor macros */ #define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) #define CQE_WRID_LOW(x) (be32_to_cpu((x)->u.gen.wrid_low)) +#define CQE_DRAIN_COOKIE(x) ((x)->u.drain_cookie) /* macros for flit 3 of the cqe */ #define CQE_GENBIT_S 63 -- cgit From c12a67fec8d99bb554e8d4e99120d418f1a39c87 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 22 Dec 2016 07:40:36 -0800 Subject: iw_cxgb4: free EQ queue memory on last deref Commit ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") introduced a bug where the RDMA QP EQ queue memory (and QIDs) are possibly freed before the underlying connection has been fully shutdown. The result being a possible DMA read issued by HW after the queue memory has been unmapped and freed. This results in possible WR corruption in the worst case, system bus errors if an IOMMU is in use, and SGE "bad WR" errors reported in the very least. The fix is to defer unmap/free of queue memory and QID resources until the QP struct has been fully dereferenced. To do this, the c4iw_ucontext must also be kept around until the last QP that references it is fully freed. In addition, since the last QP deref can happen in an IRQ disabled context, we need a new workqueue thread to do the final unmap/free of the EQ queue memory. Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 9 +++++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 +++++++++++++++++ drivers/infiniband/hw/cxgb4/provider.c | 20 +++++++++++++++---- drivers/infiniband/hw/cxgb4/qp.c | 35 +++++++++++++++++++++++++--------- 4 files changed, 69 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 516b0ae6dc3f..40c0e7b9fc6e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -846,9 +846,17 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } } + rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free"); + if (!rdev->free_workq) { + err = -ENOMEM; + goto err_free_status_page; + } + rdev->status_page->db_off = 0; return 0; +err_free_status_page: + free_page((unsigned long)rdev->status_page); destroy_ocqp_pool: c4iw_ocqp_pool_destroy(rdev); destroy_rqtpool: @@ -862,6 +870,7 @@ destroy_resource: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7b1e465b2a5e..8cd4d054a87e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -107,6 +108,7 @@ struct c4iw_dev_ucontext { struct list_head qpids; struct list_head cqids; struct mutex lock; + struct kref kref; }; enum c4iw_rdev_flags { @@ -183,6 +185,7 @@ struct c4iw_rdev { atomic_t wr_log_idx; struct wr_log_entry *wr_log; int wr_log_size; + struct workqueue_struct *free_workq; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -480,6 +483,8 @@ struct c4iw_qp { wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; + struct work_struct free_work; + struct c4iw_ucontext *ucontext; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -493,6 +498,7 @@ struct c4iw_ucontext { u32 key; spinlock_t mmap_lock; struct list_head mmaps; + struct kref kref; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -500,6 +506,18 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) return container_of(c, struct c4iw_ucontext, ibucontext); } +void _c4iw_free_ucontext(struct kref *kref); + +static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_put(&ucontext->kref, _c4iw_free_ucontext); +} + +static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_get(&ucontext->kref); +} + struct c4iw_mm_entry { struct list_head entry; u64 addr; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index c156413515b1..fa64f5d93b11 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -93,17 +93,28 @@ static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, return -ENOSYS; } -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +void _c4iw_free_ucontext(struct kref *kref) { - struct c4iw_dev *rhp = to_c4iw_dev(context->device); - struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_ucontext *ucontext; + struct c4iw_dev *rhp; struct c4iw_mm_entry *mm, *tmp; - PDBG("%s context %p\n", __func__, context); + ucontext = container_of(kref, struct c4iw_ucontext, kref); + rhp = to_c4iw_dev(ucontext->ibucontext.device); + + PDBG("%s ucontext %p\n", __func__, ucontext); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); kfree(ucontext); +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + + PDBG("%s context %p\n", __func__, context); + c4iw_put_ucontext(ucontext); return 0; } @@ -127,6 +138,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + kref_init(&context->kref); if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 31ab4512f827..04c1c382dedb 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -715,13 +715,32 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } -static void _free_qp(struct kref *kref) +static void free_qp_work(struct work_struct *work) +{ + struct c4iw_ucontext *ucontext; + struct c4iw_qp *qhp; + struct c4iw_dev *rhp; + + qhp = container_of(work, struct c4iw_qp, free_work); + ucontext = qhp->ucontext; + rhp = qhp->rhp; + + PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext); + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + if (ucontext) + c4iw_put_ucontext(ucontext); + kfree(qhp); +} + +static void queue_qp_free(struct kref *kref) { struct c4iw_qp *qhp; qhp = container_of(kref, struct c4iw_qp, kref); PDBG("%s qhp %p\n", __func__, qhp); - kfree(qhp); + queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); } void c4iw_qp_add_ref(struct ib_qp *qp) @@ -733,7 +752,7 @@ void c4iw_qp_add_ref(struct ib_qp *qp) void c4iw_qp_rem_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - kref_put(&to_c4iw_qp(qp)->kref, _free_qp); + kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1706,7 +1725,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; - struct c4iw_ucontext *ucontext; qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; @@ -1726,11 +1744,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) spin_unlock_irq(&rhp->lock); free_ird(rhp, qhp->attr.max_ird); - ucontext = ib_qp->uobject ? - to_c4iw_ucontext(ib_qp->uobject->context) : NULL; - destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - c4iw_qp_rem_ref(ib_qp); PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); @@ -1829,6 +1842,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); kref_init(&qhp->kref); + INIT_WORK(&qhp->free_work, free_qp_work); ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) @@ -1915,6 +1929,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ma_sync_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, ma_sync_key_mm); } + + c4iw_get_ucontext(ucontext); + qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); -- cgit From 3bcf96e0183f5c863657cb6ae9adad307a0f6071 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 22 Dec 2016 07:40:37 -0800 Subject: iw_cxgb4: do not send RX_DATA_ACK CPLs after close/abort Function rx_data(), which handles ingress CPL_RX_DATA messages, was always sending an RX_DATA_ACK with the goal of updating the credits. However, if the RDMA connection is moved out of FPDU mode abruptly, then it is possible for iw_cxgb4 to process queued RX_DATA CPLs after HW has aborted the connection. These CPLs should not trigger RX_DATA_ACKS. If they do, HW can see a READ after DELETE of the DB_LE hash entry for the tid and post a LE_DB HashTblMemCrcError. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index f1510cc76d2d..9398143d7c5e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1804,20 +1804,21 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) skb_trim(skb, dlen); mutex_lock(&ep->com.mutex); - /* update RX credits */ - update_rx_credits(ep, dlen); - switch (ep->com.state) { case MPA_REQ_SENT: + update_rx_credits(ep, dlen); ep->rcv_seq += dlen; disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + update_rx_credits(ep, dlen); ep->rcv_seq += dlen; disconnect = process_mpa_request(ep, skb); break; case FPDU_MODE: { struct c4iw_qp_attributes attrs; + + update_rx_credits(ep, dlen); BUG_ON(!ep->com.qp); if (status) pr_err("%s Unexpected streaming data." \ -- cgit From 3546fb0cdac25a79c89d87020566fab52b92867d Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 12 Dec 2016 16:15:17 +0100 Subject: drm/etnaviv: trick drm_mm into giving out a low IOVA After rollover of the IOVA space, we want to get a low IOVA address, otherwise the the games we play by remembering the last IOVA are pointless. When we search for a free hole with DRM_MM_SEARCH_DEFAULT, drm_mm will pop the next entry from the free holes stack, which will likely be a high IOVA. By using DRM_MM_SEARCH_BELOW we can trick drm_mm into reversing the search and provide us with a low IOVA. Signed-off-by: Lucas Stach Reviewed-by: Wladimir van der Laan --- drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 169ac96e8f08..fe0e85b41310 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -116,9 +116,14 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu, struct list_head list; bool found; + /* + * XXX: The DRM_MM_SEARCH_BELOW is really a hack to trick + * drm_mm into giving out a low IOVA after address space + * rollover. This needs a proper fix. + */ ret = drm_mm_insert_node_in_range(&mmu->mm, node, size, 0, mmu->last_iova, ~0UL, - DRM_MM_SEARCH_DEFAULT); + mmu->last_iova ? DRM_MM_SEARCH_DEFAULT : DRM_MM_SEARCH_BELOW); if (ret != -ENOSPC) break; -- cgit From ecc8995363ee6231b32dad61c955b371b79cc4cf Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 10 Jan 2017 17:31:56 +0300 Subject: pinctrl: broxton: Use correct PADCFGLOCK offset PADCFGLOCK (and PADCFGLOCK_TX) offset in Broxton actually starts at 0x060 and not 0x090 as used in the driver. Fix it to use the correct offset. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-broxton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-broxton.c b/drivers/pinctrl/intel/pinctrl-broxton.c index 59cb7a6fc5be..901b356b09d7 100644 --- a/drivers/pinctrl/intel/pinctrl-broxton.c +++ b/drivers/pinctrl/intel/pinctrl-broxton.c @@ -19,7 +19,7 @@ #define BXT_PAD_OWN 0x020 #define BXT_HOSTSW_OWN 0x080 -#define BXT_PADCFGLOCK 0x090 +#define BXT_PADCFGLOCK 0x060 #define BXT_GPI_IE 0x110 #define BXT_COMMUNITY(s, e) \ -- cgit From 17fab473693e8357a9aa6fee4fbed6c13a34bd81 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 14:07:22 +0200 Subject: pinctrl: intel: Set pin direction properly There are two bits in the PADCFG0 register to configure direction, one per TX/RX buffers. For now we wrongly assume that the GPIO is always requested before it is being used, which is not true when the GPIO is used through irqchip. In this case the GPIO is never requested and we never enable RX buffer for it. Fix this by setting both bits accordingly. Reported-by: Jarkko Nikula Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 1e139672f1af..6df35dcb29ae 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -353,6 +353,21 @@ static int intel_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned function, return 0; } +static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) +{ + u32 value; + + value = readl(padcfg0); + if (input) { + value &= ~PADCFG0_GPIORXDIS; + value |= PADCFG0_GPIOTXDIS; + } else { + value &= ~PADCFG0_GPIOTXDIS; + value |= PADCFG0_GPIORXDIS; + } + writel(value, padcfg0); +} + static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -375,11 +390,11 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, /* Disable SCI/SMI/NMI generation */ value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - /* Disable TX buffer and enable RX (this will be input) */ - value &= ~PADCFG0_GPIORXDIS; - value |= PADCFG0_GPIOTXDIS; writel(value, padcfg0); + /* Disable TX buffer and enable RX (this will be input) */ + __intel_gpio_set_direction(padcfg0, true); + raw_spin_unlock_irqrestore(&pctrl->lock, flags); return 0; @@ -392,18 +407,11 @@ static int intel_gpio_set_direction(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; - u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - - value = readl(padcfg0); - if (input) - value |= PADCFG0_GPIOTXDIS; - else - value &= ~PADCFG0_GPIOTXDIS; - writel(value, padcfg0); + __intel_gpio_set_direction(padcfg0, input); raw_spin_unlock_irqrestore(&pctrl->lock, flags); -- cgit From 04ff5a095d662e0879f0eb04b9247e092210aeff Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Jan 2017 16:38:52 +0200 Subject: pinctrl: baytrail: Rectify debounce support The commit 658b476c742f ("pinctrl: baytrail: Add debounce configuration") implements debounce for Baytrail pin control, but seems wasn't tested properly. The register which keeps debounce value is separated from the configuration one. Writing wrong values to the latter will guarantee wrong behaviour of the driver and even might break something physically. Besides above there is missed case how to disable it, which is actually done through the bit in configuration register. Rectify implementation here by using proper register for debounce value. Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration") Cc: Cristina Ciocan Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 37300634b7d2..67e92699b84e 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1092,6 +1092,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, enum pin_config_param param = pinconf_to_config_param(*config); void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, pull, val, debounce; u16 arg = 0; @@ -1128,7 +1129,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, return -EINVAL; raw_spin_lock_irqsave(&vg->lock, flags); - debounce = readl(byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG)); + debounce = readl(db_reg); raw_spin_unlock_irqrestore(&vg->lock, flags); switch (debounce & BYT_DEBOUNCE_PULSE_MASK) { @@ -1176,6 +1177,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, unsigned int param, arg; void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, val, debounce; int i, ret = 0; @@ -1238,36 +1240,40 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, break; case PIN_CONFIG_INPUT_DEBOUNCE: - debounce = readl(byt_gpio_reg(vg, offset, - BYT_DEBOUNCE_REG)); - conf &= ~BYT_DEBOUNCE_PULSE_MASK; + debounce = readl(db_reg); + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; switch (arg) { + case 0: + conf &= BYT_DEBOUNCE_EN; + break; case 375: - conf |= BYT_DEBOUNCE_PULSE_375US; + debounce |= BYT_DEBOUNCE_PULSE_375US; break; case 750: - conf |= BYT_DEBOUNCE_PULSE_750US; + debounce |= BYT_DEBOUNCE_PULSE_750US; break; case 1500: - conf |= BYT_DEBOUNCE_PULSE_1500US; + debounce |= BYT_DEBOUNCE_PULSE_1500US; break; case 3000: - conf |= BYT_DEBOUNCE_PULSE_3MS; + debounce |= BYT_DEBOUNCE_PULSE_3MS; break; case 6000: - conf |= BYT_DEBOUNCE_PULSE_6MS; + debounce |= BYT_DEBOUNCE_PULSE_6MS; break; case 12000: - conf |= BYT_DEBOUNCE_PULSE_12MS; + debounce |= BYT_DEBOUNCE_PULSE_12MS; break; case 24000: - conf |= BYT_DEBOUNCE_PULSE_24MS; + debounce |= BYT_DEBOUNCE_PULSE_24MS; break; default: ret = -EINVAL; } + if (!ret) + writel(debounce, db_reg); break; default: ret = -ENOTSUPP; -- cgit From fdf35a6b22247746a7053fc764d04218a9306f82 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Jan 2017 15:56:14 +0100 Subject: drm: Fix broken VT switch with video=1366x768 option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I noticed that the VT switch doesn't work any longer with a Dell laptop with 1366x768 eDP when the machine is connected with a DP monitor. It behaves as if VT were switched, but the graphics remain frozen. Actually the keyboard works, so I could switch back to VT7 again. I tried to track down the problem, and encountered a long story until we reach to this error: - The machine is booted with video=1366x768 option (the distro installer seems to add it as default). - Recently, drm_helper_probe_single_connector_modes() deals with cmdline modes, and it tries to create a new mode when no matching mode is found. - The drm_mode_create_from_cmdline_mode() creates a mode based on either CVT of GFT according to the given cmdline mode; in our case, it's 1366x768. - Since both CVT and GFT can't express the width 1366 due to alignment, the resultant mode becomes 1368x768, slightly larger than the given size. - Later on, the atomic commit is performed, and in drm_atomic_check_only(), the size of each plane is checked. - The size check of 1366x768 fails due to the above, and eventually the whole VT switch fails. Back in the history, we've had a manual fix-up of 1368x768 in various places via c09dedb7a50e ("drm/edid: Add a workaround for 1366x768 HD panel"), but they have been all in drm_edid.c at probing the modes from EDID. For addressing the problem above, we need a similar hack to the mode newly created from cmdline, manually adjusting the width when the expected size is 1366 while we get 1368 instead. Fixes: eaf99c749d43 ("drm: Perform cmdline mode parsing during...") Cc: Signed-off-by: Takashi Iwai Link: http://patchwork.freedesktop.org/patch/msgid/20170109145614.29454-1-tiwai@suse.de Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_modes.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index ac6a35212501..e6b19bc9021a 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1460,6 +1460,13 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev, return NULL; mode->type |= DRM_MODE_TYPE_USERDEF; + /* fix up 1368x768: GFT/CVT can't express 1366 width due to alignment */ + if (cmd->xres == 1366 && mode->hdisplay == 1368) { + mode->hdisplay = 1366; + mode->hsync_start--; + mode->hsync_end--; + drm_mode_set_name(mode); + } drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); return mode; } -- cgit From 49c03096263871a68c9dea3e86b7d1e163d2fba8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Jan 2017 15:49:00 +0200 Subject: pinctrl: baytrail: Do not add all GPIOs to IRQ domain When DIRECT_IRQ_EN is set, the pin is routed directly to the IO-APIC bypassing the GPIO driver completely. However, the mask register is still used to determine if the pin is supposed to generate IRQ or not. So with commit 3ae02c14d964 the IRQ core masks all IRQs (because of handle_bad_irq()) the pin connected to the touchscreen gets masked as well and hence no interrupts. To make this all work as expected we do not add those GPIOs to the IRQ domain that can actually propagate interrupts. Fixes: 3ae02c14d964 ("pinctrl: intel: set default handler to be handle_bad_irq()") Reported-by: Robert R. Howell Suggested-by: Mika Westerberg Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 67e92699b84e..c123488266ce 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1623,6 +1623,8 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) static void byt_gpio_irq_init_hw(struct byt_gpio *vg) { + struct gpio_chip *gc = &vg->chip; + struct device *dev = &vg->pdev->dev; void __iomem *reg; u32 base, value; int i; @@ -1644,10 +1646,12 @@ static void byt_gpio_irq_init_hw(struct byt_gpio *vg) } value = readl(reg); - if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i) && - !(value & BYT_DIRECT_IRQ_EN)) { + if (value & BYT_DIRECT_IRQ_EN) { + clear_bit(i, gc->irq_valid_mask); + dev_dbg(dev, "excluding GPIO %d from IRQ domain\n", i); + } else if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i)) { byt_gpio_clear_triggering(vg, i); - dev_dbg(&vg->pdev->dev, "disabling GPIO %d\n", i); + dev_dbg(dev, "disabling GPIO %d\n", i); } } @@ -1686,6 +1690,7 @@ static int byt_gpio_probe(struct byt_gpio *vg) gc->can_sleep = false; gc->parent = &vg->pdev->dev; gc->ngpio = vg->soc_data->npins; + gc->irq_need_valid_mask = true; #ifdef CONFIG_PM_SLEEP vg->saved_context = devm_kcalloc(&vg->pdev->dev, gc->ngpio, -- cgit From 581d3c2025632f838fb08e5160dab752b3a1f527 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:36:56 +0100 Subject: pinctrl: amd: avoid maybe-uninitalized warning Since gpio_dev->hwbank_num is now a variable, the compiler cannot figure out if pin_num is initialized at all: drivers/pinctrl/pinctrl-amd.c: In function 'amd_gpio_dbg_show': drivers/pinctrl/pinctrl-amd.c:210:3: warning: 'pin_num' may be used uninitialized in this function [-Wmaybe-uninitialized] for (; i < pin_num; i++) { ^~~ drivers/pinctrl/pinctrl-amd.c:172:21: warning: 'i' may be used uninitialized in this function [-Wmaybe-uninitialized] This adds a 'default' statement to make that case well-defined. Fixes: 3bfd44306c65 ("pinctrl: amd: Add support for additional GPIO") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index c9a146948192..537b52055756 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -202,6 +202,8 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) i = 128; pin_num = AMD_GPIO_PINS_BANK2 + i; break; + default: + return; } for (; i < pin_num; i++) { -- cgit From e88893fea17996018b2d68a22e677ea04f3baadf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jan 2017 15:59:40 +0000 Subject: drm/i915: Clear ret before unbinding in i915_gem_evict_something() Missed when rebasing patches, I failed to set ret to zero before starting the unbind loop (which depends upon ret being zero). Reported-by: Matthew Auld Fixes: 9332f3b1b99a ("drm/i915: Combine loops within i915_gem_evict_something") Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170105155940.10033-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Cc: # v4.9+ (cherry picked from commit 121dfbb2a2ef1c5f49e15c38ccc47ff0beb59446) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_evict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index bd08814b015c..d534a316a16e 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -199,6 +199,7 @@ found: } /* Unbinding will emit any required flushes */ + ret = 0; while (!list_empty(&eviction_list)) { vma = list_first_entry(&eviction_list, struct i915_vma, -- cgit From e4621b73b6b472fe2b434b4f0f76b8f33ee26a73 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jan 2017 15:22:38 +0000 Subject: drm/i915: Fix phys pwrite for struct_mutex-less operation Since commit fe115628d567 ("drm/i915: Implement pwrite without struct-mutex") the lowlevel pwrite calls are now called without the protection of struct_mutex, but pwrite_phys was still asserting that it held the struct_mutex and later tried to drop and relock it. Fixes: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Link: http://patchwork.freedesktop.org/patch/msgid/20170106152240.5793-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit 10466d2a59b23aa6d5ecd5310296c8cdb6458dac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3dd7fc662859..4b23a7814713 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -595,47 +595,21 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_device *dev = obj->base.dev; void *vaddr = obj->phys_handle->vaddr + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); - int ret; /* We manually control the domain here and pretend that it * remains coherent i.e. in the GTT domain, like shmem_pwrite. */ - lockdep_assert_held(&obj->base.dev->struct_mutex); - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - to_rps_client(file)); - if (ret) - return ret; - intel_fb_obj_invalidate(obj, ORIGIN_CPU); - if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { - unsigned long unwritten; - - /* The physical object once assigned is fixed for the lifetime - * of the obj, so we can safely drop the lock and continue - * to access vaddr. - */ - mutex_unlock(&dev->struct_mutex); - unwritten = copy_from_user(vaddr, user_data, args->size); - mutex_lock(&dev->struct_mutex); - if (unwritten) { - ret = -EFAULT; - goto out; - } - } + if (copy_from_user(vaddr, user_data, args->size)) + return -EFAULT; drm_clflush_virt_range(vaddr, args->size); - i915_gem_chipset_flush(to_i915(dev)); + i915_gem_chipset_flush(to_i915(obj->base.dev)); -out: intel_fb_obj_flush(obj, false, ORIGIN_CPU); - return ret; + return 0; } void *i915_gem_object_alloc(struct drm_device *dev) -- cgit From 3846fd9b86001bea171943cc3bb9222cb6da6b42 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Jan 2017 10:01:17 +0100 Subject: drm/probe-helpers: Drop locking from poll_enable It was only needed to protect the connector_list walking, see commit 8c4ccc4ab6f64e859d4ff8d7c02c2ed2e956e07f Author: Daniel Vetter Date: Thu Jul 9 23:44:26 2015 +0200 drm/probe-helper: Grab mode_config.mutex in poll_init/enable Unfortunately the commit message of that patch fails to mention that the new locking check was for the connector_list. But that requirement disappeared in commit c36a3254f7857f1ad9badbe3578ccc92be541a8e Author: Daniel Vetter Date: Thu Dec 15 16:58:43 2016 +0100 drm: Convert all helpers to drm_connector_list_iter and so we can drop this again. This fixes a locking inversion on nouveau, where the rpm code needs to re-enable. But in other places the rpm_get() calls are nested within the big modeset locks. While at it, also improve the kerneldoc for these two functions a notch. v2: Update the kerneldoc even more to explain that these functions can't be called concurrently, or bad things happen (Chris). Cc: Dave Airlie Reviewed-by: Chris Wilson Cc: Chris Wilson Tested-by: Lyude Reviewed-by: Lyude Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170111090117.5134-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_probe_helper.c | 51 ++++++++++++++---------------------- drivers/gpu/drm/i915/intel_hotplug.c | 4 +-- include/drm/drm_crtc_helper.h | 1 - 3 files changed, 22 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index cf8f0128c161..b452a7ccd84b 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -115,24 +115,27 @@ static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) #define DRM_OUTPUT_POLL_PERIOD (10*HZ) /** - * drm_kms_helper_poll_enable_locked - re-enable output polling. + * drm_kms_helper_poll_enable - re-enable output polling. * @dev: drm_device * - * This function re-enables the output polling work without - * locking the mode_config mutex. + * This function re-enables the output polling work, after it has been + * temporarily disabled using drm_kms_helper_poll_disable(), for example over + * suspend/resume. * - * This is like drm_kms_helper_poll_enable() however it is to be - * called from a context where the mode_config mutex is locked - * already. + * Drivers can call this helper from their device resume implementation. It is + * an error to call this when the output polling support has not yet been set + * up. + * + * Note that calls to enable and disable polling must be strictly ordered, which + * is automatically the case when they're only call from suspend/resume + * callbacks. */ -void drm_kms_helper_poll_enable_locked(struct drm_device *dev) +void drm_kms_helper_poll_enable(struct drm_device *dev) { bool poll = false; struct drm_connector *connector; unsigned long delay = DRM_OUTPUT_POLL_PERIOD; - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) return; @@ -160,7 +163,7 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) if (poll) schedule_delayed_work(&dev->mode_config.output_poll_work, delay); } -EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked); +EXPORT_SYMBOL(drm_kms_helper_poll_enable); static enum drm_connector_status drm_connector_detect(struct drm_connector *connector, bool force) @@ -287,7 +290,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, /* Re-enable polling in case the global poll config changed. */ if (drm_kms_helper_poll != dev->mode_config.poll_running) - drm_kms_helper_poll_enable_locked(dev); + drm_kms_helper_poll_enable(dev); dev->mode_config.poll_running = drm_kms_helper_poll; @@ -479,8 +482,12 @@ out: * This function disables the output polling work. * * Drivers can call this helper from their device suspend implementation. It is - * not an error to call this even when output polling isn't enabled or arlready - * disabled. + * not an error to call this even when output polling isn't enabled or already + * disabled. Polling is re-enabled by calling drm_kms_helper_poll_enable(). + * + * Note that calls to enable and disable polling must be strictly ordered, which + * is automatically the case when they're only call from suspend/resume + * callbacks. */ void drm_kms_helper_poll_disable(struct drm_device *dev) { @@ -490,24 +497,6 @@ void drm_kms_helper_poll_disable(struct drm_device *dev) } EXPORT_SYMBOL(drm_kms_helper_poll_disable); -/** - * drm_kms_helper_poll_enable - re-enable output polling. - * @dev: drm_device - * - * This function re-enables the output polling work. - * - * Drivers can call this helper from their device resume implementation. It is - * an error to call this when the output polling support has not yet been set - * up. - */ -void drm_kms_helper_poll_enable(struct drm_device *dev) -{ - mutex_lock(&dev->mode_config.mutex); - drm_kms_helper_poll_enable_locked(dev); - mutex_unlock(&dev->mode_config.mutex); -} -EXPORT_SYMBOL(drm_kms_helper_poll_enable); - /** * drm_kms_helper_poll_init - initialize and enable output polling * @dev: drm_device diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 3d546c019de0..b62e3f8ad415 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -180,7 +180,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) /* Enable polling and queue hotplug re-enabling. */ if (hpd_disabled) { - drm_kms_helper_poll_enable_locked(dev); + drm_kms_helper_poll_enable(dev); mod_delayed_work(system_wq, &dev_priv->hotplug.reenable_work, msecs_to_jiffies(HPD_STORM_REENABLE_DELAY)); } @@ -511,7 +511,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) } if (enabled) - drm_kms_helper_poll_enable_locked(dev); + drm_kms_helper_poll_enable(dev); mutex_unlock(&dev->mode_config.mutex); diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 982c299e435a..d026f5017c33 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -73,6 +73,5 @@ extern void drm_kms_helper_hotplug_event(struct drm_device *dev); extern void drm_kms_helper_poll_disable(struct drm_device *dev); extern void drm_kms_helper_poll_enable(struct drm_device *dev); -extern void drm_kms_helper_poll_enable_locked(struct drm_device *dev); #endif -- cgit From ef736d394e85b1bf1fd65ba5e5257b85f6c82325 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Dec 2016 16:35:50 -0500 Subject: drm/radeon/si: load special ucode for certain MC configs Special MC ucode is required for these memory configurations. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index e8a38d296855..32d55220a2d3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -115,6 +115,8 @@ MODULE_FIRMWARE("radeon/hainan_rlc.bin"); MODULE_FIRMWARE("radeon/hainan_smc.bin"); MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); +MODULE_FIRMWARE("radeon/si58_mc.bin"); + static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh); static void si_pcie_gen3_enable(struct radeon_device *rdev); static void si_program_aspm(struct radeon_device *rdev); @@ -1650,6 +1652,7 @@ static int si_init_microcode(struct radeon_device *rdev) int err; int new_fw = 0; bool new_smc = false; + bool si58_fw = false; DRM_DEBUG("\n"); @@ -1742,6 +1745,10 @@ static int si_init_microcode(struct radeon_device *rdev) default: BUG(); } + /* this memory configuration requires special firmware */ + if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) + si58_fw = true; + DRM_INFO("Loading %s Microcode\n", new_chip_name); snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name); @@ -1845,7 +1852,10 @@ static int si_init_microcode(struct radeon_device *rdev) } } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name); + if (si58_fw) + snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); + else + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name); err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); if (err) { snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name); -- cgit From f1d877be65d36806c581c32b4687d4acefa55960 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Dec 2016 16:40:48 -0500 Subject: drm/amdgpu/si: load special ucode for certain MC configs Special MC ucode is required for these memory configurations. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 45a573e63d4a..920960089f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -44,6 +44,7 @@ MODULE_FIRMWARE("radeon/tahiti_mc.bin"); MODULE_FIRMWARE("radeon/pitcairn_mc.bin"); MODULE_FIRMWARE("radeon/verde_mc.bin"); MODULE_FIRMWARE("radeon/oland_mc.bin"); +MODULE_FIRMWARE("radeon/si58_mc.bin"); #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 @@ -113,6 +114,7 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) const char *chip_name; char fw_name[30]; int err; + bool is_58_fw = false; DRM_DEBUG("\n"); @@ -135,7 +137,14 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + /* this memory configuration requires special firmware */ + if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) + is_58_fw = true; + + if (is_58_fw) + snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); + else + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); err = request_firmware(&adev->mc.fw, fw_name, adev->dev); if (err) goto out; -- cgit From 89d5595a6f53eba4d274c1d577d649db47620601 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 13:04:47 -0500 Subject: drm/amdgpu: drop oland quirks Fixed by the new 58 MC firmware. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 10bedfac27b8..34cb7ea17caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3487,17 +3487,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6817) || (adev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (adev->asic_type == CHIP_OLAND) { - if ((adev->pdev->revision == 0xC7) || - (adev->pdev->revision == 0x80) || - (adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (adev->asic_type == CHIP_HAINAN) { if ((adev->pdev->revision == 0x81) || (adev->pdev->revision == 0x83) || -- cgit From 5cc6f520ace3aa0086747e08417c2627374af1d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 13:05:52 -0500 Subject: drm/amdgpu: drop the mclk quirk for hainan fixed by the new 58 mc firmware. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 34cb7ea17caf..e9242d6fbf84 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3495,7 +3495,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6665) || (adev->pdev->device == 0x6667)) { max_sclk = 75000; - max_mclk = 80000; } } /* Apply dpm quirks */ -- cgit From 3a69adfe5617ceba04ad3cff0f9ccad470503fb2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 13:07:14 -0500 Subject: drm/radeon: drop oland quirks Fixed by the new 58 MC firmware. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 13ba73fd9b68..668060fa68c1 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3008,17 +3008,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6817) || (rdev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (rdev->family == CHIP_OLAND) { - if ((rdev->pdev->revision == 0xC7) || - (rdev->pdev->revision == 0x80) || - (rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6604) || - (rdev->pdev->device == 0x6605)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (rdev->family == CHIP_HAINAN) { if ((rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0x83) || -- cgit From a628392cf03e0eef21b345afbb192cbade041741 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 13:08:01 -0500 Subject: drm/radeon: drop the mclk quirk for hainan fixed by the new 58 mc firmware. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 668060fa68c1..2944916f7102 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3016,7 +3016,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6665) || (rdev->pdev->device == 0x6667)) { max_sclk = 75000; - max_mclk = 80000; } } /* Apply dpm quirks */ -- cgit From 3731d12dce83d47b357753ffc450ce03f1b49688 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Jan 2017 19:26:49 +0800 Subject: drm/amd/powerplay: fix vce cg logic error on CZ/St. can fix Bug 191281: vce ib test failed. when vce idle, set vce clock gate, so the clock in vce domain will be disabled. when need to encode, disable vce clock gate, enable the clocks to vce engine. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index b0c63c5f54c9..6bb79c94cb9f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -200,7 +200,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); + AMD_CG_STATE_GATE); cgs_set_powergating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, @@ -218,7 +218,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); + AMD_PG_STATE_UNGATE); cz_dpm_update_vce_dpm(hwmgr); cz_enable_disable_vce_dpm(hwmgr, true); return 0; -- cgit From a844764751275e0e5d381958e3c7e6e0fe739e25 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Tue, 10 Jan 2017 10:50:30 +0800 Subject: drm/amdgpu: fix vm_fault_stop on gfx6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Flora Cui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 920960089f27..e2b0b1646f99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -472,19 +472,11 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_CONTEXT1_CNTL, VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK | (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) | - ((amdgpu_vm_block_size - 9) << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT) | - VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK | - VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK | - VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK | - VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK | - VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_DEFAULT_MASK | - VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | - VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK); + ((amdgpu_vm_block_size - 9) << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT)); + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + gmc_v6_0_set_fault_enable_default(adev, false); + else + gmc_v6_0_set_fault_enable_default(adev, true); gmc_v6_0_gart_flush_gpu_tlb(adev, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", @@ -763,7 +755,10 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); + else + return 0; } static int gmc_v6_0_sw_init(void *handle) -- cgit From ab8db87b8256e13a62f10af1d32f5fc233c398cc Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Jan 2017 15:47:50 +0800 Subject: drm/amd/powerplay: refine vce dpm update code on Cz. Program HardMin based on the vce_arbiter.ecclk if ecclk is 0, disable ECLK DPM 0. Otherwise VCE could hang if switching SCLK from DPM 0 to 6/7 Signed-off-by: Rex Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 4b14f259a147..0fb4e8c8f5e1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -1402,14 +1402,22 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr) cz_hwmgr->vce_dpm.hard_min_clk, PPSMC_MSG_SetEclkHardMin)); } else { - /*EPR# 419220 -HW limitation to to */ - cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; - smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, - PPSMC_MSG_SetEclkHardMin, - cz_get_eclk_level(hwmgr, - cz_hwmgr->vce_dpm.hard_min_clk, - PPSMC_MSG_SetEclkHardMin)); - + /*Program HardMin based on the vce_arbiter.ecclk */ + if (hwmgr->vce_arbiter.ecclk == 0) { + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetEclkHardMin, 0); + /* disable ECLK DPM 0. Otherwise VCE could hang if + * switching SCLK from DPM 0 to 6/7 */ + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetEclkSoftMin, 1); + } else { + cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetEclkHardMin, + cz_get_eclk_level(hwmgr, + cz_hwmgr->vce_dpm.hard_min_clk, + PPSMC_MSG_SetEclkHardMin)); + } } return 0; } -- cgit From d22a48bf7302ef064295749fa79cd47093c5a000 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:15:56 +0800 Subject: drm/i915/gvt: introudce intel_vgpu_reset_resource() to reset vgpu resource state This patch introudces a new function intel_vgpu_reset_resource() to reset allocated vgpu resources by intel_vgpu_alloc_resource(). So far we only need clear the fence registers. The function _clear_vgpu_fence() will reset both virtual and physical fence registers to 0. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 29 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/gvt/gvt.h | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 65200313515c..f7bce8603958 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -158,6 +158,14 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu, POSTING_READ(fence_reg_lo); } +static void _clear_vgpu_fence(struct intel_vgpu *vgpu) +{ + int i; + + for (i = 0; i < vgpu_fence_sz(vgpu); i++) + intel_vgpu_write_fence(vgpu, i, 0); +} + static void free_vgpu_fence(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; @@ -171,9 +179,9 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); + _clear_vgpu_fence(vgpu); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; - intel_vgpu_write_fence(vgpu, i, 0); list_add_tail(®->link, &dev_priv->mm.fence_list); } @@ -201,13 +209,14 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) continue; list_del(pos); vgpu->fence.regs[i] = reg; - intel_vgpu_write_fence(vgpu, i, 0); if (++i == vgpu_fence_sz(vgpu)) break; } if (i != vgpu_fence_sz(vgpu)) goto out_free_fence; + _clear_vgpu_fence(vgpu); + mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put(dev_priv); return 0; @@ -306,6 +315,22 @@ void intel_vgpu_free_resource(struct intel_vgpu *vgpu) free_resource(vgpu); } +/** + * intel_vgpu_reset_resource - reset resource state owned by a vGPU + * @vgpu: a vGPU + * + * This function is used to reset resource state owned by a vGPU. + * + */ +void intel_vgpu_reset_resource(struct intel_vgpu *vgpu) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + intel_runtime_pm_get(dev_priv); + _clear_vgpu_fence(vgpu); + intel_runtime_pm_put(dev_priv); +} + /** * intel_alloc_vgpu_resource - allocate HW resource for a vGPU * @vgpu: vGPU diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 0af17016f33f..455281d07d75 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -323,6 +323,7 @@ struct intel_vgpu_creation_params { int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu, struct intel_vgpu_creation_params *param); +void intel_vgpu_reset_resource(struct intel_vgpu *vgpu); void intel_vgpu_free_resource(struct intel_vgpu *vgpu); void intel_vgpu_write_fence(struct intel_vgpu *vgpu, u32 fence, u64 value); -- cgit From b611581b375ce28536ab50be9cd507bb6092fb1e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:15:57 +0800 Subject: drm/i915/gvt: introuduce intel_vgpu_reset_gtt() to reset gtt This patch introduces a new function intel_vgpu_reset_gtt() to reset the all GTT related status, including GGTT, PPGTT, scratch page. This function can free all shadowed PPGTT, clear all GGTT entry, and clear scratch page to all zero. After this, we can ensure no gtt related information can be leakaged from one VM to anothor one when assign vgpu instance across different VMs (not simultaneously). Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gtt.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 3cf0df0bb391..47dec4acf7ff 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2277,3 +2277,30 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) for (offset = 0; offset < num_entries; offset++) ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); } + +/** + * intel_vgpu_reset_gtt - reset the all GTT related status + * @vgpu: a vGPU + * @dmlr: true for vGPU Device Model Level Reset, false for GT Reset + * + * This function is called from vfio core to reset reset all + * GTT related status, including GGTT, PPGTT, scratch page. + * + */ +void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr) +{ + int i; + + ppgtt_free_all_shadow_page(vgpu); + if (!dmlr) + return; + + intel_vgpu_reset_ggtt(vgpu); + + /* clear scratch page for security */ + for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { + if (vgpu->gtt.scratch_pt[i].page != NULL) + memset(page_address(vgpu->gtt.scratch_pt[i].page), + 0, PAGE_SIZE); + } +} diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index b315ab3593ec..f88eb5e89bea 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -208,6 +208,7 @@ extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); +extern void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr); extern void intel_gvt_clean_gtt(struct intel_gvt *gvt); extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu, -- cgit From 536fc234074b09adae1763d8fb5b2d947847ad1d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:15:58 +0800 Subject: drm/i915/gvt: move cfg space inititation function to cfg_space.c Move the configuration space inititation function setup_vgpu_cfg_space() in vgpu.c to dedicated source file cfg_space.c, and rename the function as intel_vgpu_init_cfg_space(). Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 49 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/vgpu.c | 44 +------------------------------- 3 files changed, 52 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 711c31c8d8b4..82f449d59884 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -282,3 +282,52 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, } return 0; } + +/** + * intel_vgpu_init_cfg_space - init vGPU configuration space when create vGPU + * + * @vgpu: a vGPU + * @primary: is the vGPU presented as primary + * + */ +void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, + bool primary) +{ + struct intel_gvt *gvt = vgpu->gvt; + const struct intel_gvt_device_info *info = &gvt->device_info; + u16 *gmch_ctl; + int i; + + memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space, + info->cfg_space_size); + + if (!primary) { + vgpu_cfg_space(vgpu)[PCI_CLASS_DEVICE] = + INTEL_GVT_PCI_CLASS_VGA_OTHER; + vgpu_cfg_space(vgpu)[PCI_CLASS_PROG] = + INTEL_GVT_PCI_CLASS_VGA_OTHER; + } + + /* Show guest that there isn't any stolen memory.*/ + gmch_ctl = (u16 *)(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_GMCH_CONTROL); + *gmch_ctl &= ~(BDW_GMCH_GMS_MASK << BDW_GMCH_GMS_SHIFT); + + intel_vgpu_write_pci_bar(vgpu, PCI_BASE_ADDRESS_2, + gvt_aperture_pa_base(gvt), true); + + vgpu_cfg_space(vgpu)[PCI_COMMAND] &= ~(PCI_COMMAND_IO + | PCI_COMMAND_MEMORY + | PCI_COMMAND_MASTER); + /* + * Clear the bar upper 32bit and let guest to assign the new value + */ + memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4); + memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4); + memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4); + + for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) { + vgpu->cfg_space.bar[i].size = pci_resource_len( + gvt->dev_priv->drm.pdev, i * 2); + vgpu->cfg_space.bar[i].tracked = false; + } +} diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 455281d07d75..ab505552d313 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -412,6 +412,8 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, unsigned long *g_index); +void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, + bool primary); int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index f0e86123e45b..35c274177da8 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -66,48 +66,6 @@ int setup_vgpu_mmio(struct intel_vgpu *vgpu) return 0; } -static void setup_vgpu_cfg_space(struct intel_vgpu *vgpu, - struct intel_vgpu_creation_params *param) -{ - struct intel_gvt *gvt = vgpu->gvt; - const struct intel_gvt_device_info *info = &gvt->device_info; - u16 *gmch_ctl; - int i; - - memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space, - info->cfg_space_size); - - if (!param->primary) { - vgpu_cfg_space(vgpu)[PCI_CLASS_DEVICE] = - INTEL_GVT_PCI_CLASS_VGA_OTHER; - vgpu_cfg_space(vgpu)[PCI_CLASS_PROG] = - INTEL_GVT_PCI_CLASS_VGA_OTHER; - } - - /* Show guest that there isn't any stolen memory.*/ - gmch_ctl = (u16 *)(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_GMCH_CONTROL); - *gmch_ctl &= ~(BDW_GMCH_GMS_MASK << BDW_GMCH_GMS_SHIFT); - - intel_vgpu_write_pci_bar(vgpu, PCI_BASE_ADDRESS_2, - gvt_aperture_pa_base(gvt), true); - - vgpu_cfg_space(vgpu)[PCI_COMMAND] &= ~(PCI_COMMAND_IO - | PCI_COMMAND_MEMORY - | PCI_COMMAND_MASTER); - /* - * Clear the bar upper 32bit and let guest to assign the new value - */ - memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4); - memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4); - memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4); - - for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) { - vgpu->cfg_space.bar[i].size = pci_resource_len( - gvt->dev_priv->drm.pdev, i * 2); - vgpu->cfg_space.bar[i].tracked = false; - } -} - void populate_pvinfo_page(struct intel_vgpu *vgpu) { /* setup the ballooning information */ @@ -300,7 +258,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->gvt = gvt; bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); - setup_vgpu_cfg_space(vgpu, param); + intel_vgpu_init_cfg_space(vgpu, param->primary); ret = setup_vgpu_mmio(vgpu); if (ret) -- cgit From c64ff6c774413fdbffd7f0f3ef5b04127d461cf4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:15:59 +0800 Subject: drm/i915/gvt: introduce intel_vgpu_reset_cfg_space to reset configuration space This patch introduces a new function intel_vgpu_reset_cfg_space() to reset vGPU configuration space. This function will unmap gttmmio and aperture if they are mapped before. Then entire cfg space will be restored to default values. Currently we only do such reset when vGPU is not owned by any VM so we simply restore entire cfg space to default value, not following the PCIe FLR spec that some fields should remain unchanged. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 82f449d59884..4a6a2ed65732 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -331,3 +331,28 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, vgpu->cfg_space.bar[i].tracked = false; } } + +/** + * intel_vgpu_reset_cfg_space - reset vGPU configuration space + * + * @vgpu: a vGPU + * + */ +void intel_vgpu_reset_cfg_space(struct intel_vgpu *vgpu) +{ + u8 cmd = vgpu_cfg_space(vgpu)[PCI_COMMAND]; + bool primary = vgpu_cfg_space(vgpu)[PCI_CLASS_DEVICE] != + INTEL_GVT_PCI_CLASS_VGA_OTHER; + + if (cmd & PCI_COMMAND_MEMORY) { + trap_gttmmio(vgpu, false); + map_aperture(vgpu, false); + } + + /** + * Currently we only do such reset when vGPU is not + * owned by any VM, so we simply restore entire cfg + * space to default value. + */ + intel_vgpu_init_cfg_space(vgpu, primary); +} diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index ab505552d313..9c97d7d36449 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -414,6 +414,8 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, bool primary); +void intel_vgpu_reset_cfg_space(struct intel_vgpu *vgpu); + int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); -- cgit From cdcc43479c9b929940a1955d2e7bae696d2b9496 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:16:00 +0800 Subject: drm/i915/gvt: move mmio init/clean function to mmio.c Move the mmio space inititation function setup_vgpu_mmio() and cleanup function clean_vgpu_mmio() in vgpu.c to dedicated source file mmio.c, and rename them as intel_vgpu_init_mmio() and intel_vgpu_clean_mmio() respectively. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/mmio.c | 42 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.h | 3 +++ drivers/gpu/drm/i915/gvt/vgpu.c | 37 +++----------------------------- 5 files changed, 49 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9c97d7d36449..9832e7f2a662 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -429,7 +429,6 @@ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu); int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa); int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); -int setup_vgpu_mmio(struct intel_vgpu *vgpu); void populate_pvinfo_page(struct intel_vgpu *vgpu); struct intel_gvt_ops { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8cbaf1c83720..fd96898db71d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -257,7 +257,7 @@ static int handle_device_reset(struct intel_vgpu *vgpu, unsigned int offset, mutex_unlock(&vgpu->gvt->lock); intel_vgpu_clean_gtt(vgpu); mutex_lock(&vgpu->gvt->lock); - setup_vgpu_mmio(vgpu); + intel_vgpu_init_mmio(vgpu); populate_pvinfo_page(vgpu); intel_vgpu_init_gtt(vgpu); } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index e60701397ac2..f7da735b7919 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -303,3 +303,45 @@ err: mutex_unlock(&gvt->lock); return ret; } + +/** + * intel_vgpu_init_mmio - init MMIO space + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed + */ +int intel_vgpu_init_mmio(struct intel_vgpu *vgpu) +{ + const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; + + if (vgpu->mmio.vreg) + memset(vgpu->mmio.vreg, 0, info->mmio_size * 2); + else { + vgpu->mmio.vreg = vzalloc(info->mmio_size * 2); + if (!vgpu->mmio.vreg) + return -ENOMEM; + } + vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; + + memcpy(vgpu->mmio.vreg, vgpu->gvt->firmware.mmio, info->mmio_size); + memcpy(vgpu->mmio.sreg, vgpu->gvt->firmware.mmio, info->mmio_size); + + vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + + /* set the bit 0:2(Core C-State ) to C0 */ + vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + return 0; +} + +/** + * intel_vgpu_clean_mmio - clean MMIO space + * @vgpu: a vGPU + * + */ +void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu) +{ + vfree(vgpu->mmio.vreg); + vgpu->mmio.vreg = vgpu->mmio.sreg = NULL; +} diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 87d5b5e366a3..1b74ac9650d5 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -86,6 +86,9 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, *offset; \ }) +int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); +void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); + int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 35c274177da8..958c3ed1c6c6 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -35,37 +35,6 @@ #include "gvt.h" #include "i915_pvinfo.h" -static void clean_vgpu_mmio(struct intel_vgpu *vgpu) -{ - vfree(vgpu->mmio.vreg); - vgpu->mmio.vreg = vgpu->mmio.sreg = NULL; -} - -int setup_vgpu_mmio(struct intel_vgpu *vgpu) -{ - struct intel_gvt *gvt = vgpu->gvt; - const struct intel_gvt_device_info *info = &gvt->device_info; - - if (vgpu->mmio.vreg) - memset(vgpu->mmio.vreg, 0, info->mmio_size * 2); - else { - vgpu->mmio.vreg = vzalloc(info->mmio_size * 2); - if (!vgpu->mmio.vreg) - return -ENOMEM; - } - - vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; - - memcpy(vgpu->mmio.vreg, gvt->firmware.mmio, info->mmio_size); - memcpy(vgpu->mmio.sreg, gvt->firmware.mmio, info->mmio_size); - - vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; - - /* set the bit 0:2(Core C-State ) to C0 */ - vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; - return 0; -} - void populate_pvinfo_page(struct intel_vgpu *vgpu) { /* setup the ballooning information */ @@ -226,7 +195,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) intel_vgpu_clean_gtt(vgpu); intel_gvt_hypervisor_detach_vgpu(vgpu); intel_vgpu_free_resource(vgpu); - clean_vgpu_mmio(vgpu); + intel_vgpu_clean_mmio(vgpu); vfree(vgpu); intel_gvt_update_vgpu_types(gvt); @@ -260,7 +229,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, intel_vgpu_init_cfg_space(vgpu, param->primary); - ret = setup_vgpu_mmio(vgpu); + ret = intel_vgpu_init_mmio(vgpu); if (ret) goto out_clean_idr; @@ -312,7 +281,7 @@ out_detach_hypervisor_vgpu: out_clean_vgpu_resource: intel_vgpu_free_resource(vgpu); out_clean_vgpu_mmio: - clean_vgpu_mmio(vgpu); + intel_vgpu_clean_mmio(vgpu); out_clean_idr: idr_remove(&gvt->vgpu_idr, vgpu->id); out_free_vgpu: -- cgit From 97d58f7dd0ff12e5fddeffb40aed845daa628149 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:16:01 +0800 Subject: drm/i915/gvt: introduce intel_vgpu_reset_mmio() to reset mmio space This patch introduces a new function intel_vgpu_reset_mmio() to reset vGPU MMIO space (virtual registers of the vGPU). The default values are loaded as firmware during gvt inititiation. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 39 +++++++++++++++++++++++++-------------- drivers/gpu/drm/i915/gvt/mmio.h | 1 + 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index f7da735b7919..4df078bc5d04 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -304,6 +304,26 @@ err: return ret; } + +/** + * intel_vgpu_reset_mmio - reset virtual MMIO space + * @vgpu: a vGPU + * + */ +void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) +{ + struct intel_gvt *gvt = vgpu->gvt; + const struct intel_gvt_device_info *info = &gvt->device_info; + + memcpy(vgpu->mmio.vreg, gvt->firmware.mmio, info->mmio_size); + memcpy(vgpu->mmio.sreg, gvt->firmware.mmio, info->mmio_size); + + vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + + /* set the bit 0:2(Core C-State ) to C0 */ + vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; +} + /** * intel_vgpu_init_mmio - init MMIO space * @vgpu: a vGPU @@ -315,22 +335,13 @@ int intel_vgpu_init_mmio(struct intel_vgpu *vgpu) { const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; - if (vgpu->mmio.vreg) - memset(vgpu->mmio.vreg, 0, info->mmio_size * 2); - else { - vgpu->mmio.vreg = vzalloc(info->mmio_size * 2); - if (!vgpu->mmio.vreg) - return -ENOMEM; - } - vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; + vgpu->mmio.vreg = vzalloc(info->mmio_size * 2); + if (!vgpu->mmio.vreg) + return -ENOMEM; - memcpy(vgpu->mmio.vreg, vgpu->gvt->firmware.mmio, info->mmio_size); - memcpy(vgpu->mmio.sreg, vgpu->gvt->firmware.mmio, info->mmio_size); - - vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; - /* set the bit 0:2(Core C-State ) to C0 */ - vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + intel_vgpu_reset_mmio(vgpu); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 1b74ac9650d5..3bc620f56f35 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -87,6 +87,7 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, }) int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); +void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu); void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); -- cgit From cfe65f4037cedb911a840ebcf6dafc5b69e535b4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:16:02 +0800 Subject: drm/i915/gvt: fix vGPU instance reuse issues by vGPU reset function Our function tests found several issues related to reusing vGPU instance. They are qemu reboot failure, guest tdr after reboot, host hang when reboot guest. All these issues are caused by dirty status inherited from last VM. This patch fix all these issues by resetting a virtual GPU before VM use it. The reset logical is put into a low level function _intel_gvt_reset_vgpu(), which supports Device Model Level Reset, Full GT Reset and Per-Engine Reset. vGPU Device Model Level Reset (DMLR) simulates the PCI reset to reset the whole vGPU to default state as when it is created, including GTT, execlist, scratch pages, cfg space, mmio space, pvinfo page, scheduler and fence registers. The ultimate goal of vGPU DMLR is that reuse a vGPU instance by different virtual machines. When we reassign a vGPU to a virtual machine we must issue such reset first. Full GT Reset and Per-Engine GT Reset are soft reset flow for GPU engines (Render, Blitter, Video, Video Enhancement). It is defined by GPU Spec. Unlike the FLR, GT reset only reset particular resource of a vGPU per the reset request. Guest driver can issue a GT reset by programming the virtual GDRST register to reset specific virtual GPU engine or all engines. Since vGPU DMLR and GT reset can share some code so we implement both these two into one single function intel_gvt_reset_vgpu_locked(). The parameter dmlr is to identify if we will do FLR or GT reset. The parameter engine_mask is to specific the engines that need to be resetted. If value ALL_ENGINES is given for engine_mask, it means the caller requests a full gt reset that we will reset all virtual GPU engines. Signed-off-by: Changbin Du Reviewed-by: Jike Song Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/vgpu.c | 73 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9832e7f2a662..e227caf5859e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -376,6 +376,8 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, + unsigned int engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 958c3ed1c6c6..7295bc8e12fb 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -327,7 +327,75 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, } /** - * intel_gvt_reset_vgpu - reset a virtual GPU + * intel_gvt_reset_vgpu_locked - reset a virtual GPU by DMLR or GT reset + * @vgpu: virtual GPU + * @dmlr: vGPU Device Model Level Reset or GT Reset + * @engine_mask: engines to reset for GT reset + * + * This function is called when user wants to reset a virtual GPU through + * device model reset or GT reset. The caller should hold the gvt lock. + * + * vGPU Device Model Level Reset (DMLR) simulates the PCI level reset to reset + * the whole vGPU to default state as when it is created. This vGPU function + * is required both for functionary and security concerns.The ultimate goal + * of vGPU FLR is that reuse a vGPU instance by virtual machines. When we + * assign a vGPU to a virtual machine we must isse such reset first. + * + * Full GT Reset and Per-Engine GT Reset are soft reset flow for GPU engines + * (Render, Blitter, Video, Video Enhancement). It is defined by GPU Spec. + * Unlike the FLR, GT reset only reset particular resource of a vGPU per + * the reset request. Guest driver can issue a GT reset by programming the + * virtual GDRST register to reset specific virtual GPU engine or all + * engines. + * + * The parameter dev_level is to identify if we will do DMLR or GT reset. + * The parameter engine_mask is to specific the engines that need to be + * resetted. If value ALL_ENGINES is given for engine_mask, it means + * the caller requests a full GT reset that we will reset all virtual + * GPU engines. For FLR, engine_mask is ignored. + */ +void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, + unsigned int engine_mask) +{ + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; + + gvt_dbg_core("------------------------------------------\n"); + gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n", + vgpu->id, dmlr, engine_mask); + vgpu->resetting = true; + + intel_vgpu_stop_schedule(vgpu); + /* + * The current_vgpu will set to NULL after stopping the + * scheduler when the reset is triggered by current vgpu. + */ + if (scheduler->current_vgpu == NULL) { + mutex_unlock(&gvt->lock); + intel_gvt_wait_vgpu_idle(vgpu); + mutex_lock(&gvt->lock); + } + + intel_vgpu_reset_execlist(vgpu, dmlr ? ALL_ENGINES : engine_mask); + + /* full GPU reset or device model level reset */ + if (engine_mask == ALL_ENGINES || dmlr) { + intel_vgpu_reset_gtt(vgpu, dmlr); + intel_vgpu_reset_resource(vgpu); + intel_vgpu_reset_mmio(vgpu); + populate_pvinfo_page(vgpu); + + if (dmlr) + intel_vgpu_reset_cfg_space(vgpu); + } + + vgpu->resetting = false; + gvt_dbg_core("reset vgpu%d done\n", vgpu->id); + gvt_dbg_core("------------------------------------------\n"); +} + +/** + * intel_gvt_reset_vgpu - reset a virtual GPU (Function Level) * @vgpu: virtual GPU * * This function is called when user wants to reset a virtual GPU. @@ -335,4 +403,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, */ void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu) { + mutex_lock(&vgpu->gvt->lock); + intel_gvt_reset_vgpu_locked(vgpu, true, 0); + mutex_unlock(&vgpu->gvt->lock); } -- cgit From c34eaa8d0f9d9ae26a4a6af7bc3aca57310cf483 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 13 Jan 2017 11:16:03 +0800 Subject: drm/i915/gvt: rewrite gt reset handler using new function intel_gvt_reset_vgpu_locked GT reset and FLR share some operations and they are both implemented in our new function intel_gvt_reset_vgpu_locked(). This patch rewrite the gt reset handler using this new function. Besides, this new implementation fixed the old issue in GT reset. The old implementation reset GGTT entries which is illegal. We only clear GGTT entries at PCI level reset. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 90 ++++++++++++------------------------- 1 file changed, 29 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index fd96898db71d..ab2ea157da4c 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -231,77 +231,45 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, return 0; } -static int handle_device_reset(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes, unsigned long bitmap) -{ - struct intel_gvt_workload_scheduler *scheduler = - &vgpu->gvt->scheduler; - - vgpu->resetting = true; - - intel_vgpu_stop_schedule(vgpu); - /* - * The current_vgpu will set to NULL after stopping the - * scheduler when the reset is triggered by current vgpu. - */ - if (scheduler->current_vgpu == NULL) { - mutex_unlock(&vgpu->gvt->lock); - intel_gvt_wait_vgpu_idle(vgpu); - mutex_lock(&vgpu->gvt->lock); - } - - intel_vgpu_reset_execlist(vgpu, bitmap); - - /* full GPU reset */ - if (bitmap == 0xff) { - mutex_unlock(&vgpu->gvt->lock); - intel_vgpu_clean_gtt(vgpu); - mutex_lock(&vgpu->gvt->lock); - intel_vgpu_init_mmio(vgpu); - populate_pvinfo_page(vgpu); - intel_vgpu_init_gtt(vgpu); - } - - vgpu->resetting = false; - - return 0; -} - static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) + void *p_data, unsigned int bytes) { + unsigned int engine_mask = 0; u32 data; - u64 bitmap = 0; write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); if (data & GEN6_GRDOM_FULL) { gvt_dbg_mmio("vgpu%d: request full GPU reset\n", vgpu->id); - bitmap = 0xff; - } - if (data & GEN6_GRDOM_RENDER) { - gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id); - bitmap |= (1 << RCS); - } - if (data & GEN6_GRDOM_MEDIA) { - gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id); - bitmap |= (1 << VCS); - } - if (data & GEN6_GRDOM_BLT) { - gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id); - bitmap |= (1 << BCS); - } - if (data & GEN6_GRDOM_VECS) { - gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id); - bitmap |= (1 << VECS); - } - if (data & GEN8_GRDOM_MEDIA2) { - gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); - if (HAS_BSD2(vgpu->gvt->dev_priv)) - bitmap |= (1 << VCS2); + engine_mask = ALL_ENGINES; + } else { + if (data & GEN6_GRDOM_RENDER) { + gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id); + engine_mask |= (1 << RCS); + } + if (data & GEN6_GRDOM_MEDIA) { + gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id); + engine_mask |= (1 << VCS); + } + if (data & GEN6_GRDOM_BLT) { + gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id); + engine_mask |= (1 << BCS); + } + if (data & GEN6_GRDOM_VECS) { + gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id); + engine_mask |= (1 << VECS); + } + if (data & GEN8_GRDOM_MEDIA2) { + gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id); + if (HAS_BSD2(vgpu->gvt->dev_priv)) + engine_mask |= (1 << VCS2); + } } - return handle_device_reset(vgpu, offset, p_data, bytes, bitmap); + + intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); + + return 0; } static int gmbus_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, -- cgit From f65a7c9cb3770ed4d3e7c57c66d7032689081b5e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 9 Jan 2017 15:33:02 +0100 Subject: drm/exynos/decon5433: update shadow registers iff there are active windows Improper usage of DECON_UPDATE register leads to subtle errors. If it set in decon_commit when there are no active windows it results in slow registry updates - all subsequent shadow registry updates takes more than full vblank. On the other side if it is not set when there are active windows it results in garbage on the screen after suspend/resume of FB console. The patch hopefully fixes it. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6ca1f3117fe8..ef7fcb5f044b 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -188,8 +188,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) /* enable output and display signal */ decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID | VIDCON0_ENVID_F, ~0); - - decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); } static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, @@ -340,8 +338,9 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); - /* standalone update */ - decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); + /* update iff there are active windows */ + if (crtc->base.state->plane_mask) + decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); -- cgit From 821b40b79db7dedbfe15ab330dfd181e661a533f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Jan 2017 10:20:58 +0100 Subject: drm/exynos/decon5433: set STANDALONE_UPDATE_F also if planes are disabled STANDALONE_UPDATE_F should be set if something changed in plane configurations, including plane disable. The patch fixes page-faults bugs, caused by decon still using framebuffers of disabled planes. v2: fixed clear-bit code (Thx Marek) v3: use test_and_clear_bit (Thx Joonyoung) Signed-off-by: Andrzej Hajda Tested-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index ef7fcb5f044b..09e8cc36948e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -46,7 +46,8 @@ enum decon_flag_bits { BIT_CLKS_ENABLED, BIT_IRQS_ENABLED, BIT_WIN_UPDATED, - BIT_SUSPENDED + BIT_SUSPENDED, + BIT_REQUEST_UPDATE }; struct decon_context { @@ -313,6 +314,7 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, /* window enable */ decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_disable_plane(struct exynos_drm_crtc *crtc, @@ -325,6 +327,7 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, return; decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, 0); + set_bit(BIT_REQUEST_UPDATE, &ctx->flags); } static void decon_atomic_flush(struct exynos_drm_crtc *crtc) @@ -338,8 +341,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); - /* update iff there are active windows */ - if (crtc->base.state->plane_mask) + if (test_and_clear_bit(BIT_REQUEST_UPDATE, &ctx->flags)) decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); if (ctx->out_type & IFTYPE_I80) -- cgit From c57a94ffd0105d58ab104fe383148c5eda5aa033 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 11 Jan 2017 10:16:12 -0500 Subject: drm/msm/mdp5: rip out plane->pending tracking It would race between userspace thread and commit worker. Ie. vblank irq would trigger event and userspace could begin the next atomic update, before the commit worker had a chance to clear the pending flag. If we do end up needing something to prevent userspace from trying another pageflip before getting vblank event, it should probably be implemented as a pending_planes bitmask, similar to pending_crtcs. See start_atomic() and end_atomic(). Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 6 ------ drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 4 ---- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 22 ---------------------- 3 files changed, 32 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 5f6cd8745dbc..c396d459a9d0 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -119,13 +119,7 @@ static void mdp5_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *st static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *state) { - int i; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); - struct drm_plane *plane; - struct drm_plane_state *plane_state; - - for_each_plane_in_state(state, plane, plane_state, i) - mdp5_plane_complete_commit(plane, plane_state); if (mdp5_kms->smp) mdp5_smp_complete_commit(mdp5_kms->smp, &mdp5_kms->state->smp); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index 17b0cc101171..cdfc63d90c7b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -104,8 +104,6 @@ struct mdp5_plane_state { /* assigned by crtc blender */ enum mdp_mixer_stage_id stage; - - bool pending : 1; }; #define to_mdp5_plane_state(x) \ container_of(x, struct mdp5_plane_state, base) @@ -232,8 +230,6 @@ int mdp5_irq_domain_init(struct mdp5_kms *mdp5_kms); void mdp5_irq_domain_fini(struct mdp5_kms *mdp5_kms); uint32_t mdp5_plane_get_flush(struct drm_plane *plane); -void mdp5_plane_complete_commit(struct drm_plane *plane, - struct drm_plane_state *state); enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane); struct drm_plane *mdp5_plane_init(struct drm_device *dev, bool primary); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index c099da7bc212..25d9d0a97156 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -179,7 +179,6 @@ mdp5_plane_atomic_print_state(struct drm_printer *p, drm_printf(p, "\tzpos=%u\n", pstate->zpos); drm_printf(p, "\talpha=%u\n", pstate->alpha); drm_printf(p, "\tstage=%s\n", stage2name(pstate->stage)); - drm_printf(p, "\tpending=%u\n", pstate->pending); } static void mdp5_plane_reset(struct drm_plane *plane) @@ -220,8 +219,6 @@ mdp5_plane_duplicate_state(struct drm_plane *plane) if (mdp5_state && mdp5_state->base.fb) drm_framebuffer_reference(mdp5_state->base.fb); - mdp5_state->pending = false; - return &mdp5_state->base; } @@ -288,13 +285,6 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane, DBG("%s: check (%d -> %d)", plane->name, plane_enabled(old_state), plane_enabled(state)); - /* We don't allow faster-than-vblank updates.. if we did add this - * some day, we would need to disallow in cases where hwpipe - * changes - */ - if (WARN_ON(to_mdp5_plane_state(old_state)->pending)) - return -EBUSY; - max_width = config->hw->lm.max_width << 16; max_height = config->hw->lm.max_height << 16; @@ -370,12 +360,9 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct drm_plane_state *state = plane->state; - struct mdp5_plane_state *mdp5_state = to_mdp5_plane_state(state); DBG("%s: update", plane->name); - mdp5_state->pending = true; - if (plane_enabled(state)) { int ret; @@ -851,15 +838,6 @@ uint32_t mdp5_plane_get_flush(struct drm_plane *plane) return pstate->hwpipe->flush_mask; } -/* called after vsync in thread context */ -void mdp5_plane_complete_commit(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct mdp5_plane_state *pstate = to_mdp5_plane_state(plane->state); - - pstate->pending = false; -} - /* initialize plane */ struct drm_plane *mdp5_plane_init(struct drm_device *dev, bool primary) { -- cgit From de85d2b35ac74f6be769573d4a8708c823219900 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 12 Jan 2017 17:41:44 -0500 Subject: drm/msm: fix potential null ptr issue in non-iommu case Fixes: 9cb07b099fb ("drm/msm: support multiple address spaces") Reported-by: Riku Voipio Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 5 ++--- drivers/gpu/drm/msm/msm_gem.c | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 14ff87686a36..686a580c711a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -345,7 +345,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu *gpu = &adreno_gpu->base; - struct msm_mmu *mmu; int ret; adreno_gpu->funcs = funcs; @@ -385,8 +384,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return ret; } - mmu = gpu->aspace->mmu; - if (mmu) { + if (gpu->aspace && gpu->aspace->mmu) { + struct msm_mmu *mmu = gpu->aspace->mmu; ret = mmu->funcs->attach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports)); if (ret) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d8bc59c7e261..8098677a3916 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -294,6 +294,8 @@ put_iova(struct drm_gem_object *obj) WARN_ON(!mutex_is_locked(&dev->struct_mutex)); for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) { + if (!priv->aspace[id]) + continue; msm_gem_unmap_vma(priv->aspace[id], &msm_obj->domain[id], msm_obj->sgt); } -- cgit From 69bcc0b7140c30de552aa3ef08322295862e8e2f Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 11 Jan 2017 18:27:25 +0900 Subject: Revert "drm/amdgpu: Only update the CUR_SIZE register when necessary" This reverts commits 7c83d7abc9997cf1efac2c0ce384b5e8453ee870 and a1f49cc179ce6b7b7758ae3ff5cdb138d0ee0f56. They caused the HW cursor to disappear under various circumstances in the wild. I wasn't able to reproduce any of them, and I'm not sure what's going on. But those changes aren't a big deal anyway, so let's just revert for now. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=191291 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99143 Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 22 +++++++--------------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 22 +++++++--------------- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 24 +++++++++--------------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 22 +++++++--------------- 4 files changed, 30 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9999dc71b998..ccb5e02e7b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2512,6 +2512,8 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, + ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); return 0; } @@ -2537,7 +2539,6 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; struct drm_gem_object *obj; struct amdgpu_bo *aobj; int ret; @@ -2578,7 +2579,9 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v10_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2587,16 +2590,10 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v10_0_cursor_move_locked(crtc, x, y); - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height) { - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (width - 1) << 16 | (height - 1)); amdgpu_crtc->cursor_width = width; amdgpu_crtc->cursor_height = height; + amdgpu_crtc->cursor_hot_x = hot_x; + amdgpu_crtc->cursor_hot_y = hot_y; } dce_v10_0_show_cursor(crtc); @@ -2620,7 +2617,6 @@ unpin: static void dce_v10_0_cursor_reset(struct drm_crtc *crtc) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; if (amdgpu_crtc->cursor_bo) { dce_v10_0_lock_cursor(crtc, true); @@ -2628,10 +2624,6 @@ static void dce_v10_0_cursor_reset(struct drm_crtc *crtc) dce_v10_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, amdgpu_crtc->cursor_y); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->cursor_width - 1) << 16 | - (amdgpu_crtc->cursor_height - 1)); - dce_v10_0_show_cursor(crtc); dce_v10_0_lock_cursor(crtc, false); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 2006abbbfb62..a7af5b33a5e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2532,6 +2532,8 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, + ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); return 0; } @@ -2557,7 +2559,6 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; struct drm_gem_object *obj; struct amdgpu_bo *aobj; int ret; @@ -2598,7 +2599,9 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v11_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2607,16 +2610,10 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v11_0_cursor_move_locked(crtc, x, y); - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height) { - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (width - 1) << 16 | (height - 1)); amdgpu_crtc->cursor_width = width; amdgpu_crtc->cursor_height = height; + amdgpu_crtc->cursor_hot_x = hot_x; + amdgpu_crtc->cursor_hot_y = hot_y; } dce_v11_0_show_cursor(crtc); @@ -2640,7 +2637,6 @@ unpin: static void dce_v11_0_cursor_reset(struct drm_crtc *crtc) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; if (amdgpu_crtc->cursor_bo) { dce_v11_0_lock_cursor(crtc, true); @@ -2648,10 +2644,6 @@ static void dce_v11_0_cursor_reset(struct drm_crtc *crtc) dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, amdgpu_crtc->cursor_y); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->cursor_width - 1) << 16 | - (amdgpu_crtc->cursor_height - 1)); - dce_v11_0_show_cursor(crtc); dce_v11_0_lock_cursor(crtc, false); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b4e4ec630e8c..39df6a50637f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1859,6 +1859,8 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + int w = amdgpu_crtc->cursor_width; + amdgpu_crtc->cursor_x = x; amdgpu_crtc->cursor_y = y; @@ -1878,6 +1880,8 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, + ((w - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); return 0; } @@ -1903,7 +1907,6 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; struct drm_gem_object *obj; struct amdgpu_bo *aobj; int ret; @@ -1944,7 +1947,9 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v6_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -1953,16 +1958,10 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v6_0_cursor_move_locked(crtc, x, y); - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height) { - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (width - 1) << 16 | (height - 1)); amdgpu_crtc->cursor_width = width; amdgpu_crtc->cursor_height = height; + amdgpu_crtc->cursor_hot_x = hot_x; + amdgpu_crtc->cursor_hot_y = hot_y; } dce_v6_0_show_cursor(crtc); @@ -1986,7 +1985,6 @@ unpin: static void dce_v6_0_cursor_reset(struct drm_crtc *crtc) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; if (amdgpu_crtc->cursor_bo) { dce_v6_0_lock_cursor(crtc, true); @@ -1994,10 +1992,6 @@ static void dce_v6_0_cursor_reset(struct drm_crtc *crtc) dce_v6_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, amdgpu_crtc->cursor_y); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->cursor_width - 1) << 16 | - (amdgpu_crtc->cursor_height - 1)); - dce_v6_0_show_cursor(crtc); dce_v6_0_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 584abe834a3c..28102bb1704d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2363,6 +2363,8 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, + ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); return 0; } @@ -2388,7 +2390,6 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; struct drm_gem_object *obj; struct amdgpu_bo *aobj; int ret; @@ -2429,7 +2430,9 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v8_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2438,16 +2441,10 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v8_0_cursor_move_locked(crtc, x, y); - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height) { - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (width - 1) << 16 | (height - 1)); amdgpu_crtc->cursor_width = width; amdgpu_crtc->cursor_height = height; + amdgpu_crtc->cursor_hot_x = hot_x; + amdgpu_crtc->cursor_hot_y = hot_y; } dce_v8_0_show_cursor(crtc); @@ -2471,7 +2468,6 @@ unpin: static void dce_v8_0_cursor_reset(struct drm_crtc *crtc) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = crtc->dev->dev_private; if (amdgpu_crtc->cursor_bo) { dce_v8_0_lock_cursor(crtc, true); @@ -2479,10 +2475,6 @@ static void dce_v8_0_cursor_reset(struct drm_crtc *crtc) dce_v8_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, amdgpu_crtc->cursor_y); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->cursor_width - 1) << 16 | - (amdgpu_crtc->cursor_height - 1)); - dce_v8_0_show_cursor(crtc); dce_v8_0_lock_cursor(crtc, false); -- cgit From 81ddd8c0c5e1cb41184d66567140cb48c53eb3d1 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 13 Jan 2017 15:00:16 +0100 Subject: cifs: initialize file_info_lock Reviewed-by: Jeff Layton CC: Stable file_info_lock is not initalized in initiate_cifs_search(), leading to the following splat after a simple "mount.cifs ... dir && ls dir/": BUG: spinlock bad magic on CPU#0, ls/486 lock: 0xffff880009301110, .magic: 00000000, .owner: /-1, .owner_cpu: 0 CPU: 0 PID: 486 Comm: ls Not tainted 4.9.0 #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ffffc900042f3db0 ffffffff81327533 0000000000000000 ffff880009301110 ffffc900042f3dd0 ffffffff810baf75 ffff880009301110 ffffffff817ae077 ffffc900042f3df0 ffffffff810baff6 ffff880009301110 ffff880008d69900 Call Trace: [] dump_stack+0x65/0x92 [] spin_dump+0x85/0xe0 [] spin_bug+0x26/0x30 [] do_raw_spin_lock+0xe9/0x130 [] _raw_spin_lock+0x1f/0x30 [] cifs_closedir+0x4d/0x100 [] __fput+0x5d/0x160 [] ____fput+0xe/0x10 [] task_work_run+0x7e/0xa0 [] exit_to_usermode_loop+0x92/0xa0 [] syscall_return_slowpath+0x49/0x50 [] entry_SYSCALL_64_fastpath+0xa7/0xa9 Fixes: 3afca265b5f53a0 ("Clarify locking of cifs file and tcon structures and make more granular") Signed-off-by: Rabin Vincent Signed-off-by: Steve French --- fs/cifs/readdir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 8f6a2a5863b9..a27fc8791551 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) rc = -ENOMEM; goto error_exit; } + spin_lock_init(&cifsFile->file_info_lock); file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); tcon = tlink_tcon(tlink); -- cgit From d7f5762c5e532dfe8247ce1bc60d97af27ff8d00 Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Wed, 4 Jan 2017 23:16:41 +0100 Subject: netfilter: nf_tables: fix spelling mistakes o s/numerice/numeric o s/opertaor/operator Signed-off-by: Alexander Alemayhu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 881d49e94569..e3f27e09eb2b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -235,7 +235,7 @@ enum nft_rule_compat_flags { /** * enum nft_rule_compat_attributes - nf_tables rule compat attributes * - * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32) * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) */ enum nft_rule_compat_attributes { @@ -499,7 +499,7 @@ enum nft_bitwise_attributes { * enum nft_byteorder_ops - nf_tables byteorder operators * * @NFT_BYTEORDER_NTOH: network to host operator - * @NFT_BYTEORDER_HTON: host to network opertaor + * @NFT_BYTEORDER_HTON: host to network operator */ enum nft_byteorder_ops { NFT_BYTEORDER_NTOH, -- cgit From 6443ebc3fdd6f3c766d9442c18be274b3d736050 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 7 Jan 2017 20:49:18 +0800 Subject: netfilter: rpfilter: fix incorrect loopback packet judgment Currently, we check the existing rtable in PREROUTING hook, if RTCF_LOCAL is set, we assume that the packet is loopback. But this assumption is incorrect, for example, a packet encapsulated in ipsec transport mode was received and routed to local, after decapsulation, it would be delivered to local again, and the rtable was not dropped, so RTCF_LOCAL check would trigger. But actually, the packet was not loopback. So for these normal loopback packets, we can check whether the in device is IFF_LOOPBACK or not. For these locally generated broadcast/multicast, we can check whether the skb->pkt_type is PACKET_LOOPBACK or not. Finally, there's a subtle difference between nft fib expr and xtables rpfilter extension, user can add the following nft rule to do strict rpfilter check: # nft add rule x y meta iif eth0 fib saddr . iif oif != eth0 drop So when the packet is loopback, it's better to store the in device instead of the LOOPBACK_IFINDEX, otherwise, after adding the above nft rule, locally generated broad/multicast packets will be dropped incorrectly. Fixes: f83a7ea2075c ("netfilter: xt_rpfilter: skip locally generated broadcast/multicast, too") Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 6 ++++++ net/ipv4/netfilter/ipt_rpfilter.c | 8 ++++---- net/ipv4/netfilter/nft_fib_ipv4.c | 15 +++++---------- net/ipv6/netfilter/ip6t_rpfilter.c | 8 ++++---- net/ipv6/netfilter/nft_fib_ipv6.c | 13 ++++--------- 5 files changed, 23 insertions(+), 27 deletions(-) diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index cbedda077db2..5ceb2205e4e3 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -9,6 +9,12 @@ struct nft_fib { extern const struct nla_policy nft_fib_policy[]; +static inline bool +nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) +{ + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; +} + int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index f273098e48fd..37fb9552e858 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -63,10 +63,10 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, return dev_match || flags & XT_RPFILTER_LOOSE; } -static bool rpfilter_is_local(const struct sk_buff *skb) +static bool +rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { - const struct rtable *rt = skb_rtable(skb); - return rt && (rt->rt_flags & RTCF_LOCAL); + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -79,7 +79,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (rpfilter_is_local(skb)) + if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 965b1a161369..2981291910dd 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -26,13 +26,6 @@ static __be32 get_saddr(__be32 addr) return addr; } -static bool fib4_is_local(const struct sk_buff *skb) -{ - const struct rtable *rt = skb_rtable(skb); - - return rt && (rt->rt_flags & RTCF_LOCAL); -} - #define DSCP_BITS 0xfc void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, @@ -95,8 +88,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { - nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && + nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { + nft_fib_store_result(dest, priv->result, pkt, + nft_in(pkt)->ifindex); return; } @@ -131,7 +126,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, switch (res.type) { case RTN_UNICAST: break; - case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + case RTN_LOCAL: /* Should not see RTN_LOCAL here */ return; default: break; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index d5263dc364a9..b12e61b7b16c 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -72,10 +72,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, return ret; } -static bool rpfilter_is_local(const struct sk_buff *skb) +static bool +rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { - const struct rt6_info *rt = (const void *) skb_dst(skb); - return rt && (rt->rt6i_flags & RTF_LOCAL); + return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (rpfilter_is_local(skb)) + if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index c947aad8bcc6..765facf03d45 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -18,13 +18,6 @@ #include #include -static bool fib6_is_local(const struct sk_buff *skb) -{ - const struct rt6_info *rt = (const void *)skb_dst(skb); - - return rt && (rt->rt6i_flags & RTF_LOCAL); -} - static int get_ifindex(const struct net_device *dev) { return dev ? dev->ifindex : 0; @@ -164,8 +157,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { - nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && + nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { + nft_fib_store_result(dest, priv->result, pkt, + nft_in(pkt)->ifindex); return; } -- cgit From d21e540b4dd74a26df7a66ebab75c693a4a6a861 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 7 Jan 2017 20:51:50 +0800 Subject: netfilter: nf_tables: fix possible oops when dumping stateful objects When dumping nft stateful objects, if NFTA_OBJ_TABLE and NFTA_OBJ_TYPE attributes are not specified either, filter will become NULL, so oops will happen(actually nft utility will always set NFTA_OBJ_TABLE attr, so I write a test program to make this happen): BUG: unable to handle kernel NULL pointer dereference at (null) IP: nf_tables_dump_obj+0x17c/0x330 [nf_tables] [...] Call Trace: ? nf_tables_dump_obj+0x5/0x330 [nf_tables] ? __kmalloc_reserve.isra.35+0x31/0x90 ? __alloc_skb+0x5b/0x1e0 netlink_dump+0x124/0x2a0 __netlink_dump_start+0x161/0x190 nf_tables_getobj+0xe8/0x280 [nf_tables] Fixes: a9fea2a3c3cf ("netfilter: nf_tables: allow to filter stateful object dumps by type") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0db5f9782265..091d2dcc63b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4262,10 +4262,11 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (filter->table[0] && + if (filter && filter->table[0] && strcmp(filter->table, table->name)) goto cont; - if (filter->type != NFT_OBJECT_UNSPEC && + if (filter && + filter->type != NFT_OBJECT_UNSPEC && obj->type->type != filter->type) goto cont; -- cgit From e4670b058af64639ec1aef4db845c39bfdfff7c4 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 9 Jan 2017 17:24:18 -0500 Subject: netfilter: Fix typo in NF_CONNTRACK Kconfig option description The NF_CONNTRACK Kconfig option description makes an incorrect reference to the "meta" expression where the "ct" expression would be correct.This patch fixes the respective typographical error. Fixes: d497c6352736 ("netfilter: add help information to new nf_tables Kconfig options") Signed-off-by: William Breathitt Gray Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 63729b489c2c..bbc45f8a7b2d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -494,7 +494,7 @@ config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" help - This option adds the "meta" expression that you can use to match + This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. config NFT_SET_RBTREE -- cgit From 7e9081c5aac73b8a0bc22e0b3e7a12c3e9cf5256 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 13 Jan 2017 12:22:09 -0200 Subject: drm/fence: fix memory overwrite when setting out_fence fd Currently if the userspace declares a int variable to store the out_fence fd and pass it to OUT_FENCE_PTR the kernel will overwrite the 32 bits above the int variable on 64 bits systems. Fix this by making the internal storage of out_fence in the kernel a s32 pointer. Reported-by: Chad Versace Signed-off-by: Gustavo Padovan Fixes: beaf5af48034 ("drm/fence: add out-fences support") Cc: Daniel Vetter Cc: Rafael Antognolli Cc: Laurent Pinchart Acked-by: Laurent Pinchart Reviewed-and-Tested-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/1484317329-9293-1-git-send-email-gustavo@padovan.org --- drivers/gpu/drm/drm_atomic.c | 12 ++++++------ include/drm/drm_atomic.h | 2 +- include/drm/drm_mode_config.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 60697482b94c..50f5cf7b69d1 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -291,15 +291,15 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state, EXPORT_SYMBOL(drm_atomic_get_crtc_state); static void set_out_fence_for_crtc(struct drm_atomic_state *state, - struct drm_crtc *crtc, s64 __user *fence_ptr) + struct drm_crtc *crtc, s32 __user *fence_ptr) { state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr; } -static s64 __user *get_out_fence_for_crtc(struct drm_atomic_state *state, +static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc) { - s64 __user *fence_ptr; + s32 __user *fence_ptr; fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr; state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL; @@ -512,7 +512,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->prop_out_fence_ptr) { - s64 __user *fence_ptr = u64_to_user_ptr(val); + s32 __user *fence_ptr = u64_to_user_ptr(val); if (!fence_ptr) return 0; @@ -1915,7 +1915,7 @@ EXPORT_SYMBOL(drm_atomic_clean_old_fb); */ struct drm_out_fence_state { - s64 __user *out_fence_ptr; + s32 __user *out_fence_ptr; struct sync_file *sync_file; int fd; }; @@ -1952,7 +1952,7 @@ static int prepare_crtc_signaling(struct drm_device *dev, return 0; for_each_crtc_in_state(state, crtc, crtc_state, i) { - u64 __user *fence_ptr; + s32 __user *fence_ptr; fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index d6d241f63b9f..56814e8ae7ea 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -144,7 +144,7 @@ struct __drm_crtcs_state { struct drm_crtc *ptr; struct drm_crtc_state *state; struct drm_crtc_commit *commit; - s64 __user *out_fence_ptr; + s32 __user *out_fence_ptr; }; struct __drm_connnectors_state { diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index bf9991b20611..137432386310 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -488,7 +488,7 @@ struct drm_mode_config { /** * @prop_out_fence_ptr: Sync File fd pointer representing the * outgoing fences for a CRTC. Userspace should provide a pointer to a - * value of type s64, and then cast that pointer to u64. + * value of type s32, and then cast that pointer to u64. */ struct drm_property *prop_out_fence_ptr; /** -- cgit From 5d0e7705774dd412a465896d08d59a81a345c1e4 Mon Sep 17 00:00:00 2001 From: Tom Hromatka Date: Tue, 10 Jan 2017 10:57:56 -0700 Subject: sparc: Fixed typo in sstate.c. Replaced panicing with panicking Signed-off-by: Tom Hromatka Signed-off-by: David S. Miller --- arch/sparc/kernel/sstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c index c59af546f522..3caed4023589 100644 --- a/arch/sparc/kernel/sstate.c +++ b/arch/sparc/kernel/sstate.c @@ -43,8 +43,8 @@ static const char poweroff_msg[32] __attribute__((aligned(32))) = "Linux powering off"; static const char rebooting_msg[32] __attribute__((aligned(32))) = "Linux rebooting"; -static const char panicing_msg[32] __attribute__((aligned(32))) = - "Linux panicing"; +static const char panicking_msg[32] __attribute__((aligned(32))) = + "Linux panicking"; static int sstate_reboot_call(struct notifier_block *np, unsigned long type, void *_unused) { @@ -76,7 +76,7 @@ static struct notifier_block sstate_reboot_notifier = { static int sstate_panic_event(struct notifier_block *n, unsigned long event, void *ptr) { - do_set_sstate(HV_SOFT_STATE_TRANSITION, panicing_msg); + do_set_sstate(HV_SOFT_STATE_TRANSITION, panicking_msg); return NOTIFY_DONE; } -- cgit From 58c744da9dcc82a4b55a18e05149ae0e32624d11 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 17 Jan 2017 14:33:16 +0800 Subject: drm/i915/gvt: Enable the shadow batch buffer Shadow batch buffer is used to shadow the privileged batch buffer which is submitted by vGPU's workload. This patch is used to unmark this functionality. Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d26a092c70e8..e4563984cb1e 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -481,7 +481,6 @@ struct parser_exec_state { (s->vgpu->gvt->device_info.gmadr_bytes_in_cmd >> 2) static unsigned long bypass_scan_mask = 0; -static bool bypass_batch_buffer_scan = true; /* ring ALL, type = 0 */ static struct sub_op_bits sub_op_mi[] = { @@ -1525,9 +1524,6 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) { struct intel_gvt *gvt = s->vgpu->gvt; - if (bypass_batch_buffer_scan) - return 0; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ if (cmd_val(s, 0) & (1 << 8)) -- cgit From 62f0a11e2339e1ba154600d1f49ef5d5d84eaae4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jan 2017 19:58:16 +0000 Subject: drm/i915/gvt: Fix relocation of shadow bb set_gma_to_bb_cmd() is completely bogus - it is (incorrectly) applying the rules to read a GTT offset from a command as opposed to writing the GTT offset. And to cap it all set_gma_to_bb_cmd() is called within a list iterator of the most strange construction. Fixes: be1da7070aea ("drm/i915/gvt: vGPU command scanner") Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Zhi Wang Cc: Yulei Zhang Cc: # v4.10-rc1+ Tested-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 64 ++++++++++-------------------------- drivers/gpu/drm/i915/gvt/scheduler.h | 2 +- 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f32bb6f6495c..fb852c51d00e 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -364,58 +364,30 @@ static void free_workload(struct intel_vgpu_workload *workload) #define get_desc_from_elsp_dwords(ed, i) \ ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) - -#define BATCH_BUFFER_ADDR_MASK ((1UL << 32) - (1U << 2)) -#define BATCH_BUFFER_ADDR_HIGH_MASK ((1UL << 16) - (1U)) -static int set_gma_to_bb_cmd(struct intel_shadow_bb_entry *entry_obj, - unsigned long add, int gmadr_bytes) -{ - if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) - return -1; - - *((u32 *)(entry_obj->bb_start_cmd_va + (1 << 2))) = add & - BATCH_BUFFER_ADDR_MASK; - if (gmadr_bytes == 8) { - *((u32 *)(entry_obj->bb_start_cmd_va + (2 << 2))) = - add & BATCH_BUFFER_ADDR_HIGH_MASK; - } - - return 0; -} - static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd; + const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd; + struct intel_shadow_bb_entry *entry_obj; /* pin the gem object to ggtt */ - if (!list_empty(&workload->shadow_bb)) { - struct intel_shadow_bb_entry *entry_obj = - list_first_entry(&workload->shadow_bb, - struct intel_shadow_bb_entry, - list); - struct intel_shadow_bb_entry *temp; + list_for_each_entry(entry_obj, &workload->shadow_bb, list) { + struct i915_vma *vma; - list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, - list) { - struct i915_vma *vma; - - vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, - 4, 0); - if (IS_ERR(vma)) { - gvt_err("Cannot pin\n"); - return; - } - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ - - /* update the relocate gma with shadow batch buffer*/ - set_gma_to_bb_cmd(entry_obj, - i915_ggtt_offset(vma), - gmadr_bytes); + vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); + if (IS_ERR(vma)) { + gvt_err("Cannot pin\n"); + return; } + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. + */ + + /* update the relocate gma with shadow batch buffer*/ + entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); + if (gmadr_bytes == 8) + entry_obj->bb_start_cmd_va[2] = 0; } } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 3b30c28bff51..2833dfa8c9ae 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -113,7 +113,7 @@ struct intel_shadow_bb_entry { struct drm_i915_gem_object *obj; void *va; unsigned long len; - void *bb_start_cmd_va; + u32 *bb_start_cmd_va; }; #define workload_q_head(vgpu, ring_id) \ -- cgit From 7622b25543665567d8830a63210385b7d705924b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 10 Oct 2016 09:44:06 -0700 Subject: drm/vc4: Fix memory leak of the CRTC state. The underscores variant frees the pointers inside, while the no-underscores variant calls underscores and then frees the struct. Signed-off-by: Eric Anholt Fixes: d8dbf44f13b9 ("drm/vc4: Make the CRTCs cooperate on allocating display lists.") Cc: stable@vger.kernel.org --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index a0fd3e66bc4b..7aadce1f7e7a 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -839,7 +839,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, } - __drm_atomic_helper_crtc_destroy_state(state); + drm_atomic_helper_crtc_destroy_state(crtc, state); } static const struct drm_crtc_funcs vc4_crtc_funcs = { -- cgit From 21ccc32496b2f63228f5232b3ac0e426e8fb3c31 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Jan 2017 10:49:00 +0300 Subject: drm/vc4: fix a bounds check We accidentally return success even if vc4_full_res_bounds_check() fails. Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Engestrom Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_render_cl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 08886a309757..5cdd003605f5 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -461,7 +461,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, } ret = vc4_full_res_bounds_check(exec, *obj, surf); - if (!ret) + if (ret) return ret; return 0; -- cgit From 0f2ff82e11c86c05d051cae32b58226392d33bbf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jan 2017 21:42:53 +1100 Subject: drm/vc4: Fix an integer overflow in temporary allocation layout. We copy the unvalidated ioctl arguments from the user into kernel temporary memory to run the validation from, to avoid a race where the user updates the unvalidate contents in between validating them and copying them into the validated BO. However, in setting up the layout of the kernel side, we failed to check one of the additions (the roundup() for shader_rec_offset) against integer overflow, allowing a nearly MAX_UINT value of bin_cl_size to cause us to under-allocate the temporary space that we then copy_from_user into. Reported-by: Murray McAllister Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") --- drivers/gpu/drm/vc4/vc4_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index db920771bfb5..c5fe3554858e 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -594,7 +594,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) args->shader_rec_count); struct vc4_bo *bo; - if (uniforms_offset < shader_rec_offset || + if (shader_rec_offset < args->bin_cl_size || + uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || -- cgit From 6b8ac63847bc2f958dd93c09edc941a0118992d9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jan 2017 21:58:06 +1100 Subject: drm/vc4: Return -EINVAL on the overflow checks failing. By failing to set the errno, we'd continue on to trying to set up the RCL, and then oops on trying to dereference the tile_bo that binning validation should have set up. Reported-by: Ingo Molnar Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") --- drivers/gpu/drm/vc4/vc4_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index c5fe3554858e..ab3016982466 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -601,6 +601,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) sizeof(struct vc4_shader_state)) || temp_size < exec_size) { DRM_ERROR("overflow in exec arguments\n"); + ret = -EINVAL; goto fail; } -- cgit From 7195439d1d71bc4a6c33cfb57bc669a7cd041041 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 13 Jan 2017 12:23:35 +0100 Subject: Revert "bcma: init serial console directly from ChipCommon code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4c81acab3816 ("bcma: init serial console directly from ChipCommon code") as it broke IRQ assignment. Getting IRQ with bcma_core_irq helper on SoC requires MIPS core to be set. It happens *after* ChipCommon initialization so we can't do this so early. This fixes a user reported regression. It wasn't critical as serial was still somehow working but lack of IRQs was making in unreliable. Fixes: 4c81acab3816 ("bcma: init serial console directly from ChipCommon code") Reported-by: Felix Fietkau Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 3 +++ drivers/bcma/driver_chipcommon.c | 11 +++-------- drivers/bcma/driver_mips.c | 3 +++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index f642c4264c27..168fa175d65a 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -45,6 +45,9 @@ int bcma_sprom_get(struct bcma_bus *bus); void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc); +#endif /* CONFIG_BCMA_DRIVER_MIPS */ /* driver_chipcommon_b.c */ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index b4f6520e74f0..62f5bfa5065d 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -15,8 +15,6 @@ #include #include -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc); - static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, u32 mask, u32 value) { @@ -186,9 +184,6 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc) if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_early_init(cc); - if (IS_BUILTIN(CONFIG_BCM47XX) && bus->hosttype == BCMA_HOSTTYPE_SOC) - bcma_chipco_serial_init(cc); - if (bus->hosttype == BCMA_HOSTTYPE_SOC) bcma_core_chipcommon_flash_detect(cc); @@ -378,9 +373,9 @@ u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value) return res; } -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc) { -#if IS_BUILTIN(CONFIG_BCM47XX) unsigned int irq; u32 baud_base; u32 i; @@ -422,5 +417,5 @@ static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) ports[i].baud_base = baud_base; ports[i].reg_shift = 0; } -#endif /* CONFIG_BCM47XX */ } +#endif /* CONFIG_BCMA_DRIVER_MIPS */ diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 96f171328200..89af807cf29c 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -278,9 +278,12 @@ static void bcma_core_mips_nvram_init(struct bcma_drv_mips *mcore) void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { + struct bcma_bus *bus = mcore->core->bus; + if (mcore->early_setup_done) return; + bcma_chipco_serial_init(&bus->drv_cc); bcma_core_mips_nvram_init(mcore); mcore->early_setup_done = true; -- cgit From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Tue, 17 Jan 2017 18:13:51 +0000 Subject: virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a subtle change in how the virtio_net flags are derived from the SKBs ip_summed field. With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to ip_summed == CHECKSUM_NONE, which should be the same. Further, the virtio spec 1.0 / CS04 explicitly says that VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver. Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb") Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Rolf Neugebauer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 66204007d7ac..56436472ccc7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; -- cgit From a249708bc2aa1fe3ddf15dfac22bee519d15996b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 17 Jan 2017 12:23:21 +0100 Subject: stmmac: add missing of_node_put The function stmmac_dt_phy provides several possibilities for initializing plat->mdio_node, all of which have the effect of increasing the reference count of the assigned value. This field is not updated elsewhere, so the value is live until the end of the lifetime of plat (devm_allocated), just after the end of stmmac_remove_config_dt. Thus, add an of_node_put on plat->mdio_node in stmmac_remove_config_dt. It is possible that the field mdio_node is never initialized, but of_node_put is NULL-safe, so it is also safe to call of_node_put in that case. Signed-off-by: Julia Lawall Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 082cd48db6a7..36942f5a6a53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -351,6 +351,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev, if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(plat->phy_node); + of_node_put(plat->mdio_node); } #else struct plat_stmmacenet_data * -- cgit From d5ff72d9af73bc3cbaa3edb541333a851f8c7295 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Mon, 16 Jan 2017 18:37:58 -0500 Subject: vxlan: fix byte order of vxlan-gpe port number vxlan->cfg.dst_port is in network byte order, so an htons() is needed here. Also reduced comment length to stay closer to 80 column width (still slightly over, however). Fixes: e1e5314de08b ("vxlan: implement GPE") Signed-off-by: Lance Richardson Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ca7196c40060..8a79cfcac889 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2890,7 +2890,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) { if (conf->flags & VXLAN_F_GPE) - vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ + vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ else vxlan->cfg.dst_port = default_port; } -- cgit From e05208ded1905e500cd5b369d624b071951c68b9 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Jan 2017 20:00:40 +0800 Subject: drm/amdgpu: fix bug set incorrect value to vce register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the proper bits for clockgating setup. Signed-off-by: Rex Zhu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 5fb0b7f5c065..b621bde8c240 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -175,7 +175,7 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, WREG32(mmVCE_UENC_CLOCK_GATING_2, data); data = RREG32(mmVCE_UENC_REG_CLOCK_GATING); - data &= ~0xffc00000; + data &= ~0x3ff; WREG32(mmVCE_UENC_REG_CLOCK_GATING, data); data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); -- cgit From 50a1ebc70a2803deb7811fc73fb55d70e353bc34 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 10 Jan 2017 20:03:59 +0800 Subject: drm/amdgpu: fix program vce instance logic error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit need to clear bit31-29 in GRBM_GFX_INDEX, then the program can be valid. Signed-off-by: Rex Zhu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index b621bde8c240..37ca685e5a9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -43,9 +43,13 @@ #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 +#define GRBM_GFX_INDEX__VCE_ALL_PIPE 0x07 + #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 +#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000 + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 #define VCE_V3_0_FW_SIZE (384 * 1024) @@ -54,6 +58,9 @@ #define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) +#define GET_VCE_INSTANCE(i) ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \ + | GRBM_GFX_INDEX__VCE_ALL_PIPE) + static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); @@ -249,7 +256,7 @@ static int vce_v3_0_start(struct amdgpu_device *adev) if (adev->vce.harvest_config & (1 << idx)) continue; - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); vce_v3_0_mc_resume(adev, idx); WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); @@ -273,7 +280,7 @@ static int vce_v3_0_start(struct amdgpu_device *adev) } } - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); mutex_unlock(&adev->grbm_idx_mutex); return 0; @@ -288,7 +295,7 @@ static int vce_v3_0_stop(struct amdgpu_device *adev) if (adev->vce.harvest_config & (1 << idx)) continue; - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); if (adev->asic_type >= CHIP_STONEY) WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001); @@ -306,7 +313,7 @@ static int vce_v3_0_stop(struct amdgpu_device *adev) vce_v3_0_set_vce_sw_clock_gating(adev, false); } - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); mutex_unlock(&adev->grbm_idx_mutex); return 0; @@ -586,17 +593,17 @@ static bool vce_v3_0_check_soft_reset(void *handle) * VCE team suggest use bit 3--bit 6 for busy status check */ mutex_lock(&adev->grbm_idx_mutex); - WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); } - WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); } - WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); mutex_unlock(&adev->grbm_idx_mutex); if (srbm_soft_reset) { @@ -734,7 +741,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, if (adev->vce.harvest_config & (1 << i)) continue; - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i); + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i)); if (enable) { /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */ @@ -753,7 +760,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, vce_v3_0_set_vce_sw_clock_gating(adev, enable); } - WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); mutex_unlock(&adev->grbm_idx_mutex); return 0; -- cgit From ca581e45335c6aa45e5b27999bc13bdefb7e84d9 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 12 Jan 2017 21:48:26 +0800 Subject: drm/amdgpu: change clock gating mode for uvd_v4. use sw cg when decode. and hw cg when idle. fixes: https://bugs.freedesktop.org/show_bug.cgi?id=99313 https://bugzilla.kernel.org/show_bug.cgi?id=192161 Signed-off-by: Rex Zhu Ack-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 42 +++++++++-------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 96444e4d862a..7fb9137dd89b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -40,13 +40,14 @@ #include "smu/smu_7_0_1_sh_mask.h" static void uvd_v4_2_mc_resume(struct amdgpu_device *adev); -static void uvd_v4_2_init_cg(struct amdgpu_device *adev); static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v4_2_start(struct amdgpu_device *adev); static void uvd_v4_2_stop(struct amdgpu_device *adev); static int uvd_v4_2_set_clockgating_state(void *handle, enum amd_clockgating_state state); +static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, + bool sw_mode); /** * uvd_v4_2_ring_get_rptr - get read pointer * @@ -140,7 +141,8 @@ static int uvd_v4_2_sw_fini(void *handle) return r; } - +static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, + bool enable); /** * uvd_v4_2_hw_init - start and test UVD block * @@ -155,8 +157,7 @@ static int uvd_v4_2_hw_init(void *handle) uint32_t tmp; int r; - uvd_v4_2_init_cg(adev); - uvd_v4_2_set_clockgating_state(adev, AMD_CG_STATE_GATE); + uvd_v4_2_enable_mgcg(adev, true); amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); r = uvd_v4_2_start(adev); if (r) @@ -266,11 +267,13 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->uvd.ring; uint32_t rb_bufsz; int i, j, r; - /* disable byte swapping */ u32 lmi_swap_cntl = 0; u32 mp_swap_cntl = 0; + WREG32(mmUVD_CGC_GATE, 0); + uvd_v4_2_set_dcm(adev, true); + uvd_v4_2_mc_resume(adev); /* disable interupt */ @@ -406,6 +409,8 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev) /* Unstall UMC and register bus */ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8)); + + uvd_v4_2_set_dcm(adev, false); } /** @@ -619,19 +624,6 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2); } -static void uvd_v4_2_init_cg(struct amdgpu_device *adev) -{ - bool hw_mode = true; - - if (hw_mode) { - uvd_v4_2_set_dcm(adev, false); - } else { - u32 tmp = RREG32(mmUVD_CGC_CTRL); - tmp &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; - WREG32(mmUVD_CGC_CTRL, tmp); - } -} - static bool uvd_v4_2_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -685,17 +677,6 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, static int uvd_v4_2_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) - return 0; - - if (state == AMD_CG_STATE_GATE) - gate = true; - - uvd_v4_2_enable_mgcg(adev, gate); - return 0; } @@ -711,9 +692,6 @@ static int uvd_v4_2_set_powergating_state(void *handle, */ struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) - return 0; - if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); return 0; -- cgit From 4e6e98b1e48c9474aed7ce03025ec319b941e26e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Jan 2017 15:02:13 -0500 Subject: drm/radeon: add support for new hainan variants New hainan parts require updated smc firmware. Cc: Sonny Jiang Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 32d55220a2d3..414776811e71 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -114,6 +114,7 @@ MODULE_FIRMWARE("radeon/hainan_mc.bin"); MODULE_FIRMWARE("radeon/hainan_rlc.bin"); MODULE_FIRMWARE("radeon/hainan_smc.bin"); MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); +MODULE_FIRMWARE("radeon/banks_k_2_smc.bin"); MODULE_FIRMWARE("radeon/si58_mc.bin"); @@ -1653,6 +1654,7 @@ static int si_init_microcode(struct radeon_device *rdev) int new_fw = 0; bool new_smc = false; bool si58_fw = false; + bool banks2_fw = false; DRM_DEBUG("\n"); @@ -1730,10 +1732,11 @@ static int si_init_microcode(struct radeon_device *rdev) ((rdev->pdev->device == 0x6660) || (rdev->pdev->device == 0x6663) || (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667))) || - ((rdev->pdev->revision == 0xc3) && - (rdev->pdev->device == 0x6665))) + (rdev->pdev->device == 0x6667)))) new_smc = true; + else if ((rdev->pdev->revision == 0xc3) && + (rdev->pdev->device == 0x6665)) + banks2_fw = true; new_chip_name = "hainan"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; me_req_size = SI_PM4_UCODE_SIZE * 4; @@ -1886,7 +1889,9 @@ static int si_init_microcode(struct radeon_device *rdev) } } - if (new_smc) + if (banks2_fw) + snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin"); + else if (new_smc) snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name); else snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name); -- cgit From 17324b6add82d6c0bf119f1d1944baef392a4e39 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Jan 2017 15:06:58 -0500 Subject: drm/amdgpu: add support for new hainan variants New hainan parts require updated smc firmware. Cc: Sonny Jiang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index e9242d6fbf84..6e150db8f380 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -64,6 +64,7 @@ MODULE_FIRMWARE("radeon/oland_smc.bin"); MODULE_FIRMWARE("radeon/oland_k_smc.bin"); MODULE_FIRMWARE("radeon/hainan_smc.bin"); MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); +MODULE_FIRMWARE("radeon/banks_k_2_smc.bin"); union power_info { struct _ATOM_POWERPLAY_INFO info; @@ -7701,10 +7702,11 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) ((adev->pdev->device == 0x6660) || (adev->pdev->device == 0x6663) || (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667))) || - ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665))) + (adev->pdev->device == 0x6667)))) chip_name = "hainan_k"; + else if ((adev->pdev->revision == 0xc3) && + (adev->pdev->device == 0x6665)) + chip_name = "banks_k_2"; else chip_name = "hainan"; break; -- cgit From 3fbfadce6012e7bb384b2e9ad47869d5177f7209 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 16 Jan 2017 22:17:29 -0800 Subject: bpf: Fix test_lru_sanity5() in test_lru_map.c test_lru_sanity5() fails when the number of online cpus is fewer than the number of possible cpus. It can be reproduced with qemu by using cmd args "--smp cpus=2,maxcpus=8". The problem is the loop in test_lru_sanity5() is testing 'i' which is incorrect. This patch: 1. Make sched_next_online() always return -1 if it cannot find a next cpu to schedule the process. 2. In test_lru_sanity5(), the parent process does sched_setaffinity() first (through sched_next_online()) and the forked process will inherit it according to the 'man sched_setaffinity'. Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab") Reported-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_lru_map.c | 53 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index b13fed534d76..9f7bd1915c21 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -67,21 +67,23 @@ static int map_equal(int lru_map, int expected) return map_subset(lru_map, expected) && map_subset(expected, lru_map); } -static int sched_next_online(int pid, int next_to_try) +static int sched_next_online(int pid, int *next_to_try) { cpu_set_t cpuset; + int next = *next_to_try; + int ret = -1; - if (next_to_try == nr_cpus) - return -1; - - while (next_to_try < nr_cpus) { + while (next < nr_cpus) { CPU_ZERO(&cpuset); - CPU_SET(next_to_try++, &cpuset); - if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) + CPU_SET(next++, &cpuset); + if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) { + ret = 0; break; + } } - return next_to_try; + *next_to_try = next; + return ret; } /* Size of the LRU amp is 2 @@ -96,11 +98,12 @@ static void test_lru_sanity0(int map_type, int map_flags) { unsigned long long key, value[nr_cpus]; int lru_map_fd, expected_map_fd; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); if (map_flags & BPF_F_NO_COMMON_LRU) lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); @@ -183,6 +186,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) /* Ther percpu lru list (i.e each cpu has its own LRU @@ -196,7 +200,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -262,6 +266,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) /* Ther percpu lru list (i.e each cpu has its own LRU @@ -275,7 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -370,11 +375,12 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned int batch_size; unsigned int map_size; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); @@ -430,11 +436,12 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) int lru_map_fd, expected_map_fd; unsigned long long key, value[nr_cpus]; unsigned long long end_key; + int next_cpu = 0; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); - assert(sched_next_online(0, 0) != -1); + assert(sched_next_online(0, &next_cpu) != -1); if (map_flags & BPF_F_NO_COMMON_LRU) lru_map_fd = create_map(map_type, map_flags, @@ -502,9 +509,8 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) static void test_lru_sanity5(int map_type, int map_flags) { unsigned long long key, value[nr_cpus]; - int next_sched_cpu = 0; + int next_cpu = 0; int map_fd; - int i; if (map_flags & BPF_F_NO_COMMON_LRU) return; @@ -519,27 +525,20 @@ static void test_lru_sanity5(int map_type, int map_flags) key = 0; assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST)); - for (i = 0; i < nr_cpus; i++) { + while (sched_next_online(0, &next_cpu) != -1) { pid_t pid; pid = fork(); if (pid == 0) { - next_sched_cpu = sched_next_online(0, next_sched_cpu); - if (next_sched_cpu != -1) - do_test_lru_sanity5(key, map_fd); + do_test_lru_sanity5(key, map_fd); exit(0); } else if (pid == -1) { - printf("couldn't spawn #%d process\n", i); + printf("couldn't spawn process to test key:%llu\n", + key); exit(1); } else { int status; - /* It is mostly redundant and just allow the parent - * process to update next_shced_cpu for the next child - * process - */ - next_sched_cpu = sched_next_online(pid, next_sched_cpu); - assert(waitpid(pid, &status, 0) == pid); assert(status == 0); key++; @@ -547,6 +546,8 @@ static void test_lru_sanity5(int map_type, int map_flags) } close(map_fd); + /* At least one key should be tested */ + assert(key > 0); printf("Pass\n"); } -- cgit From 93b43fd137cd8865adf9978ab9870a344365d3af Mon Sep 17 00:00:00 2001 From: "Alvaro G. M" Date: Tue, 17 Jan 2017 09:08:16 +0100 Subject: net: phy: dp83848: add DP83620 PHY support This PHY with fiber support is register compatible with DP83848, so add support for it. Signed-off-by: Alvaro Gamez Machado Signed-off-by: David S. Miller --- drivers/net/phy/dp83848.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 800b39f06279..a10d0e7fc5f7 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -17,6 +17,7 @@ #include #define TI_DP83848C_PHY_ID 0x20005ca0 +#define TI_DP83620_PHY_ID 0x20005ce0 #define NS_DP83848C_PHY_ID 0x20005c90 #define TLK10X_PHY_ID 0x2000a210 #define TI_DP83822_PHY_ID 0x2000a240 @@ -77,6 +78,7 @@ static int dp83848_config_intr(struct phy_device *phydev) static struct mdio_device_id __maybe_unused dp83848_tbl[] = { { TI_DP83848C_PHY_ID, 0xfffffff0 }, { NS_DP83848C_PHY_ID, 0xfffffff0 }, + { TI_DP83620_PHY_ID, 0xfffffff0 }, { TLK10X_PHY_ID, 0xfffffff0 }, { TI_DP83822_PHY_ID, 0xfffffff0 }, { } @@ -106,6 +108,7 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl); static struct phy_driver dp83848_driver[] = { DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"), DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"), + DP83848_PHY_DRIVER(TI_DP83620_PHY_ID, "TI DP83620 10/100 Mbps PHY"), DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"), DP83848_PHY_DRIVER(TI_DP83822_PHY_ID, "TI DP83822 10/100 Mbps PHY"), }; -- cgit From c97c52be78b8463ac5407f1cf1f22f8f6cf93a37 Mon Sep 17 00:00:00 2001 From: Einar Jón Date: Fri, 12 Aug 2016 13:50:41 +0200 Subject: can: c_can_pci: fix null-pointer-deref in c_can_start() - set device pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The priv->device pointer for c_can_pci is never set, but it is used without a NULL check in c_can_start(). Setting it in c_can_pci_probe() like c_can_plat_probe() prevents c_can_pci.ko from crashing, with and without CONFIG_PM. This might also cause the pm_runtime_*() functions in c_can.c to actually be executed for c_can_pci devices - they are the only other place where priv->device is used, but they all contain a null check. Signed-off-by: Einar Jón Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index 7be393c96b1a..cf7c18947189 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -161,6 +161,7 @@ static int c_can_pci_probe(struct pci_dev *pdev, dev->irq = pdev->irq; priv->base = addr; + priv->device = &pdev->dev; if (!c_can_pci_data->freq) { dev_err(&pdev->dev, "no clock frequency defined\n"); -- cgit From befa60113ce7ea270cb51eada28443ca2756f480 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Wed, 18 Jan 2017 11:35:57 +0100 Subject: can: ti_hecc: add missing prepare and unprepare of the clock In order to make the driver work with the common clock framework, this patch converts the clk_enable()/clk_disable() to clk_prepare_enable()/clk_disable_unprepare(). Also add error checking for clk_prepare_enable(). Signed-off-by: Yegor Yefremov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 680d1ff07a55..6749b1829469 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -948,7 +948,12 @@ static int ti_hecc_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll, HECC_DEF_NAPI_WEIGHT); - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) { + dev_err(&pdev->dev, "clk_prepare_enable() failed\n"); + goto probe_exit_clk; + } + err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed\n"); @@ -981,7 +986,7 @@ static int ti_hecc_remove(struct platform_device *pdev) struct ti_hecc_priv *priv = netdev_priv(ndev); unregister_candev(ndev); - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); clk_put(priv->clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); iounmap(priv->base); @@ -1006,7 +1011,7 @@ static int ti_hecc_suspend(struct platform_device *pdev, pm_message_t state) hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_SLEEPING; - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); return 0; } @@ -1015,8 +1020,11 @@ static int ti_hecc_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct ti_hecc_priv *priv = netdev_priv(dev); + int err; - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) + return err; hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_ERROR_ACTIVE; -- cgit From 4fc020d864647ea3ae8cb8f17d63e48e87ebd0bf Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 12 Jan 2017 12:44:54 +0200 Subject: drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround. The WaDisableLSQCROPERFforOCL workaround has the side effect of disabling an L3SQ optimization that has huge performance implications and is unlikely to be necessary for the correct functioning of usual graphic workloads. Userspace is free to re-enable the workaround on demand, and is generally in a better position to determine whether the workaround is necessary than the DRM is (e.g. only during the execution of compute kernels that rely on both L3 fences and HDC R/W requests). The same workaround seems to apply to BDW (at least to production stepping G1) and SKL as well (the internal workaround database claims that it does for all steppings, while the BSpec workaround table only mentions pre-production steppings), but the DRM doesn't do anything beyond whitelisting the L3SQCREG4 register so userspace can enable it when it sees fit. Do the same on KBL platforms. Improves performance of the GFXBench4 gl_manhattan31 benchmark by 60%, and gl_4 (AKA car chase) by 14% on a KBL GT2 running Mesa master -- This is followed by a regression of 35% and 10% respectively for the same benchmarks and platform caused by my recent patch series switching userspace to use the dataport constant cache instead of the sampler to implement uniform pull constant loads, which caused us to hit more heavily the L3 cache (and on platforms other than KBL had the opposite effect of improving performance of the same two benchmarks). The overall effect on KBL of this change combined with the recent userspace change is respectively 4.6% and 2.6%. SynMark2 OglShMapPcf was affected by the constant cache changes (though it improved as it did on other platforms rather than regressing), but is not significantly affected by this patch (with statistical significance of 5% and sample size 20). v2: Drop some more code to avoid unused variable warning. Fixes: 738fa1b3123f ("drm/i915/kbl: Add WaDisableLSQCROPERFforOCL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99256 Signed-off-by: Francisco Jerez Cc: Matthew Auld Cc: Eero Tamminen Cc: Jani Nikula Cc: Mika Kuoppala Cc: beignet@lists.freedesktop.org Cc: # v4.7+ Reviewed-by: Mika Kuoppala [Removed double Fixes tag] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484217894-20505-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 8726f2faa371514fba2f594d799db95203dfeee0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 10 ---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 2 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d4961fa20c73..beabc17e7c8a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -979,18 +979,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, uint32_t *batch, uint32_t index) { - struct drm_i915_private *dev_priv = engine->i915; uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - /* - * WaDisableLSQCROPERFforOCL:kbl - * This WA is implemented in skl_init_clock_gating() but since - * this batch updates GEN8_L3SQCREG4 with default value we need to - * set this bit here to retain the WA during flush. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aeb637dc1fdf..91cb4c422ad5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1095,14 +1095,6 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE); - /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes - * involving this register should also be added to WA batch as required. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - /* WaDisableLSQCROPERFforOCL:kbl */ - I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_RO_PERF_DIS); - /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, -- cgit From 3bfdfdcbce2796ce75bf2d85fd8471858d702e5d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 7 Nov 2016 22:20:54 +0200 Subject: drm/i915: Ignore bogus plane coordinates on SKL when the plane is not visible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the plane is invisible we may have all sorts of bogus stuff in the coordinates, which we must ignore or else we might fail the plane update. This started to happen on SKL when I moved the plane offset computation to happen in the check phase. Previously we happily ignored it all since we never called the update_plane hook with an invisible plane. Cc: Sivakumar Thulasimani Cc: drm-intel-fixes@lists.freedesktop.org Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98258 Testcase: igt/pm_rpm/legacy-planes Testcase: igt/pm_rpm/universal-planes Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-3-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit a5e4c7d0aa6784d8abe95c3ceef0da9656d17468) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3dc8724df400..8d702cf1a616 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2967,6 +2967,9 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (!plane_state->base.visible) + return 0; + /* Rotate src coordinates to match rotated GTT view */ if (drm_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, -- cgit From b98acbff9a05b371c5f0ca6e44a3af8ce9274379 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Jan 2017 15:36:20 +0000 Subject: regulator: twl6030: fix range comparison, allowing vsel = 59 The range min_uV > 1350000 && min_uV <= 150000 is never reachable because of a typo in the previous range check and hence vsel = 59 is never reached. Fix the previous range check to enable the vsel = 59 setting. Fixes CoverityScan CID#728454 ("Logially dead code") Signed-off-by: Colin Ian King Signed-off-by: Mark Brown --- drivers/regulator/twl6030-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 4864b9d742c0..716191046a70 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -452,7 +452,7 @@ static int twl6030smps_map_voltage(struct regulator_dev *rdev, int min_uV, vsel = 62; else if ((min_uV > 1800000) && (min_uV <= 1900000)) vsel = 61; - else if ((min_uV > 1350000) && (min_uV <= 1800000)) + else if ((min_uV > 1500000) && (min_uV <= 1800000)) vsel = 60; else if ((min_uV > 1350000) && (min_uV <= 1500000)) vsel = 59; -- cgit From 31a86d137219373c3222ca5f4f912e9a4d8065bb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 17 Jan 2017 19:19:17 +0200 Subject: net: ethtool: Initialize buffer when querying device channel settings Ethtool channels respond struct was uninitialized when querying device channel boundaries settings. As a result, unreported fields by the driver hold garbage. This may cause sending unsupported params to driver. Fixes: 8bf368620486 ('ethtool: ensure channel counts are within bounds ...') Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan CC: John W. Linville Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e23766c7e3ba..236a21e3c878 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1712,7 +1712,7 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev, static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { - struct ethtool_channels channels, max; + struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS }; u32 max_rx_in_use = 0; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) -- cgit From e91ef71dfe834e11b57411f1715cd2e2bb4401f1 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 17 Jan 2017 19:19:18 +0200 Subject: net/mlx4_en: Remove unnecessary checks when setting num channels Boundaries checks for the number of RX, TX, other and combined channels should be checked by the caller and not in the driver. In addition, remove wrong memset on get channels as it overrides the cmd field in the requester struct. Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d9c9f86a30df..d5a9372ed84d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1732,8 +1732,6 @@ static void mlx4_en_get_channels(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); - memset(channel, 0, sizeof(*channel)); - channel->max_rx = MAX_RX_RINGS; channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; @@ -1752,10 +1750,7 @@ static int mlx4_en_set_channels(struct net_device *dev, int xdp_count; int err = 0; - if (channel->other_count || channel->combined_count || - channel->tx_count > MLX4_EN_MAX_TX_RING_P_UP || - channel->rx_count > MAX_RX_RINGS || - !channel->tx_count || !channel->rx_count) + if (!channel->tx_count || !channel->rx_count) return -EINVAL; tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); -- cgit From 639e9e94160e59469305fc2c5e6f9c2733744958 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 17 Jan 2017 19:19:19 +0200 Subject: net/mlx5e: Remove unnecessary checks when setting num channels Boundaries checks for the number of RX and TX should be checked by the caller and not in the driver. Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 33a399a8b5d5..b1b9eb6ee135 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -554,16 +554,6 @@ static int mlx5e_set_channels(struct net_device *dev, __func__); return -EINVAL; } - if (ch->rx_count || ch->tx_count) { - netdev_info(dev, "%s: separate rx/tx count not supported\n", - __func__); - return -EINVAL; - } - if (count > ncv) { - netdev_info(dev, "%s: count (%d) > max (%d)\n", - __func__, count, ncv); - return -EINVAL; - } if (priv->params.num_channels == count) return 0; -- cgit From 3fd0b634de7d6b9a85f34a4cf9d8afc1df465cc9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jan 2017 16:41:03 +0100 Subject: netfilter: ipt_CLUSTERIP: fix build error without procfs We can't access c->pde if CONFIG_PROC_FS is disabled: net/ipv4/netfilter/ipt_CLUSTERIP.c: In function 'clusterip_config_find_get': net/ipv4/netfilter/ipt_CLUSTERIP.c:147:9: error: 'struct clusterip_config' has no member named 'pde' This moves the check inside of another #ifdef. Fixes: 6c5d5cfbe3c5 ("netfilter: ipt_CLUSTERIP: check duplicate config when initializing") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a6b8c1a4102b..0a783cd73faf 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -144,7 +144,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) rcu_read_lock_bh(); c = __clusterip_config_find(net, clusterip); if (c) { - if (!c->pde || unlikely(!atomic_inc_not_zero(&c->refcount))) +#ifdef CONFIG_PROC_FS + if (!c->pde) + c = NULL; + else +#endif + if (unlikely(!atomic_inc_not_zero(&c->refcount))) c = NULL; else if (entry) atomic_inc(&c->entries); -- cgit From 9a6cdf52b85ea5fb21d2bb31e4a7bc61b79923a7 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 17 Jan 2017 20:49:37 +0000 Subject: xen-netback: fix memory leaks on XenBus disconnect Eliminate memory leaks introduced several years ago by cleaning the queue resources which are allocated on XenBus connection event. Namely, queue structure array and pages used for IO rings. Signed-off-by: Igor Druzhinin Reviewed-by: Paul Durrant Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 3124eaec9427..34e4af9272ea 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -493,11 +493,20 @@ static int backend_create_xenvif(struct backend_info *be) static void backend_disconnect(struct backend_info *be) { if (be->vif) { + unsigned int queue_index; + xen_unregister_watchers(be->vif); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(be->vif); #endif /* CONFIG_DEBUG_FS */ xenvif_disconnect_data(be->vif); + for (queue_index = 0; queue_index < be->vif->num_queues; ++queue_index) + xenvif_deinit_queue(&be->vif->queues[queue_index]); + + vfree(be->vif->queues); + be->vif->num_queues = 0; + be->vif->queues = NULL; + xenvif_disconnect_ctrl(be->vif); } } @@ -1034,6 +1043,8 @@ static void connect(struct backend_info *be) err: if (be->vif->num_queues > 0) xenvif_disconnect_data(be->vif); /* Clean up existing queues */ + for (queue_index = 0; queue_index < be->vif->num_queues; ++queue_index) + xenvif_deinit_queue(&be->vif->queues[queue_index]); vfree(be->vif->queues); be->vif->queues = NULL; be->vif->num_queues = 0; -- cgit From f16f1df65f1cf139ff9e9f84661e6573d6bb27fc Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 17 Jan 2017 20:49:38 +0000 Subject: xen-netback: protect resource cleaning on XenBus disconnect vif->lock is used to protect statistics gathering agents from using the queue structure during cleaning. Signed-off-by: Igor Druzhinin Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netback/xenbus.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index e30ffd29b7e9..579521327b03 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -221,18 +221,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned int num_queues = vif->num_queues; unsigned long rx_bytes = 0; unsigned long rx_packets = 0; unsigned long tx_bytes = 0; unsigned long tx_packets = 0; unsigned int index; + spin_lock(&vif->lock); if (vif->queues == NULL) goto out; /* Aggregate tx and rx stats from each queue */ - for (index = 0; index < num_queues; ++index) { + for (index = 0; index < vif->num_queues; ++index) { queue = &vif->queues[index]; rx_bytes += queue->stats.rx_bytes; rx_packets += queue->stats.rx_packets; @@ -241,6 +241,8 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) } out: + spin_unlock(&vif->lock); + vif->dev->stats.rx_bytes = rx_bytes; vif->dev->stats.rx_packets = rx_packets; vif->dev->stats.tx_bytes = tx_bytes; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 34e4af9272ea..85b742e1c42f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -503,9 +503,11 @@ static void backend_disconnect(struct backend_info *be) for (queue_index = 0; queue_index < be->vif->num_queues; ++queue_index) xenvif_deinit_queue(&be->vif->queues[queue_index]); + spin_lock(&be->vif->lock); vfree(be->vif->queues); be->vif->num_queues = 0; be->vif->queues = NULL; + spin_unlock(&be->vif->lock); xenvif_disconnect_ctrl(be->vif); } -- cgit From 7be2c82cfd5d28d7adb66821a992604eb6dd112e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 12:12:17 -0800 Subject: net: fix harmonize_features() vs NETIF_F_HIGHDMA Ashizuka reported a highmem oddity and sent a patch for freescale fec driver. But the problem root cause is that core networking stack must ensure no skb with highmem fragment is ever sent through a device that does not assert NETIF_F_HIGHDMA in its features. We need to call illegal_highdma() from harmonize_features() regardless of CSUM checks. Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Eric Dumazet Cc: Pravin Shelar Reported-by: "Ashizuka, Yuusuke" Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 07b307b0b414..7f218e095361 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,9 +2795,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - } else if (illegal_highdma(skb->dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(skb->dev, skb)) + features &= ~NETIF_F_SG; return features; } -- cgit From cd33b3e0da43522ff8e8f2b2b71d3d08298512b0 Mon Sep 17 00:00:00 2001 From: Daniel Gonzalez Cabanelas Date: Tue, 17 Jan 2017 16:26:55 -0800 Subject: net: phy: bcm63xx: Utilize correct config_intr function Commit a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") make the BCM63xx PHY driver utilize bcm_phy_config_intr() which would appear to do the right thing, except that it does not write to the MII_BCM63XX_IR register but to MII_BCM54XX_ECR which is different. This would be causing invalid link parameters and events from being generated by the PHY interrupt. Fixes: a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") Signed-off-by: Daniel Gonzalez Cabanelas Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm63xx.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index e741bf614c4e..b0492ef2cdaa 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -21,6 +21,23 @@ MODULE_DESCRIPTION("Broadcom 63xx internal PHY driver"); MODULE_AUTHOR("Maxime Bizon "); MODULE_LICENSE("GPL"); +static int bcm63xx_config_intr(struct phy_device *phydev) +{ + int reg, err; + + reg = phy_read(phydev, MII_BCM63XX_IR); + if (reg < 0) + return reg; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + reg &= ~MII_BCM63XX_IR_GMASK; + else + reg |= MII_BCM63XX_IR_GMASK; + + err = phy_write(phydev, MII_BCM63XX_IR, reg); + return err; +} + static int bcm63xx_config_init(struct phy_device *phydev) { int reg, err; @@ -55,7 +72,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, }, { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, @@ -67,7 +84,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, } }; module_phy_driver(bcm63xx_driver); -- cgit From 719ca8111402aa6157bd83a3c966d184db0d8956 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Jan 2017 22:07:19 -0500 Subject: bnxt_en: Fix "uninitialized variable" bug in TPA code path. In the TPA GRO code path, initialize the tcp_opt_len variable to 0 so that it will be correct for packets without TCP timestamps. The bug caused the SKB fields to be incorrectly set up for packets without TCP timestamps, leading to these packets being rejected by the stack. Reported-by: Andy Gospodarek Acked-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9608cb49a11c..53e686fdf2ce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1099,7 +1099,7 @@ static struct sk_buff *bnxt_gro_func_5730x(struct bnxt_tpa_info *tpa_info, { #ifdef CONFIG_INET struct tcphdr *th; - int len, nw_off, tcp_opt_len; + int len, nw_off, tcp_opt_len = 0; if (tcp_ts) tcp_opt_len = 12; -- cgit From 9ed59592e3e379b2e9557dc1d9e9ec8fcbb33f16 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Jan 2017 14:57:36 -0800 Subject: lwtunnel: fix autoload of lwt modules Trying to add an mpls encap route when the MPLS modules are not loaded hangs. For example: CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m $ ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 The ip command hangs: root 880 826 0 21:25 pts/0 00:00:00 ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 $ cat /proc/880/stack [] call_usermodehelper_exec+0xd6/0x134 [] __request_module+0x27b/0x30a [] lwtunnel_build_state+0xe4/0x178 [] fib_create_info+0x47f/0xdd4 [] fib_table_insert+0x90/0x41f [] inet_rtm_newroute+0x4b/0x52 ... modprobe is trying to load rtnl-lwt-MPLS: root 881 5 0 21:25 ? 00:00:00 /sbin/modprobe -q -- rtnl-lwt-MPLS and it hangs after loading mpls_router: $ cat /proc/881/stack [] rtnl_lock+0x12/0x14 [] register_netdevice_notifier+0x16/0x179 [] mpls_init+0x25/0x1000 [mpls_router] [] do_one_initcall+0x8e/0x13f [] do_init_module+0x5a/0x1e5 [] load_module+0x13bd/0x17d6 ... The problem is that lwtunnel_build_state is called with rtnl lock held preventing mpls_init from registering. Given the potential references held by the time lwtunnel_build_state it can not drop the rtnl lock to the load module. So, extract the module loading code from lwtunnel_build_state into a new function to validate the encap type. The new function is called while converting the user request into a fib_config which is well before any table, device or fib entries are examined. Fixes: 745041e2aaf1 ("lwtunnel: autoload of lwt modules") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 11 +++++++++ net/core/lwtunnel.c | 62 ++++++++++++++++++++++++++++++++++++++++++++----- net/ipv4/fib_frontend.c | 8 +++++++ net/ipv6/route.c | 12 +++++++++- 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index d4c1c75b8862..0b585f1fd340 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -105,6 +105,8 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); +int lwtunnel_valid_encap_type(u16 encap_type); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, @@ -168,6 +170,15 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } +static inline int lwtunnel_valid_encap_type(u16 encap_type) +{ + return -EOPNOTSUPP; +} +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +{ + return -EOPNOTSUPP; +} + static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index a5d4e866ce88..47b1dd65947b 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_MODULES @@ -114,25 +115,74 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + if (likely(ops && ops->build_state)) + ret = ops->build_state(dev, encap, family, cfg, lws); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_build_state); + +int lwtunnel_valid_encap_type(u16 encap_type) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type > LWTUNNEL_ENCAP_MAX) + return ret; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); #ifdef CONFIG_MODULES if (!ops) { const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { - rcu_read_unlock(); + __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); + rtnl_lock(); + rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); } } #endif - if (likely(ops && ops->build_state)) - ret = ops->build_state(dev, encap, family, cfg, lws); - rcu_read_unlock(); + return ops ? 0 : -EOPNOTSUPP; +} +EXPORT_SYMBOL(lwtunnel_valid_encap_type); - return ret; +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) +{ + struct rtnexthop *rtnh = (struct rtnexthop *)attr; + struct nlattr *nla_entype; + struct nlattr *attrs; + struct nlattr *nla; + u16 encap_type; + int attrlen; + + while (rtnh_ok(rtnh, remaining)) { + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + attrs = rtnh_attrs(rtnh); + nla = nla_find(attrs, attrlen, RTA_ENCAP); + nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + + if (nla_entype) { + encap_type = nla_get_u16(nla_entype); + + if (lwtunnel_valid_encap_type(encap_type) != 0) + return -EOPNOTSUPP; + } + } + rtnh = rtnh_next(rtnh, &remaining); + } + + return 0; } -EXPORT_SYMBOL(lwtunnel_build_state); +EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); void lwtstate_free(struct lwtunnel_state *lws) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eae0332b0e8c..7db2ad2e82d3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -677,6 +678,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mx_len = nla_len(attr); break; case RTA_MULTIPATH: + err = lwtunnel_valid_encap_type_attr(nla_data(attr), + nla_len(attr)); + if (err < 0) + goto errout; cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; @@ -691,6 +696,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; break; } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4f6b067c8753..7ea85370c11c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2896,6 +2896,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + + err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, + cfg->fc_mp_len); + if (err < 0) + goto errout; } if (tb[RTA_PREF]) { @@ -2909,9 +2914,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP]; - if (tb[RTA_ENCAP_TYPE]) + if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; + } + if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); -- cgit From d407bd25a204bd66b7346dde24bd3d37ef0e0b05 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Jan 2017 15:14:17 +0100 Subject: bpf: don't trigger OOM killer under pressure with map alloc This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(), that are to be used for map allocations. Using kmalloc() for very large allocations can cause excessive work within the page allocator, so i) fall back earlier to vmalloc() when the attempt is considered costly anyway, and even more importantly ii) don't trigger OOM killer with any of the allocators. Since this is based on a user space request, for example, when creating maps with element pre-allocation, we really want such requests to fail instead of killing other user space processes. Also, don't spam the kernel log with warnings should any of the allocations fail under pressure. Given that, we can make backend selection in bpf_map_area_alloc() generic, and convert all maps over to use this API for spots with potentially large allocation requests. Note, replacing the one kmalloc_array() is fine as overflow checks happen earlier in htab_map_alloc(), since it must also protect the multiplication for vmalloc() should kmalloc_array() fail. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ kernel/bpf/arraymap.c | 18 +++++++----------- kernel/bpf/hashtab.c | 22 +++++++++------------- kernel/bpf/stackmap.c | 20 ++++++++------------ kernel/bpf/syscall.c | 26 ++++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 36 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05cf951df3fe..3ed1f3b1d594 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -247,6 +247,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); +void *bpf_map_area_alloc(size_t size); +void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 229a5d5df977..3d55d95dcf49 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -11,7 +11,6 @@ */ #include #include -#include #include #include #include @@ -74,14 +73,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) if (array_size >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); - /* allocate all map elements and zero-initialize them */ - array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); - if (!array) { - array = vzalloc(array_size); - if (!array) - return ERR_PTR(-ENOMEM); - } + array = bpf_map_area_alloc(array_size); + if (!array) + return ERR_PTR(-ENOMEM); /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -97,7 +92,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) if (array_size >= U32_MAX - PAGE_SIZE || elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { - kvfree(array); + bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } out: @@ -262,7 +257,7 @@ static void array_map_free(struct bpf_map *map) if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); - kvfree(array); + bpf_map_area_free(array); } static const struct bpf_map_ops array_ops = { @@ -319,7 +314,8 @@ static void fd_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); - kvfree(array); + + bpf_map_area_free(array); } static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3f2bb58952d8..a753bbe7df0a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,7 +13,6 @@ #include #include #include -#include #include "percpu_freelist.h" #include "bpf_lru_list.h" @@ -103,7 +102,7 @@ static void htab_free_elems(struct bpf_htab *htab) free_percpu(pptr); } free_elems: - vfree(htab->elems); + bpf_map_area_free(htab->elems); } static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, @@ -125,7 +124,8 @@ static int prealloc_init(struct bpf_htab *htab) { int err = -ENOMEM, i; - htab->elems = vzalloc(htab->elem_size * htab->map.max_entries); + htab->elems = bpf_map_area_alloc(htab->elem_size * + htab->map.max_entries); if (!htab->elems) return -ENOMEM; @@ -320,14 +320,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; err = -ENOMEM; - htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket), - GFP_USER | __GFP_NOWARN); - - if (!htab->buckets) { - htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket)); - if (!htab->buckets) - goto free_htab; - } + htab->buckets = bpf_map_area_alloc(htab->n_buckets * + sizeof(struct bucket)); + if (!htab->buckets) + goto free_htab; for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_HEAD(&htab->buckets[i].head); @@ -354,7 +350,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) free_extra_elems: free_percpu(htab->extra_elems); free_buckets: - kvfree(htab->buckets); + bpf_map_area_free(htab->buckets); free_htab: kfree(htab); return ERR_PTR(err); @@ -1014,7 +1010,7 @@ static void htab_map_free(struct bpf_map *map) prealloc_destroy(htab); free_percpu(htab->extra_elems); - kvfree(htab->buckets); + bpf_map_area_free(htab->buckets); kfree(htab); } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 732ae16d12b7..be8519148c25 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include "percpu_freelist.h" @@ -32,7 +31,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; int err; - smap->elems = vzalloc(elem_size * smap->map.max_entries); + smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries); if (!smap->elems) return -ENOMEM; @@ -45,7 +44,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) return 0; free_elems: - vfree(smap->elems); + bpf_map_area_free(smap->elems); return err; } @@ -76,12 +75,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-E2BIG); - smap = kzalloc(cost, GFP_USER | __GFP_NOWARN); - if (!smap) { - smap = vzalloc(cost); - if (!smap) - return ERR_PTR(-ENOMEM); - } + smap = bpf_map_area_alloc(cost); + if (!smap) + return ERR_PTR(-ENOMEM); err = -E2BIG; cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); @@ -112,7 +108,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) put_buffers: put_callchain_buffers(); free_smap: - kvfree(smap); + bpf_map_area_free(smap); return ERR_PTR(err); } @@ -262,9 +258,9 @@ static void stack_map_free(struct bpf_map *map) /* wait for bpf programs to complete before freeing stack map */ synchronize_rcu(); - vfree(smap->elems); + bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); - kvfree(smap); + bpf_map_area_free(smap); put_callchain_buffers(); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d6b29e4e2c3..19b6129eab23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -49,6 +51,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +void *bpf_map_area_alloc(size_t size) +{ + /* We definitely need __GFP_NORETRY, so OOM killer doesn't + * trigger under memory pressure as we really just want to + * fail instead. + */ + const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + void *area; + + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + area = kmalloc(size, GFP_USER | flags); + if (area != NULL) + return area; + } + + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, + PAGE_KERNEL); +} + +void bpf_map_area_free(void *area) +{ + kvfree(area); +} + int bpf_map_precharge_memlock(u32 pages) { struct user_struct *user = get_current_user(); -- cgit From b5b46c4740aed1538544f0fa849c5b76c7823469 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 18 Jan 2017 14:29:21 +0100 Subject: objtool: Fix IRET's opcode The IRET opcode is 0xcf according to the Intel manual and also to objdump of my vmlinux: 1ea8: 48 cf iretq Fix the opcode in arch_decode_instruction(). The previous value (0xc5) seems to correspond to LDS. Signed-off-by: Jiri Slaby Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170118132921.19319-1-jslaby@suse.cz Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 5e0dea2cdc01..039636ffb6c8 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -150,9 +150,9 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_RETURN; break; - case 0xc5: /* iret */ case 0xca: /* retf */ case 0xcb: /* retf */ + case 0xcf: /* iret */ *type = INSN_CONTEXT_SWITCH; break; -- cgit From 24d615a694d649aa2e167c3f97f62bdad07e3f84 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 18 Jan 2017 21:31:31 +0100 Subject: USB: serial: qcserial: add Dell DW5570 QDL The Dell DW5570 is a re-branded Sierra Wireless MC8805 which will by default boot with vid 0x413c and pid 0x81a3. When triggered QDL download mode, the device switches to pid 0x81a6 and provides the standard TTY used for firmware upgrade. Cc: Signed-off-by: Aleksander Morgado Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 1bc6089b9008..696458db7e3c 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x1410, 0xa021)}, /* Novatel Gobi 3000 Composite */ {USB_DEVICE(0x413c, 0x8193)}, /* Dell Gobi 3000 QDL */ {USB_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ + {USB_DEVICE(0x413c, 0x81a6)}, /* Dell DW5570 QDL (MC8805) */ {USB_DEVICE(0x1199, 0x68a4)}, /* Sierra Wireless QDL */ {USB_DEVICE(0x1199, 0x68a5)}, /* Sierra Wireless Modem */ {USB_DEVICE(0x1199, 0x68a8)}, /* Sierra Wireless QDL */ -- cgit From 739e6f5945d88dcee01590913f6886132a10c215 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 11 Jan 2017 13:37:07 +0100 Subject: gpio: provide lockdep keys for nested/unnested irqchips The helper function for adding a GPIO chip compiles in a lockdep key for debugging, the same key is needed for nested chips as well. The macro construction is unreadable, replace this with two static inlines instead. The _gpiochip_irqchip_add prefixed function is not helpful, rename it with gpiochip_irqchip_add_key() that tell us what the function is actually doing. Fixes: d245b3f9bd36 ("gpio: simplify adding threaded interrupts") Cc: Roger Quadros Reported-by: Clemens Gruber Reported-by: Roger Quadros Reported-by: Grygorii Strashko Tested-by: Clemens Gruber Tested-by: Grygorii Strashko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 18 ++++++------ include/linux/gpio/driver.h | 70 ++++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 86bf3b84ada5..a07ae9e37930 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1723,7 +1723,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) } /** - * _gpiochip_irqchip_add() - adds an irqchip to a gpiochip + * gpiochip_irqchip_add_key() - adds an irqchip to a gpiochip * @gpiochip: the gpiochip to add the irqchip to * @irqchip: the irqchip to add to the gpiochip * @first_irq: if not dynamically assigned, the base (first) IRQ to @@ -1749,13 +1749,13 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * the pins on the gpiochip can generate a unique IRQ. Everything else * need to be open coded. */ -int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type, - bool nested, - struct lock_class_key *lock_key) +int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type, + bool nested, + struct lock_class_key *lock_key) { struct device_node *of_node; bool irq_base_set = false; @@ -1840,7 +1840,7 @@ int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, return 0; } -EXPORT_SYMBOL_GPL(_gpiochip_irqchip_add); +EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c2748accea71..e973faba69dc 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -274,37 +274,67 @@ void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, int parent_irq); -int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, +int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type, + bool nested, + struct lock_class_key *lock_key); + +#ifdef CONFIG_LOCKDEP + +/* + * Lockdep requires that each irqchip instance be created with a + * unique key so as to avoid unnecessary warnings. This upfront + * boilerplate static inlines provides such a key for each + * unique instance. + */ +static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type) +{ + static struct lock_class_key key; + + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, false, &key); +} + +static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, - unsigned int type, - bool nested, - struct lock_class_key *lock_key); + unsigned int type) +{ + + static struct lock_class_key key; + + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, true, &key); +} +#else +static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type) +{ + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, false, NULL); +} -/* FIXME: I assume threaded IRQchips do not have the lockdep problem */ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, unsigned int type) { - return _gpiochip_irqchip_add(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, true, NULL); } - -#ifdef CONFIG_LOCKDEP -#define gpiochip_irqchip_add(...) \ -( \ - ({ \ - static struct lock_class_key _key; \ - _gpiochip_irqchip_add(__VA_ARGS__, false, &_key); \ - }) \ -) -#else -#define gpiochip_irqchip_add(...) \ - _gpiochip_irqchip_add(__VA_ARGS__, false, NULL) -#endif +#endif /* CONFIG_LOCKDEP */ #endif /* CONFIG_GPIOLIB_IRQCHIP */ -- cgit From b27e36482c02a94194fec71fb29696f4c8e9241c Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Jan 2017 23:20:28 +0100 Subject: pinctrl: meson: fix uart_ao_b for GXBB and GXL/GXM The GXBB and GXL/GXM pinctrl drivers had a configuration which conflicts with uart_ao_a. According to the GXBB ("S905") datasheet the AO UART functions are: - GPIOAO_0: Func1 = UART_TX_AO_A (bit 12), Func2 = UART_TX_AO_B (bit 26) - GPIOAO_1: Func1 = UART_RX_AO_A (bit 11), Func2 = UART_RX_AO_B (bit 25) - GPIOAO_4: Func2 = UART_TX_AO_B (bit 24) - GPIOAO_5: Func2 = UART_RX_AO_B (bit 25) The existing definition for uart_AO_A already uses GPIOAO_0 and GPIOAO_1. The old definition of uart_AO_B however was broken, as it used GPIOAO_0 for TX (which would be fine) and two pins (GPIOAO_1 and GPIOAO_5) for RX (which does not make any sense). This fixes the uart_AO_B configuration by moving it to GPIOAO_4 and GPIOAO_5 (it would be possible to use GPIOAO_0 and GPIOAO_1 in theory, but all existing hardware uses uart_AO_A there). The fix for GXBB and GXL/GXM is identical since it seems that these specific pins are identical on both SoC variants. Signed-off-by: Martin Blumenstingl Reviewed-by: Kevin Hilman Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 7 +++---- drivers/pinctrl/meson/pinctrl-meson-gxl.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index c3928aa3fefa..e0bca4df2a2f 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -253,9 +253,8 @@ static const unsigned int uart_tx_ao_a_pins[] = { PIN(GPIOAO_0, 0) }; static const unsigned int uart_rx_ao_a_pins[] = { PIN(GPIOAO_1, 0) }; static const unsigned int uart_cts_ao_a_pins[] = { PIN(GPIOAO_2, 0) }; static const unsigned int uart_rts_ao_a_pins[] = { PIN(GPIOAO_3, 0) }; -static const unsigned int uart_tx_ao_b_pins[] = { PIN(GPIOAO_0, 0) }; -static const unsigned int uart_rx_ao_b_pins[] = { PIN(GPIOAO_1, 0), - PIN(GPIOAO_5, 0) }; +static const unsigned int uart_tx_ao_b_pins[] = { PIN(GPIOAO_4, 0) }; +static const unsigned int uart_rx_ao_b_pins[] = { PIN(GPIOAO_5, 0) }; static const unsigned int uart_cts_ao_b_pins[] = { PIN(GPIOAO_2, 0) }; static const unsigned int uart_rts_ao_b_pins[] = { PIN(GPIOAO_3, 0) }; @@ -498,7 +497,7 @@ static struct meson_pmx_group meson_gxbb_aobus_groups[] = { GPIO_GROUP(GPIOAO_13, 0), /* bank AO */ - GROUP(uart_tx_ao_b, 0, 26), + GROUP(uart_tx_ao_b, 0, 24), GROUP(uart_rx_ao_b, 0, 25), GROUP(uart_tx_ao_a, 0, 12), GROUP(uart_rx_ao_a, 0, 11), diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 25694f7094c7..b69743b07a1d 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -214,9 +214,8 @@ static const unsigned int uart_tx_ao_a_pins[] = { PIN(GPIOAO_0, 0) }; static const unsigned int uart_rx_ao_a_pins[] = { PIN(GPIOAO_1, 0) }; static const unsigned int uart_cts_ao_a_pins[] = { PIN(GPIOAO_2, 0) }; static const unsigned int uart_rts_ao_a_pins[] = { PIN(GPIOAO_3, 0) }; -static const unsigned int uart_tx_ao_b_pins[] = { PIN(GPIOAO_0, 0) }; -static const unsigned int uart_rx_ao_b_pins[] = { PIN(GPIOAO_1, 0), - PIN(GPIOAO_5, 0) }; +static const unsigned int uart_tx_ao_b_pins[] = { PIN(GPIOAO_4, 0) }; +static const unsigned int uart_rx_ao_b_pins[] = { PIN(GPIOAO_5, 0) }; static const unsigned int uart_cts_ao_b_pins[] = { PIN(GPIOAO_2, 0) }; static const unsigned int uart_rts_ao_b_pins[] = { PIN(GPIOAO_3, 0) }; @@ -409,7 +408,7 @@ static struct meson_pmx_group meson_gxl_aobus_groups[] = { GPIO_GROUP(GPIOAO_9, 0), /* bank AO */ - GROUP(uart_tx_ao_b, 0, 26), + GROUP(uart_tx_ao_b, 0, 24), GROUP(uart_rx_ao_b, 0, 25), GROUP(uart_tx_ao_a, 0, 12), GROUP(uart_rx_ao_a, 0, 11), -- cgit From df1539c25cce98e2ac69881958850c6535240707 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 17 Jan 2017 19:52:54 +0900 Subject: pinctrl: uniphier: fix Ethernet (RMII) pin-mux setting for LD20 Fix the pin-mux values for the MDC, MDIO, MDIO_INTL, PHYRSTL pins. Fixes: 1e359ab1285e ("pinctrl: uniphier: add Ethernet pin-mux settings") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c index aa8bd9794683..96686336e3a3 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c @@ -561,7 +561,7 @@ static const int ether_rgmii_muxvals[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static const unsigned ether_rmii_pins[] = {30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 45}; -static const int ether_rmii_muxvals[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int ether_rmii_muxvals[] = {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}; static const unsigned i2c0_pins[] = {63, 64}; static const int i2c0_muxvals[] = {0, 0}; static const unsigned i2c1_pins[] = {65, 66}; -- cgit From 524b698db06b9b6da7192e749f637904e2f62d7b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 16 Jan 2017 18:24:56 +0100 Subject: netfilter: conntrack: remove GC_MAX_EVICTS break Instead of breaking loop and instant resched, don't bother checking this in first place (the loop calls cond_resched for every bucket anyway). Suggested-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3a073cd9fcf4..6feb5d370319 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -88,8 +88,6 @@ static __read_mostly bool nf_conntrack_locks_all; #define GC_MAX_BUCKETS_DIV 64u /* upper bound of scan intervals */ #define GC_INTERVAL_MAX (2 * HZ) -/* maximum conntracks to evict per gc run */ -#define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -979,8 +977,7 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched_rcu_qs(); - } while (++buckets < goal && - expired_count < GC_MAX_EVICTS); + } while (++buckets < goal); if (gc_work->exiting) return; @@ -1005,7 +1002,7 @@ static void gc_worker(struct work_struct *work) * In case we have lots of evictions next scan is done immediately. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + if (ratio >= 90) { gc_work->next_gc_run = 0; next_run = 0; } else if (expired_count) { -- cgit From e5072053b09642b8ff417d47da05b84720aea3ee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Jan 2017 02:01:22 +0100 Subject: netfilter: conntrack: refine gc worker heuristics, redux This further refines the changes made to conntrack gc_worker in commit e0df8cae6c16 ("netfilter: conntrack: refine gc worker heuristics"). The main idea of that change was to reduce the scan interval when evictions take place. However, on the reporters' setup, there are 1-2 million conntrack entries in total and roughly 8k new (and closing) connections per second. In this case we'll always evict at least one entry per gc cycle and scan interval is always at 1 jiffy because of this test: } else if (expired_count) { gc_work->next_gc_run /= 2U; next_run = msecs_to_jiffies(1); being true almost all the time. Given we scan ~10k entries per run its clearly wrong to reduce interval based on nonzero eviction count, it will only waste cpu cycles since a vast majorities of conntracks are not timed out. Thus only look at the ratio (scanned entries vs. evicted entries) to make a decision on whether to reduce or not. Because evictor is supposed to only kick in when system turns idle after a busy period, pick a high ratio -- this makes it 50%. We thus keep the idea of increasing scan rate when its likely that table contains many expired entries. In order to not let timed-out entries hang around for too long (important when using event logging, in which case we want to timely destroy events), we now scan the full table within at most GC_MAX_SCAN_JIFFIES (16 seconds) even in worst-case scenario where all timed-out entries sit in same slot. I tested this with a vm under synflood (with sysctl net.netfilter.nf_conntrack_tcp_timeout_syn_recv=3). While flood is ongoing, interval now stays at its max rate (GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV -> 125ms). With feedback from Nicolas Dichtel. Reported-by: Denys Fedoryshchenko Cc: Nicolas Dichtel Fixes: b87a2f9199ea82eaadc ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Tested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Tested-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6feb5d370319..4e8083c5e01d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -85,9 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 64u -/* upper bound of scan intervals */ -#define GC_INTERVAL_MAX (2 * HZ) +#define GC_MAX_BUCKETS_DIV 128u +/* upper bound of full table scan */ +#define GC_MAX_SCAN_JIFFIES (16u * HZ) +/* desired ratio of entries found to be expired */ +#define GC_EVICT_RATIO 50u static struct conntrack_gc_work conntrack_gc_work; @@ -936,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { + unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int i, goal, buckets = 0, expired_count = 0; struct conntrack_gc_work *gc_work; unsigned int ratio, scanned = 0; @@ -994,27 +997,25 @@ static void gc_worker(struct work_struct *work) * 1. Minimize time until we notice a stale entry * 2. Maximize scan intervals to not waste cycles * - * Normally, expired_count will be 0, this increases the next_run time - * to priorize 2) above. + * Normally, expire ratio will be close to 0. * - * As soon as a timed-out entry is found, move towards 1) and increase - * the scan frequency. - * In case we have lots of evictions next scan is done immediately. + * As soon as a sizeable fraction of the entries have expired + * increase scan frequency. */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) { - gc_work->next_gc_run = 0; - next_run = 0; - } else if (expired_count) { - gc_work->next_gc_run /= 2U; - next_run = msecs_to_jiffies(1); + if (ratio > GC_EVICT_RATIO) { + gc_work->next_gc_run = min_interval; } else { - if (gc_work->next_gc_run < GC_INTERVAL_MAX) - gc_work->next_gc_run += msecs_to_jiffies(1); + unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - next_run = gc_work->next_gc_run; + BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); + + gc_work->next_gc_run += min_interval; + if (gc_work->next_gc_run > max) + gc_work->next_gc_run = max; } + next_run = gc_work->next_gc_run; gc_work->last_bucket = i; queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } @@ -1022,7 +1023,7 @@ static void gc_worker(struct work_struct *work) static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = GC_INTERVAL_MAX; + gc_work->next_gc_run = HZ; gc_work->exiting = false; } @@ -1914,7 +1915,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); return 0; -- cgit From 59cfa789d04c35b6c647aacf4cc89b3d4d430cfe Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Thu, 19 Jan 2017 11:50:32 +0530 Subject: MAINTAINERS: update cxgb4 maintainer Ganesg will be taking over as maintainer from now Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c36976d3bd1a..af3456d33a18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3564,7 +3564,7 @@ F: drivers/infiniband/hw/cxgb3/ F: include/uapi/rdma/cxgb3-abi.h CXGB4 ETHERNET DRIVER (CXGB4) -M: Hariprasad S +M: Ganesh Goudar L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported -- cgit From 03e4deff4987f79c34112c5ba4eb195d4f9382b0 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 19 Jan 2017 16:26:21 +0800 Subject: ipv6: addrconf: Avoid addrconf_disable_change() using RCU read-side lock Just like commit 4acd4945cd1e ("ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock"), it is unnecessary to make addrconf_disable_change() use RCU iteration over the netdev list, since it already holds the RTNL lock, or we may meet Illegal context switch in RCU read-side critical section. Signed-off-by: Kefeng Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c1e124bc8e1e..f60e88e56255 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5540,8 +5540,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -5550,7 +5549,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf) dev_disable_change(idev); } } - rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) -- cgit From ad05df399f3343b10664827a3860669a8a80782d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Jan 2017 10:33:29 +0100 Subject: net/mlx5e: Remove unused variable A cleanup removed the only user of this variable mlx5/core/en_ethtool.c: In function 'mlx5e_set_channels': mlx5/core/en_ethtool.c:546:6: error: unused variable 'ncv' [-Werror=unused-variable] Let's remove the declaration as well. Fixes: 639e9e94160e ("net/mlx5e: Remove unnecessary checks when setting num channels") Signed-off-by: Arnd Bergmann Reported-by: Stephen Rothwell Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b1b9eb6ee135..5197817e4b2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -543,7 +543,6 @@ static int mlx5e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; -- cgit From 0dbd7ff3ac5017a46033a9d0a87a8267d69119d9 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 19 Jan 2017 16:36:39 +0300 Subject: tcp: initialize max window for a new fastopen socket Found that if we run LTP netstress test with large MSS (65K), the first attempt from server to send data comparable to this MSS on fastopen connection will be delayed by the probe timer. Here is an example: < S seq 0:0 win 43690 options [mss 65495 wscale 7 tfo cookie] length 32 > S. seq 0:0 ack 1 win 43690 options [mss 65495 wscale 7] length 0 < . ack 1 win 342 length 0 Inside tcp_sendmsg(), tcp_send_mss() returns max MSS in 'mss_now', as well as in 'size_goal'. This results the segment not queued for transmition until all the data copied from user buffer. Then, inside __tcp_push_pending_frames(), it breaks on send window test and continues with the check probe timer. Fragmentation occurs in tcp_write_wakeup()... +0.2 > P. seq 1:43777 ack 1 win 342 length 43776 < . ack 43777, win 1365 length 0 > P. seq 43777:65001 ack 1 win 342 options [...] length 21224 ... This also contradicts with the fact that we should bound to the half of the window if it is large. Fix this flaw by correctly initializing max_window. Before that, it could have large values that affect further calculations of 'size_goal'. Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Alexey Kodanev Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_fastopen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f51919535ca7..dd2560c83a85 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash -- cgit From 69fed99baac186013840ced3524562841296034f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 19:44:42 -0800 Subject: gianfar: Do not reuse pages from emergency reserve A driver using dev_alloc_page() must not reuse a page that had to use emergency memory reserve. Otherwise all packets using this page will be immediately dropped, unless for very specific sockets having SOCK_MEMALLOC bit set. This issue might be hard to debug, because only a fraction of the RX ring buffer would suffer from drops. Fixes: 75354148ce69 ("gianfar: Add paged allocation and Rx S/G") Signed-off-by: Eric Dumazet Cc: Claudiu Manoil Acked-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a6e7afa878be..c1b671667920 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2948,7 +2948,7 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, } /* try reuse page */ - if (unlikely(page_count(page) != 1)) + if (unlikely(page_count(page) != 1 || page_is_pfmemalloc(page))) return false; /* change offset to the other half */ -- cgit From 47b5d64691350e116093c9b47b55ca6b9433bc50 Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Wed, 7 Sep 2016 20:17:38 +0800 Subject: btrfs: fix false enospc error when truncating heavily reflinked file Below test script can reveal this bug: dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=100 dev=$(losetup --show -f fs.img) mkdir -p /mnt/mntpoint mkfs.btrfs -f $dev mount $dev /mnt/mntpoint cd /mnt/mntpoint echo "workdir is: /mnt/mntpoint" blocksize=$((128 * 1024)) dd if=/dev/zero of=testfile bs=$blocksize count=1 sync count=$((17*1024*1024*1024/blocksize)) echo "file size is:" $((count*blocksize)) for ((i = 1; i <= $count; i++)); do dst_offset=$((blocksize * i)) xfs_io -f -c "reflink testfile 0 $dst_offset $blocksize"\ testfile > /dev/null done sync truncate --size 0 testfile The last truncate operation will fail for ENOSPC reason, but indeed it should not fail. In btrfs_truncate(), we use a temporary block_rsv to do truncate operation. With every btrfs_truncate_inode_items() call, we migrate space to this block_rsv, but forget to cleanup previous reservation, which will make this block_rsv's reserved bytes keep growing, and this reserved space will only be released in the end of btrfs_truncate(), this metadata leak will impact other's metadata reservation. In this case, it's "btrfs_start_transaction(root, 2);" fails for enospc error, which make this truncate operation fail. Call btrfs_block_rsv_release() to fix this bug. Signed-off-by: Wang Xiaoguang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 128e52489f81..415a6ad5ba22 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9215,6 +9215,7 @@ static int btrfs_truncate(struct inode *inode) break; } + btrfs_block_rsv_release(fs_info, rsv, -1); ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, 0); BUG_ON(ret); /* shouldn't happen */ -- cgit From 97dcdea076ecef41ea4aaa23d4397c2f622e4265 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Fri, 23 Dec 2016 15:00:18 +0530 Subject: Btrfs: Fix deadlock between direct IO and fast fsync The following deadlock is seen when executing generic/113 test, ---------------------------------------------------------+---------------------------------------------------- Direct I/O task Fast fsync task ---------------------------------------------------------+---------------------------------------------------- btrfs_direct_IO __blockdev_direct_IO do_blockdev_direct_IO do_direct_IO btrfs_get_blocks_direct while (blocks needs to written) get_more_blocks (first iteration) btrfs_get_blocks_direct btrfs_create_dio_extent down_read(&BTRFS_I(inode) >dio_sem) Create and add extent map and ordered extent up_read(&BTRFS_I(inode) >dio_sem) btrfs_sync_file btrfs_log_dentry_safe btrfs_log_inode_parent btrfs_log_inode btrfs_log_changed_extents down_write(&BTRFS_I(inode) >dio_sem) Collect new extent maps and ordered extents wait for ordered extent completion get_more_blocks (second iteration) btrfs_get_blocks_direct btrfs_create_dio_extent down_read(&BTRFS_I(inode) >dio_sem) -------------------------------------------------------------------------------------------------------------- In the above description, Btrfs direct I/O code path has not yet started submitting bios for file range covered by the initial ordered extent. Meanwhile, The fast fsync task obtains the write semaphore and waits for I/O on the ordered extent to get completed. However, the Direct I/O task is now blocked on obtaining the read semaphore. To resolve the deadlock, this commit modifies the Direct I/O code path to obtain the read semaphore before invoking __blockdev_direct_IO(). The semaphore is then given up after __blockdev_direct_IO() returns. This allows the Direct I/O code to complete I/O on all the ordered extents it creates. Signed-off-by: Chandan Rajendra Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 415a6ad5ba22..bed6cf5f67a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7215,7 +7215,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, struct extent_map *em = NULL; int ret; - down_read(&BTRFS_I(inode)->dio_sem); if (type != BTRFS_ORDERED_NOCOW) { em = create_pinned_em(inode, start, len, orig_start, block_start, block_len, orig_block_len, @@ -7234,7 +7233,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, em = ERR_PTR(ret); } out: - up_read(&BTRFS_I(inode)->dio_sem); return em; } @@ -8695,6 +8693,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.unsubmitted_oe_range_start = (u64)offset; dio_data.unsubmitted_oe_range_end = (u64)offset; current->journal_info = &dio_data; + down_read(&BTRFS_I(inode)->dio_sem); } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8707,6 +8706,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) iter, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { + up_read(&BTRFS_I(inode)->dio_sem); current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { if (dio_data.reserve) -- cgit From 91298eec05cd8d4e828cf7ee5d4a6334f70cf69a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 1 Dec 2016 13:43:31 -0800 Subject: Btrfs: fix truncate down when no_holes feature is enabled For such a file mapping, [0-4k][hole][8k-12k] In NO_HOLES mode, we don't have the [hole] extent any more. Commit c1aa45759e90 ("Btrfs: fix shrinking truncate when the no_holes feature is enabled") fixed disk isize not being updated in NO_HOLES mode when data is not flushed. However, even if data has been flushed, we can still have trouble in updating disk isize since we updated disk isize to 'start' of the last evicted extent. Reviewed-by: Chris Mason Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bed6cf5f67a4..177e7284909c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4505,8 +4505,19 @@ search_again: if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) + if (item_end < new_size) { + /* + * With NO_HOLES mode, for the following mapping + * + * [0-4k][hole][8k-12k] + * + * if truncating isize down to 6k, it ends up + * isize being 8k. + */ + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) + last_size = new_size; break; + } if (found_key.offset >= new_size) del_item = 1; else -- cgit From e326ce013a8e851193eb337aafb1aa396c533a61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 20 Jan 2017 03:25:34 +0100 Subject: Revert "PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag" Revert commit 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag) as it caused system suspend (in the default configuration) to fail on Dell XPS13 (9360) with the Kaby Lake processor. Fixes: 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag) Reported-by: Paul Menzel Signed-off-by: Rafael J. Wysocki --- Documentation/power/states.txt | 4 +--- drivers/acpi/sleep.c | 8 -------- include/linux/suspend.h | 2 -- kernel/power/suspend.c | 4 ++-- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 8a39ce45d8a0..008ecb588317 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -35,9 +35,7 @@ only one way to cause the system to go into the Suspend-To-RAM state (write The default suspend mode (ie. the one to be used without writing anything into /sys/power/mem_sleep) is either "deep" (if Suspend-To-RAM is supported) or "s2idle", but it can be overridden by the value of the "mem_sleep_default" -parameter in the kernel command line. On some ACPI-based systems, depending on -the information in the FADT, the default may be "s2idle" even if Suspend-To-RAM -is supported. +parameter in the kernel command line. The properties of all of the sleep states are described below. diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index ce1855fd584b..deb0ff78eba8 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -691,14 +691,6 @@ static void acpi_sleep_suspend_setup(void) if (acpi_sleep_state_supported(i)) sleep_states[i] = 1; - /* - * Use suspend-to-idle by default if ACPI_FADT_LOW_POWER_S0 is set and - * the default suspend mode was not selected from the command line. - */ - if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0 && - mem_sleep_default > PM_SUSPEND_MEM) - mem_sleep_default = PM_SUSPEND_FREEZE; - suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); freeze_set_ops(&acpi_freeze_ops); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0c729c3c8549..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -194,8 +194,6 @@ struct platform_freeze_ops { }; #ifdef CONFIG_SUSPEND -extern suspend_state_t mem_sleep_default; - /** * suspend_set_ops - set platform dependent suspend operations * @ops: The new suspend operations to set. diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f67ceb7768b8..15e6baef5c73 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -46,7 +46,7 @@ static const char * const mem_sleep_labels[] = { const char *mem_sleep_states[PM_SUSPEND_MAX]; suspend_state_t mem_sleep_current = PM_SUSPEND_FREEZE; -suspend_state_t mem_sleep_default = PM_SUSPEND_MAX; +static suspend_state_t mem_sleep_default = PM_SUSPEND_MEM; unsigned int pm_suspend_global_flags; EXPORT_SYMBOL_GPL(pm_suspend_global_flags); @@ -168,7 +168,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops) } if (valid_state(PM_SUSPEND_MEM)) { mem_sleep_states[PM_SUSPEND_MEM] = mem_sleep_labels[PM_SUSPEND_MEM]; - if (mem_sleep_default >= PM_SUSPEND_MEM) + if (mem_sleep_default == PM_SUSPEND_MEM) mem_sleep_current = PM_SUSPEND_MEM; } -- cgit From 1443ebbacfd7f8d84fbbbf629ef66a12dc8a4b4e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 18 Jan 2017 10:48:22 -0800 Subject: cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy A side effect of keeping intel_pstate sysfs limits in sync with cpufreq is that the now sysfs limits can't enforced under performance policy. For example, if the max_perf_pct is changed from 100 to 80, this will call intel_pstate_set_policy(), which will change the max_perf to 100 again for performance policy. Same issue happens, when no_turbo is set. This change calculates max and min frequency using sysfs performance limits in intel_pstate_verify_policy() and adjusts policy limits by calling cpufreq_verify_within_limits(). Also, it causes the setting of performance limits to be skipped if no_turbo is set. Fixes: 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e261438cd690..ed215c5592f6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1951,7 +1951,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits = &performance_limits; perf_limits = limits; } - if (policy->max >= policy->cpuinfo.max_freq) { + if (policy->max >= policy->cpuinfo.max_freq && + !limits->no_turbo) { pr_debug("set performance\n"); intel_pstate_set_performance_limits(perf_limits); goto out; @@ -1993,6 +1994,17 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) policy->policy != CPUFREQ_POLICY_PERFORMANCE) return -EINVAL; + /* When per-CPU limits are used, sysfs limits are not used */ + if (!per_cpu_limits) { + unsigned int max_freq, min_freq; + + max_freq = policy->cpuinfo.max_freq * + limits->max_sysfs_pct / 100; + min_freq = policy->cpuinfo.max_freq * + limits->min_sysfs_pct / 100; + cpufreq_verify_within_limits(policy, min_freq, max_freq); + } + return 0; } -- cgit From 7a37052adb5e843bcfff6c98aee9b60bb087b910 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 19 Jan 2017 15:21:34 +0800 Subject: ACPICA: Tables: Fix hidden logic related to acpi_tb_install_standard_table() There is a hidden logic for acpi_tb_install_standard_table() as it can be invoked from the boot stage and during runtime. 1. When it is invoked from the OS boot stage, the ACPICA mutex may not have been initialized yet and so acpi_ut_acquire_mutex()/acpi_ut_release_mutex() are not invoked in these code paths: acpi_initialize_tables acpi_tb_parse_root_table acpi_tb_install_standard_table (4 invocations) acpi_install_table acpi_tb_install_standard_table 2. When it is invoked during the runtime, ACPICA mutex is used as appropriate: acpi_ex_load_op acpi_tb_install_and_load_table acpi_tb_install_standard_table acpi_load_table acpi_tb_install_and_load_table acpi_tb_install_standard_table The mutex is now used in acpi_tb_install_and_load_table(), while it actually should be in acpi_tb_install_standard_table(). This introduces another problem in acpi_tb_install_standard_table() where acpi_gbl_table_handler is invoked from and the lock contexts are thus not consistent for the table handlers. This triggers a regression when acpi_get_table()/acpi_put_table() start to hold table mutex during runtime. The regression is noticed by LKP as new errors reported by ACPICA mutex debugging facility. [ 2.043693] ACPI Error: Mutex [ACPI_MTX_Tables] already acquired by this thread [497483776] (20160930/utmutex-254) [ 2.054084] ACPI Error: Mutex [0x2] is not acquired, cannot release (20160930/utmutex-326) And it triggers a deadlock: [ 247.066214] INFO: task swapper/0:1 blocked for more than 120 seconds. ... [ 247.091271] Call Trace: ... [ 247.121523] down_timeout+0x47/0x50 [ 247.125065] acpi_os_wait_semaphore+0x47/0x62 [ 247.129475] acpi_ut_acquire_mutex+0x43/0x81 [ 247.133798] acpi_get_table+0x2d/0x84 [ 247.137513] acpi_table_attr_init+0xcd/0x100 [ 247.146590] acpi_sysfs_table_handler+0x5d/0xb8 [ 247.151174] acpi_bus_table_handler+0x23/0x2a [ 247.155583] acpi_tb_install_standard_table+0xe0/0x213 [ 247.164489] acpi_tb_install_and_load_table+0x3a/0x82 [ 247.169592] acpi_ex_load_op+0x194/0x201 ... [ 247.200108] acpi_ns_evaluate+0x1bb/0x247 [ 247.204170] acpi_evaluate_object+0x178/0x274 [ 247.213249] acpi_processor_set_pdc+0x154/0x17b ... The table mutex is held in acpi_tb_install_and_load_table() and is re-visited by acpi_get_table(). Noticing that the early mutex requirement actually belongs to the OSL layer and has already been handled in acpi_os_wait_semaphore()/acpi_os_signal_semaphore(), the regression canbe fixed by removing this hidden logic from the ACPICA core to the OS-specific code. Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() and early_acpi_os_unmap_memory() from Linux kernel") Reported-and-tested-by: Tomi Sarvela Reported-by: Ye Xiaolong Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbdata.c | 9 ++------- drivers/acpi/acpica/tbinstal.c | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c index 82b0b5710979..b0399e8f6d27 100644 --- a/drivers/acpi/acpica/tbdata.c +++ b/drivers/acpi/acpica/tbdata.c @@ -852,23 +852,18 @@ acpi_tb_install_and_load_table(acpi_physical_address address, ACPI_FUNCTION_TRACE(tb_install_and_load_table); - (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); - /* Install the table and load it into the namespace */ status = acpi_tb_install_standard_table(address, flags, TRUE, override, &i); if (ACPI_FAILURE(status)) { - goto unlock_and_exit; + goto exit; } - (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); status = acpi_tb_load_table(i, acpi_gbl_root_node); - (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); -unlock_and_exit: +exit: *table_index = i; - (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 5fdf251a9f97..01e1b3d63fc0 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -217,6 +217,10 @@ acpi_tb_install_standard_table(acpi_physical_address address, goto release_and_exit; } + /* Acquire the table lock */ + + (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); + if (reload) { /* * Validate the incoming table signature. @@ -244,7 +248,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, new_table_desc.signature.integer)); status = AE_BAD_SIGNATURE; - goto release_and_exit; + goto unlock_and_exit; } /* Check if table is already registered */ @@ -279,7 +283,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, /* Table is still loaded, this is an error */ status = AE_ALREADY_EXISTS; - goto release_and_exit; + goto unlock_and_exit; } else { /* * Table was unloaded, allow it to be reloaded. @@ -290,6 +294,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, * indicate the re-installation. */ acpi_tb_uninstall_table(&new_table_desc); + (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); *table_index = i; return_ACPI_STATUS(AE_OK); } @@ -303,11 +308,19 @@ acpi_tb_install_standard_table(acpi_physical_address address, /* Invoke table handler if present */ + (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); if (acpi_gbl_table_handler) { (void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_INSTALL, new_table_desc.pointer, acpi_gbl_table_handler_context); } + (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); + +unlock_and_exit: + + /* Release the table lock */ + + (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); release_and_exit: -- cgit From 4151e9a61c26bc86a356edfea713c0f913582760 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 17 Jan 2017 15:15:18 +0100 Subject: drm/exynos/decon5433: do not disable video after reset decon_commit is called just after reset so video is disabled anyway. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 09e8cc36948e..d4920091e35c 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -142,8 +142,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) m->crtc_vsync_end = m->crtc_vsync_start + 1; } - decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID, 0); - /* enable clock gate */ val = CMU_CLKGAGE_MODE_SFR_F | CMU_CLKGAGE_MODE_MEM_F; writel(val, ctx->addr + DECON_CMU); -- cgit From 1202a096328ed3de59e2a722038c4d80ec59a958 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 17 Jan 2017 15:15:19 +0100 Subject: drm/exynos/decon5433: fix CMU programming DECON_CMU register has reserved bits which should not be zeroed, otherwise IP can behave strangely and cause IOMMU faults. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index d4920091e35c..3a9aca1c62f9 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -142,10 +142,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) m->crtc_vsync_end = m->crtc_vsync_start + 1; } - /* enable clock gate */ - val = CMU_CLKGAGE_MODE_SFR_F | CMU_CLKGAGE_MODE_MEM_F; - writel(val, ctx->addr + DECON_CMU); - if (ctx->out_type & (IFTYPE_I80 | I80_HW_TRG)) decon_setup_trigger(ctx); -- cgit From 11d8bcef7a0399e1d2519f207fd575fc404306b4 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 17 Jan 2017 15:15:20 +0100 Subject: drm/exynos/decon5433: set STANDALONE_UPDATE_F on output enablement DECON_TV requires STANDALONE_UPDATE after output enabling, otherwise it does not start. This change is neutral for DECON. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 3a9aca1c62f9..75eeb831ed6a 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -183,6 +183,8 @@ static void decon_commit(struct exynos_drm_crtc *crtc) /* enable output and display signal */ decon_set_bits(ctx, DECON_VIDCON0, VIDCON0_ENVID | VIDCON0_ENVID_F, ~0); + + decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0); } static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, -- cgit From f1225ee4c8fcf09afaa199b8b1f0450f38b8cd11 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 Jan 2017 10:39:09 -0800 Subject: swiotlb-xen: update dev_addr after swapping pages In xen_swiotlb_map_page and xen_swiotlb_map_sg_attrs, if the original page is not suitable, we swap it for another page from the swiotlb pool. In these cases, we don't update the previously calculated dma address for the page before calling xen_dma_map_page. Thus, we end up calling xen_dma_map_page passing the wrong dev_addr, resulting in xen_dma_map_page mistakenly assuming that the page is foreign when it is local. Fix the bug by updating dev_addr appropriately. This change has no effect on x86, because xen_dma_map_page is a stub there. Signed-off-by: Stefano Stabellini Signed-off-by: Pooya Keshavarzi Tested-by: Pooya Keshavarzi Reviewed-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f905d6eeb048..f8afc6dcc29f 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -414,9 +414,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (map == SWIOTLB_MAP_ERROR) return DMA_ERROR_CODE; + dev_addr = xen_phys_to_bus(map); xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), dev_addr, map & ~PAGE_MASK, size, dir, attrs); - dev_addr = xen_phys_to_bus(map); /* * Ensure that the address returned is DMA'ble @@ -575,13 +575,14 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sg_dma_len(sgl) = 0; return 0; } + dev_addr = xen_phys_to_bus(map); xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT), dev_addr, map & ~PAGE_MASK, sg->length, dir, attrs); - sg->dma_address = xen_phys_to_bus(map); + sg->dma_address = dev_addr; } else { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed -- cgit From 6391a4481ba0796805d6581e42f9f0418c099e34 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 20 Jan 2017 14:32:42 +0800 Subject: virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too, fixing this by adding a hint (has_data_valid) and set it only on the receiving path. Cc: Rolf Neugebauer Signed-off-by: Jason Wang Acked-by: Rolf Neugebauer Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- drivers/net/tun.c | 2 +- drivers/net/virtio_net.c | 2 +- include/linux/virtio_net.h | 6 +++++- net/packet/af_packet.c | 4 ++-- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5c26653eceb5..402618565838 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -825,7 +825,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, return -EINVAL; if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - macvtap_is_little_endian(q))) + macvtap_is_little_endian(q), true)) BUG(); if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cd8e02c94be0..2cd10b26b650 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1360,7 +1360,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, return -EINVAL; if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun))) { + tun_is_little_endian(tun), true)) { struct skb_shared_info *sinfo = skb_shinfo(skb); pr_err("unexpected GSO type: " "0x%x, gso_size %d, hdr_len %d\n", diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4a105006ca63..347424351ade 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1104,7 +1104,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) hdr = skb_vnet_hdr(skb); if (virtio_net_hdr_from_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev))) + virtio_is_little_endian(vi->vdev), false)) BUG(); if (vi->mergeable_rx_bufs) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 56436472ccc7..5209b5ed2a64 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, - bool little_endian) + bool little_endian, + bool has_data_valid) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ @@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); + } else if (has_data_valid && + skb->ip_summed == CHECKSUM_UNNECESSARY) { + hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b9e1a13b4ba3..3d555c79a7b5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1976,7 +1976,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, return -EINVAL; *len -= sizeof(vnet_hdr); - if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le())) + if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); @@ -2237,7 +2237,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (po->has_vnet_hdr) { if (virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le())) { + vio_le(), true)) { spin_lock(&sk->sk_receive_queue.lock); goto drop_n_account; } -- cgit From 2c561b2b728ca4013e76d6439bde2c137503745e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Fri, 20 Jan 2017 14:33:55 +0800 Subject: r8152: fix rtl8152_post_reset function The rtl8152_post_reset() should sumbit rx urb and interrupt transfer, otherwise the rx wouldn't work and the linking change couldn't be detected. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f3b48ad90865..0e99af090734 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3545,12 +3545,14 @@ static int rtl8152_post_reset(struct usb_interface *intf) if (netif_carrier_ok(netdev)) { mutex_lock(&tp->control); tp->rtl_ops.enable(tp); + rtl_start_rx(tp); rtl8152_set_rx_mode(netdev); mutex_unlock(&tp->control); netif_wake_queue(netdev); } napi_enable(&tp->napi); + usb_submit_urb(tp->intr_urb, GFP_KERNEL); return 0; } -- cgit From e363116b90906f326c9cde5473b4b9a99ba476df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2017 07:57:42 -0800 Subject: ipv6: seg6_genl_set_tunsrc() must check kmemdup() return value seg6_genl_get_tunsrc() and set_tun_src() do not handle tun_src being possibly NULL, so we must check kmemdup() return value and abort if it is NULL Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Eric Dumazet Cc: David Lebrun Acked-by: David Lebrun Signed-off-by: David S. Miller --- net/ipv6/seg6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index b172d85c650a..a855eb325b03 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -176,6 +176,8 @@ static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) val = nla_data(info->attrs[SEG6_ATTR_DST]); t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + if (!t_new) + return -ENOMEM; mutex_lock(&sdata->lock); -- cgit From 0e73fc9a56f22f2eec4d2b2910c649f7af67b74d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Jan 2017 13:01:57 +0000 Subject: net: sctp: fix array overrun read on sctp_timer_tbl The comparison on the timeout can lead to an array overrun read on sctp_timer_tbl because of an off-by-one error. Fix this by using < instead of <= and also compare to the array size rather than SCTP_EVENT_TIMEOUT_MAX. Fixes CoverityScan CID#1397639 ("Out-of-bounds read") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/sctp/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 95d7b15dad21..e371a0d90068 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[] = { /* Lookup timer debug name. */ const char *sctp_tname(const sctp_subtype_t id) { - if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) + if (id.timeout < ARRAY_SIZE(sctp_timer_tbl)) return sctp_timer_tbl[id.timeout]; return "unknown_timer"; } -- cgit From 91e744653cb80554f3fdfd1d31c5ddf7b6169f37 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 Jan 2017 11:29:43 -0500 Subject: Revert "net: sctp: fix array overrun read on sctp_timer_tbl" This reverts commit 0e73fc9a56f22f2eec4d2b2910c649f7af67b74d. This fix wasn't correct, a better one is coming right up. Signed-off-by: David S. Miller --- net/sctp/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index e371a0d90068..95d7b15dad21 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[] = { /* Lookup timer debug name. */ const char *sctp_tname(const sctp_subtype_t id) { - if (id.timeout < ARRAY_SIZE(sctp_timer_tbl)) + if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) return sctp_timer_tbl[id.timeout]; return "unknown_timer"; } -- cgit From 0629a330cf55454962168dd3ee46fad53a39323e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2017 15:52:52 +0100 Subject: qed: avoid possible stack overflow in qed_ll2_acquire_connection struct qed_ll2_info is rather large, so putting it on the stack can cause an overflow, as this warning tries to tell us: drivers/net/ethernet/qlogic/qed/qed_ll2.c: In function 'qed_ll2_start': drivers/net/ethernet/qlogic/qed/qed_ll2.c:2159:1: error: the frame size of 1056 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] qed_ll2_start_ooo() already uses a dynamic allocation for the structure to work around that problem, and we could do the same in qed_ll2_start() as well as qed_roce_ll2_start(), but since the structure is only used to pass a couple of initialization values here, it seems nicer to replace it with a different structure. Lacking any idea for better naming, I'm adding 'struct qed_ll2_conn', which now contains all the initialization data, and this now simply gets copied into struct qed_ll2_info rather than assigning all members one by one. Signed-off-by: Arnd Bergmann Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 88 +++++++++++++----------------- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 24 ++++---- drivers/net/ethernet/qlogic/qed/qed_roce.c | 2 +- 3 files changed, 53 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 8e5cb7605b0f..873ce2cd76ba 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -297,7 +297,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) list_del(&p_pkt->list_entry); b_last_packet = list_empty(&p_tx->active_descq); list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); - if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) { struct qed_ooo_buffer *p_buffer; p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; tx_frag = p_pkt->bds_set[0].tx_frag; - if (p_ll2_conn->gsi_enable) + if (p_ll2_conn->conn.gsi_enable) qed_ll2b_release_tx_gsi_packet(p_hwfn, p_ll2_conn-> my_id, @@ -378,7 +378,7 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) spin_unlock_irqrestore(&p_tx->lock, flags); tx_frag = p_pkt->bds_set[0].tx_frag; - if (p_ll2_conn->gsi_enable) + if (p_ll2_conn->conn.gsi_enable) qed_ll2b_complete_tx_gsi_packet(p_hwfn, p_ll2_conn->my_id, p_pkt->cookie, @@ -550,7 +550,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); - if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) { struct qed_ooo_buffer *p_buffer; p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@ -738,7 +738,7 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn, rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1, p_buffer->vlan, bd_flags, l4_hdr_offset_w, - p_ll2_conn->tx_dest, 0, + p_ll2_conn->conn.tx_dest, 0, first_frag, p_buffer->packet_length, p_buffer, true); @@ -858,7 +858,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn, u16 buf_idx; int rc = 0; - if (p_ll2_info->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_info->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return rc; if (!rx_num_ooo_buffers) @@ -901,7 +901,7 @@ static void qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn) { - if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return; qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@ -913,7 +913,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn, { struct qed_ooo_buffer *p_buffer; - if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type != QED_LL2_TYPE_ISCSI_OOO) return; qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@ -945,23 +945,19 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev, { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - struct qed_ll2_info *ll2_info; + struct qed_ll2_conn ll2_info; int rc; - ll2_info = kzalloc(sizeof(*ll2_info), GFP_KERNEL); - if (!ll2_info) - return -ENOMEM; - ll2_info->conn_type = QED_LL2_TYPE_ISCSI_OOO; - ll2_info->mtu = params->mtu; - ll2_info->rx_drop_ttl0_flg = params->drop_ttl0_packets; - ll2_info->rx_vlan_removal_en = params->rx_vlan_stripping; - ll2_info->tx_tc = OOO_LB_TC; - ll2_info->tx_dest = CORE_TX_DEST_LB; - - rc = qed_ll2_acquire_connection(hwfn, ll2_info, + ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO; + ll2_info.mtu = params->mtu; + ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; + ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; + ll2_info.tx_tc = OOO_LB_TC; + ll2_info.tx_dest = CORE_TX_DEST_LB; + + rc = qed_ll2_acquire_connection(hwfn, &ll2_info, QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, handle); - kfree(ll2_info); if (rc) { DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n"); goto out; @@ -1006,7 +1002,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, u8 action_on_error) { - enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type; struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; struct core_rx_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@ -1032,7 +1028,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->sb_index = p_rx->rx_sb_index; p_ramrod->complete_event_flg = 1; - p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu); DMA_REGPAIR_LE(p_ramrod->bd_base, p_rx->rxq_chain.p_phys_addr); cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain); @@ -1040,8 +1036,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, qed_chain_get_pbl_phys(&p_rx->rcq_chain)); - p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg; - p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en; + p_ramrod->drop_ttl0_flg = p_ll2_conn->conn.rx_drop_ttl0_flg; + p_ramrod->inner_vlan_removal_en = p_ll2_conn->conn.rx_vlan_removal_en; p_ramrod->queue_id = p_ll2_conn->queue_id; p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0 : 1; @@ -1056,14 +1052,14 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, } p_ramrod->action_on_error.error_type = action_on_error; - p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn) { - enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn.conn_type; struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; struct core_tx_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@ -1075,7 +1071,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; - if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) p_ll2_conn->tx_stats_en = 0; else p_ll2_conn->tx_stats_en = 1; @@ -1096,7 +1092,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); p_ramrod->sb_index = p_tx->tx_sb_index; - p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->conn.mtu); p_ramrod->stats_en = p_ll2_conn->tx_stats_en; p_ramrod->stats_id = p_ll2_conn->tx_stats_id; @@ -1106,7 +1102,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->pbl_size = cpu_to_le16(pbl_size); memset(&pq_params, 0, sizeof(pq_params)); - pq_params.core.tc = p_ll2_conn->tx_tc; + pq_params.core.tc = p_ll2_conn->conn.tx_tc; pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params); p_ramrod->qm_pq_id = cpu_to_le16(pq_id); @@ -1123,7 +1119,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); } - p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + p_ramrod->gsi_offload_flag = p_ll2_conn->conn.gsi_enable; return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -1224,7 +1220,7 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", - p_ll2_info->conn_type, rx_num_desc); + p_ll2_info->conn.conn_type, rx_num_desc); out: return rc; @@ -1262,7 +1258,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", - p_ll2_info->conn_type, tx_num_desc); + p_ll2_info->conn.conn_type, tx_num_desc); out: if (rc) @@ -1273,7 +1269,7 @@ out: } int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, - struct qed_ll2_info *p_params, + struct qed_ll2_conn *p_params, u16 rx_num_desc, u16 tx_num_desc, u8 *p_connection_handle) @@ -1302,15 +1298,7 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, if (!p_ll2_info) return -EBUSY; - p_ll2_info->conn_type = p_params->conn_type; - p_ll2_info->mtu = p_params->mtu; - p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg; - p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en; - p_ll2_info->tx_tc = p_params->tx_tc; - p_ll2_info->tx_dest = p_params->tx_dest; - p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; - p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; - p_ll2_info->gsi_enable = p_params->gsi_enable; + p_ll2_info->conn = *p_params; rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); if (rc) @@ -1371,9 +1359,9 @@ static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, SET_FIELD(action_on_error, CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, - p_ll2_conn->ai_err_packet_too_big); + p_ll2_conn->conn.ai_err_packet_too_big); SET_FIELD(action_on_error, - CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf); + CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->conn.ai_err_no_buf); return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); } @@ -1600,7 +1588,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", p_ll2->queue_id, p_ll2->cid, - p_ll2->conn_type, + p_ll2->conn.conn_type, prod_idx, first_frag_len, num_of_bds, @@ -1676,7 +1664,7 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", p_ll2_conn->queue_id, - p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod); + p_ll2_conn->cid, p_ll2_conn->conn.conn_type, db_msg.spq_prod); } int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, @@ -1817,7 +1805,7 @@ int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) qed_ll2_rxq_flush(p_hwfn, connection_handle); } - if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->conn.conn_type == QED_LL2_TYPE_ISCSI_OOO) qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); return rc; @@ -1993,7 +1981,7 @@ static void qed_ll2_register_cb_ops(struct qed_dev *cdev, static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) { - struct qed_ll2_info ll2_info; + struct qed_ll2_conn ll2_info; struct qed_ll2_buffer *buffer, *tmp_buffer; enum qed_ll2_conn_type conn_type; struct qed_ptt *p_ptt; @@ -2041,6 +2029,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) /* Prepare the temporary ll2 information */ memset(&ll2_info, 0, sizeof(ll2_info)); + ll2_info.conn_type = conn_type; ll2_info.mtu = params->mtu; ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; @@ -2120,7 +2109,6 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) } ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - return 0; release_terminate_all: diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 6625a3ae5a33..31417928b635 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -112,15 +112,8 @@ struct qed_ll2_tx_queue { bool b_completing_packet; }; -struct qed_ll2_info { - /* Lock protecting the state of LL2 */ - struct mutex mutex; +struct qed_ll2_conn { enum qed_ll2_conn_type conn_type; - u32 cid; - u8 my_id; - u8 queue_id; - u8 tx_stats_id; - bool b_active; u16 mtu; u8 rx_drop_ttl0_flg; u8 rx_vlan_removal_en; @@ -128,10 +121,21 @@ struct qed_ll2_info { enum core_tx_dest tx_dest; enum core_error_handle ai_err_packet_too_big; enum core_error_handle ai_err_no_buf; + u8 gsi_enable; +}; + +struct qed_ll2_info { + /* Lock protecting the state of LL2 */ + struct mutex mutex; + struct qed_ll2_conn conn; + u32 cid; + u8 my_id; + u8 queue_id; + u8 tx_stats_id; + bool b_active; u8 tx_stats_en; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; - u8 gsi_enable; }; /** @@ -149,7 +153,7 @@ struct qed_ll2_info { * @return 0 on success, failure otherwise */ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, - struct qed_ll2_info *p_params, + struct qed_ll2_conn *p_params, u16 rx_num_desc, u16 tx_num_desc, u8 *p_connection_handle); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 2a16547c8966..2dbdb3298991 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -2632,7 +2632,7 @@ static int qed_roce_ll2_start(struct qed_dev *cdev, { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_roce_ll2_info *roce_ll2; - struct qed_ll2_info ll2_params; + struct qed_ll2_conn ll2_params; int rc; if (!params) { -- cgit From df384d435a5c034c735df3d9ea87a03172c59b56 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2017 15:52:53 +0100 Subject: bcm63xx_enet: avoid uninitialized variable warning gcc-7 and probably earlier versions get confused by this function and print a harmless warning: drivers/net/ethernet/broadcom/bcm63xx_enet.c: In function 'bcm_enet_open': drivers/net/ethernet/broadcom/bcm63xx_enet.c:1130:3: error: 'phydev' may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for the 'phydev' variable when it is unused and changes the check to test for that NULL pointer to make it clear that we always pass a valid pointer here. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 3b14d5144228..c483618b57bd 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -913,6 +913,8 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; + } else { + phydev = NULL; } /* mask all interrupts and request them */ @@ -1083,7 +1085,7 @@ static int bcm_enet_open(struct net_device *dev) enet_dmac_writel(priv, priv->dma_chan_int_mask, ENETDMAC_IRMASK, priv->tx_chan); - if (priv->has_phy) + if (phydev) phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1126,7 +1128,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - if (priv->has_phy) + if (phydev) phy_disconnect(phydev); return ret; -- cgit From cdb749cef16bceb74950fc8668f2632ff7cac9aa Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 18 Jan 2017 17:19:00 +0100 Subject: bpf: fix samples xdp_tx_iptunnel and tc_l2_redirect with fake KBUILD_MODNAME Fix build errors for samples/bpf xdp_tx_iptunnel and tc_l2_redirect, when dynamic debugging is enabled (CONFIG_DYNAMIC_DEBUG) by defining a fake KBUILD_MODNAME. Just like Daniel Borkmann fixed other samples/bpf in commit 96a8eb1eeed2 ("bpf: fix samples to add fake KBUILD_MODNAME"). Fixes: 12d8bb64e3f6 ("bpf: xdp: Add XDP example for head adjustment") Fixes: 90e02896f1a4 ("bpf: Add test for bpf_redirect to ipip/ip6tnl") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/bpf/tc_l2_redirect_kern.c | 1 + samples/bpf/xdp_tx_iptunnel_kern.c | 1 + 2 files changed, 2 insertions(+) diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c index 92a44729dbe4..7ef2a12b25b2 100644 --- a/samples/bpf/tc_l2_redirect_kern.c +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c index 85c38ecd3a2d..0f4f6e8c8611 100644 --- a/samples/bpf/xdp_tx_iptunnel_kern.c +++ b/samples/bpf/xdp_tx_iptunnel_kern.c @@ -8,6 +8,7 @@ * encapsulating the incoming packet in an IPv4/v6 header * and then XDP_TX it out. */ +#define KBUILD_MODNAME "foo" #include #include #include -- cgit From e048fc50d7bde23136e098e04a324d7e3404408d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 23:03:08 -0800 Subject: net/mlx5e: Do not recycle pages from emergency reserve A driver using dev_alloc_page() must not reuse a page allocated from emergency memory reserve. Otherwise all packets using this page will be immediately dropped, unless for very specific sockets having SOCK_MEMALLOC bit set. This issue might be hard to debug, because only a fraction of received packets would be dropped. Fixes: 4415a0319f92 ("net/mlx5e: Implement RX mapped page cache for page recycle") Signed-off-by: Eric Dumazet Cc: Tariq Toukan Cc: Saeed Mahameed Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e2fb3ed1790..06d5e6fecb0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -193,6 +193,9 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } + if (unlikely(page_is_pfmemalloc(dma_info->page))) + return false; + cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; return true; -- cgit From 90c311b0eeead647b708a723dbdde1eda3dcad05 Mon Sep 17 00:00:00 2001 From: Vineeth Remanan Pillai Date: Thu, 19 Jan 2017 08:35:39 -0800 Subject: xen-netfront: Fix Rx stall during network stress and OOM During an OOM scenario, request slots could not be created as skb allocation fails. So the netback cannot pass in packets and netfront wrongly assumes that there is no more work to be done and it disables polling. This causes Rx to stall. The issue is with the retry logic which schedules the timer if the created slots are less than NET_RX_SLOTS_MIN. The count of new request slots to be pushed are calculated as a difference between new req_prod and rsp_cons which could be more than the actual slots, if there are unconsumed responses. The fix is to calculate the count of newly created slots as the difference between new req_prod and old req_prod. Signed-off-by: Vineeth Remanan Pillai Reviewed-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a479cd99911d..8315fe73ecd0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -321,7 +321,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx.req_prod_pvt = req_prod; /* Not enough requests? Try again later. */ - if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) { + if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) { mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); return; } -- cgit From b6677449dff674cf5b81429b11d5c7f358852ef9 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 20 Jan 2017 18:12:17 +0100 Subject: bridge: netlink: call br_changelink() during br_dev_newlink() Any bridge options specified during link creation (e.g. ip link add) are ignored as br_dev_newlink() does not process them. Use br_changelink() to do it. Fixes: 133235161721 ("bridge: implement rtnl_link_ops->changelink") Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 71c7453268c1..7109b389ea58 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); -} - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@ -1115,6 +1101,25 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data); + if (err) + return err; + + return register_netdevice(dev); +} + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ -- cgit From 63d762b88cb5510f2bfdb5112ced18cde867ae61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Jan 2017 09:33:34 +0300 Subject: platform/x86: mlx-platform: free first dev on error There is an off-by-one error so we don't unregister priv->pdev_mux[0]. Also it's slightly simpler as a while loop instead of a for loop. Fixes: 58cbbee2391c ("x86/platform/mellanox: Introduce support for Mellanox systems platform") Signed-off-by: Dan Carpenter Acked-by: Vadim Pasternak Signed-off-by: Andy Shevchenko --- drivers/platform/x86/mlx-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 97b4c3a219c0..25f15df5c2d7 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -326,7 +326,7 @@ static int __init mlxplat_init(void) return 0; fail_platform_mux_register: - for (i--; i > 0 ; i--) + while (--i >= 0) platform_device_unregister(priv->pdev_mux[i]); platform_device_unregister(priv->pdev_i2c); fail_alloc: -- cgit From 44e6861646748a21b55725adc0780342f4440066 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jan 2017 16:28:47 +0100 Subject: platform/x86: surface3-wmi: Shut up unused-function warning The newly added driver guards its "resume" callback with an warning in some configurations: drivers/platform/x86/surface3-wmi.c:248:12: error: 's3_wmi_resume' defined but not used [-Werror=unused-function] Using a __maybe_unused annotation without an #ifdef avoids the mistake more reliably. Fixes: 3dda3b3798f9 ("platform/x86: Add custom surface3 platform device for controlling LID") Signed-off-by: Arnd Bergmann Reviewed-by: Benjamin Tissoires Signed-off-by: Darren Hart --- drivers/platform/x86/surface3-wmi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/platform/x86/surface3-wmi.c b/drivers/platform/x86/surface3-wmi.c index cbf4d83a7271..6ac1d1349d7e 100644 --- a/drivers/platform/x86/surface3-wmi.c +++ b/drivers/platform/x86/surface3-wmi.c @@ -244,13 +244,11 @@ static int s3_wmi_remove(struct platform_device *device) return 0; } -#ifdef CONFIG_PM -static int s3_wmi_resume(struct device *dev) +static int __maybe_unused s3_wmi_resume(struct device *dev) { s3_wmi_send_lid_state(); return 0; } -#endif static SIMPLE_DEV_PM_OPS(s3_wmi_pm, NULL, s3_wmi_resume); static struct platform_driver s3_wmi_driver = { -- cgit From e95ac4574b23a5fd8f5c5f2c19ef69ac15b7252c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 18 Jan 2017 09:13:46 +0100 Subject: platform/x86: surface3-wmi: fix uninitialized symbol The patch 3dda3b3798f9: "platform/x86: Add custom surface3 platform device for controlling LID" from Nov 25, 2016, leads to the following static checker warning: drivers/platform/x86/surface3-wmi.c:168 s3_wmi_check_platform_device() error: uninitialized symbol 'ts_adev'. Reported-by: Dan Carpenter Signed-off-by: Benjamin Tissoires Signed-off-by: Andy Shevchenko --- drivers/platform/x86/surface3-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/surface3-wmi.c b/drivers/platform/x86/surface3-wmi.c index 6ac1d1349d7e..25b176996cb7 100644 --- a/drivers/platform/x86/surface3-wmi.c +++ b/drivers/platform/x86/surface3-wmi.c @@ -139,7 +139,7 @@ static acpi_status s3_wmi_attach_spi_device(acpi_handle handle, static int s3_wmi_check_platform_device(struct device *dev, void *data) { - struct acpi_device *adev, *ts_adev; + struct acpi_device *adev, *ts_adev = NULL; acpi_handle handle; acpi_status status; -- cgit From 5a00b6c2438460b870a451f14593fc40d3c7edf6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 18:39:40 +0200 Subject: platform/x86: intel_mid_powerbtn: Set IRQ_ONESHOT The commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") starts refusing misconfigured interrupt handlers. This makes intel_mid_powerbtn not working anymore. Add a mandatory flag to a threaded IRQ request in the driver. Fixes: 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_mid_powerbtn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 1fc0de870ff8..361770568ad0 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -77,7 +77,7 @@ static int mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0, + error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_ONESHOT, DRIVER_NAME, input); if (error) { dev_err(&pdev->dev, "Unable to request irq %d for mfld power" -- cgit From 773c7220e22d193e5667c352fcbf8d47eefc817f Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 13 Jan 2017 12:48:06 -0500 Subject: scsi: virtio_scsi: Reject commands when virtqueue is broken In the case of a graceful set of detaches, where the virtio-scsi-ccw disk is removed from the guest prior to the controller, the guest behaves quite normally. Specifically, the detach gets us into sd_sync_cache to issue a Synchronize Cache(10) command, which immediately fails (and is retried a couple of times) because the device has been removed. Later, the removal of the controller sees two CRWs presented, but there's no further indication of the removal from the guest viewpoint. [ 17.217458] sd 0:0:0:0: [sda] Synchronizing SCSI cache [ 17.219257] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK [ 21.449400] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 [ 21.449406] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 However, on s390, the SCSI disks can be removed "by surprise" when an entire controller (host) is removed and all associated disks are removed via the loop in scsi_forget_host. The same call to sd_sync_cache is made, but because the controller has already been removed, the Synchronize Cache(10) command is neither issued (and then failed) nor rejected. That the I/O isn't returned means the guest cannot have other devices added nor removed, and other tasks (such as shutdown or reboot) issued by the guest will not complete either. The virtio ring has already been marked as broken (via virtio_break_device in virtio_ccw_remove), but we still attempt to queue the command only to have it remain there. The calling sequence provides a bit of distinction for us: virtscsi_queuecommand() -> virtscsi_kick_cmd() -> virtscsi_add_cmd() -> virtqueue_add_sgs() -> virtqueue_add() if success return 0 elseif vq->broken or vring_mapping_error() return -EIO else return -ENOSPC A return of ENOSPC is generally a temporary condition, so returning "host busy" from virtscsi_queuecommand makes sense here, to have it redriven in a moment or two. But the EIO return code is more of a permanent error and so it would be wise to return the I/O itself and allow the calling thread to finish gracefully. The result is these four kernel messages in the guest (the fourth one does not occur prior to this patch): [ 22.921562] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 [ 22.921580] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 [ 22.921978] sd 0:0:0:0: [sda] Synchronizing SCSI cache [ 22.921993] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK I opted to fill in the same response data that is returned from the more graceful device detach, where the disk device is removed prior to the controller device. Signed-off-by: Eric Farman Reviewed-by: Fam Zheng Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index ec91bd07f00a..c680d7641311 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -534,7 +534,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, { struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); + unsigned long flags; int req_size; + int ret; BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); @@ -562,8 +564,15 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, req_size = sizeof(cmd->req.cmd); } - if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0) + ret = virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)); + if (ret == -EIO) { + cmd->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + spin_lock_irqsave(&req_vq->vq_lock, flags); + virtscsi_complete_cmd(vscsi, cmd); + spin_unlock_irqrestore(&req_vq->vq_lock, flags); + } else if (ret != 0) { return SCSI_MLQUEUE_HOST_BUSY; + } return 0; } -- cgit From 92549cdc288f47f3a98cf80ac5890c91f5876a06 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jan 2017 14:22:24 -0800 Subject: iwlwifi: fix kernel crash when unregistering thermal zone A recent firmware change seems to have enabled thermal zones on the iwlwifi driver. Unfortunately, my device fails when registering the thermal zone. This doesn't stop the driver from attempting to unregister the thermal zone at unload time, triggering a NULL pointer deference in strlen() off the thermal_zone_device_unregister() path. Don't unregister if name is NULL, for that case we failed registering. Do the same for the cooling zone. Signed-off-by: Jens Axboe Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 63a051be832e..bec7d9c46087 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -843,8 +843,10 @@ static void iwl_mvm_thermal_zone_unregister(struct iwl_mvm *mvm) return; IWL_DEBUG_TEMP(mvm, "Thermal zone device unregister\n"); - thermal_zone_device_unregister(mvm->tz_device.tzone); - mvm->tz_device.tzone = NULL; + if (mvm->tz_device.tzone) { + thermal_zone_device_unregister(mvm->tz_device.tzone); + mvm->tz_device.tzone = NULL; + } } static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm) @@ -853,8 +855,10 @@ static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm) return; IWL_DEBUG_TEMP(mvm, "Cooling device unregister\n"); - thermal_cooling_device_unregister(mvm->cooling_dev.cdev); - mvm->cooling_dev.cdev = NULL; + if (mvm->cooling_dev.cdev) { + thermal_cooling_device_unregister(mvm->cooling_dev.cdev); + mvm->cooling_dev.cdev = NULL; + } } #endif /* CONFIG_THERMAL */ -- cgit From a5badd1e97e6caeca78ad74191f12fc923c403a8 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 14 Jan 2017 19:51:52 -0800 Subject: iio: health: afe4403: retrieve a valid iio_dev in suspend/resume The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Replace it with spi functions to get the correct iio_dev. Signed-off-by: Alison Schofield Acked-by: Andrew F. Davis Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/health/afe4403.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 9a081465c42f..6bb23a49e81e 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -422,7 +422,7 @@ MODULE_DEVICE_TABLE(of, afe4403_of_match); static int __maybe_unused afe4403_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); int ret; @@ -443,7 +443,7 @@ static int __maybe_unused afe4403_suspend(struct device *dev) static int __maybe_unused afe4403_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); int ret; -- cgit From 802ecfc113df1e15af1d028427cbbe785ae9cc4a Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 14 Jan 2017 19:52:50 -0800 Subject: iio: health: afe4404: retrieve a valid iio_dev in suspend/resume The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Replace it with i2c functions to get the correct iio_dev. Signed-off-by: Alison Schofield Acked-by: Andrew F. Davis Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/health/afe4404.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 45266404f7e3..964f5231a831 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -428,7 +428,7 @@ MODULE_DEVICE_TABLE(of, afe4404_of_match); static int __maybe_unused afe4404_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; @@ -449,7 +449,7 @@ static int __maybe_unused afe4404_suspend(struct device *dev) static int __maybe_unused afe4404_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; -- cgit From 828f84ee8f84710ea1818b3565add268bcb824c8 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Mon, 16 Jan 2017 18:04:18 -0800 Subject: iio: health: max30100: fixed parenthesis around FIFO count check FIFO was being read every sample after the "almost full" state was reached. This was due to an incorrect placement of the parenthesis in the while condition check. Note - the fixes tag is not actually correct, but the fix in this patch would also be needed for it to function correctly so we'll go with that one. Backports should pick up both. Signed-off-by: Matt Ranostay Fixes: b74fccad7 ("iio: health: max30100: correct FIFO check condition") Cc: Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron --- drivers/iio/health/max30100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 90ab8a2d2846..183c14329d6e 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -238,7 +238,7 @@ static irqreturn_t max30100_interrupt_handler(int irq, void *private) mutex_lock(&data->lock); - while (cnt || (cnt = max30100_fifo_count(data) > 0)) { + while (cnt || (cnt = max30100_fifo_count(data)) > 0) { ret = max30100_read_measurement(data); if (ret) break; -- cgit From d1aaf20ee655888c227d5137b7a63551f8d15416 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 16 Jan 2017 11:27:52 -0800 Subject: iio: adc: palmas_gpadc: retrieve a valid iio_dev in suspend/resume The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Use dev_get_drvdata() for a platform device to get the correct iio_dev. Signed-off-by: Alison Schofield Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/palmas_gpadc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index 2bbf0c521beb..7d61b566e148 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -775,7 +775,7 @@ static int palmas_adc_wakeup_reset(struct palmas_gpadc *adc) static int palmas_gpadc_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; @@ -798,7 +798,7 @@ static int palmas_gpadc_suspend(struct device *dev) static int palmas_gpadc_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; -- cgit From cfee5d63767b2e7997c1f36420d008abbe61565c Mon Sep 17 00:00:00 2001 From: Zach Ploskey Date: Sun, 22 Jan 2017 00:47:19 -0800 Subject: platform/x86: ideapad-laptop: handle ACPI event 1 On Ideapad laptops, ACPI event 1 is currently not handled. Many models log "ideapad_laptop: Unknown event: 1" every 20 seconds or so while running on battery power. Some convertible laptops receive this event when switching in and out of tablet mode. This adds and additional case for event 1 in ideapad_acpi_notify to call ideapad_input_report(priv, vpc_bit), so that the event is reported to userspace and we avoid unnecessary logging. Fixes bug #107481 (https://bugzilla.kernel.org/show_bug.cgi?id=107481) Fixes bug #65751 (https://bugzilla.kernel.org/show_bug.cgi?id=65751) Signed-off-by: Zach Ploskey Signed-off-by: Andy Shevchenko --- drivers/platform/x86/ideapad-laptop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 410741acb3c9..f46ece2ce3c4 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -813,6 +813,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) case 8: case 7: case 6: + case 1: ideapad_input_report(priv, vpc_bit); break; case 5: -- cgit From 5c113b5e0082e90d2e1c7b12e96a7b8cf0623e27 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Wed, 18 Jan 2017 21:50:39 +0000 Subject: iio: dht11: Use usleep_range instead of msleep for start signal The DHT22 (AM2302) datasheet specifies that the LOW start pulse should not exceed 20ms. However, observations with an oscilloscope of an RPi Model 2B (rev 1.1) communicating with a DHT22 sensor showed that the driver was consistently sending start pulses longer than 20ms: Kernel 4.7.10-v7+ (n=132): Minimum pulse length: 20.20ms Maximum: 29.84ms Mean: 24.96ms StDev: 2.82ms Sensor response rate: 100% Read success rate: 76% On kernel 4.8, the start pulse was so long that the sensor would not even respond 97% of the time: Kernel 4.8.16-v7+ (n=100): Minimum pulse length: 30.4ms Maximum: 74.4ms Mean: 39.3ms StDev: 10.2ms Sensor response rate: 3% Read success rate: 3% The driver would return ETIMEDOUT and write log messages like this: [ 51.430987] dht11 dht11@0: Only 1 signal edges detected [ 66.311019] dht11 dht11@0: Only 0 signal edges detected Replacing msleep(18) with usleep_range(18000, 20000) made the pulse length sane again and restored responsiveness: Kernel 4.8.16-v7+ with usleep_range (n=123): Minimum pulse length: 18.16ms Maximum: 20.20ms Mean: 19.85ms StDev: 0.51ms Sensor response rate: 100% Read success rate: 84% Cc: stable@vger.kernel.org Signed-off-by: John Brooks Reviewed-by: Harald Geyer Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/dht11.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index 9c47bc98f3ac..2a22ad920333 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -71,7 +71,8 @@ * a) select an implementation using busy loop polling on those systems * b) use the checksum to do some probabilistic decoding */ -#define DHT11_START_TRANSMISSION 18 /* ms */ +#define DHT11_START_TRANSMISSION_MIN 18000 /* us */ +#define DHT11_START_TRANSMISSION_MAX 20000 /* us */ #define DHT11_MIN_TIMERES 34000 /* ns */ #define DHT11_THRESHOLD 49000 /* ns */ #define DHT11_AMBIG_LOW 23000 /* ns */ @@ -228,7 +229,8 @@ static int dht11_read_raw(struct iio_dev *iio_dev, ret = gpio_direction_output(dht11->gpio, 0); if (ret) goto err; - msleep(DHT11_START_TRANSMISSION); + usleep_range(DHT11_START_TRANSMISSION_MIN, + DHT11_START_TRANSMISSION_MAX); ret = gpio_direction_input(dht11->gpio); if (ret) goto err; -- cgit From 4eccbfc36186926b570310bfbd44f4216cd05c63 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Fri, 20 Jan 2017 12:14:03 -0600 Subject: amd-xgbe: Add a hardware quirk for register definitions A newer version of the hardware is using the same PCI ids for the network device but has altered register definitions for determining the window settings for the indirect PCS access. Add support to check for this hardware and if found use the new register values. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 ++ drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 4 ++-- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 15 ++++++++++++++- drivers/net/ethernet/amd/xgbe/xgbe.h | 2 ++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 5b7ba25e0065..8a280e7d66bd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -891,6 +891,8 @@ #define PCS_V1_WINDOW_SELECT 0x03fc #define PCS_V2_WINDOW_DEF 0x9060 #define PCS_V2_WINDOW_SELECT 0x9064 +#define PCS_V2_RV_WINDOW_DEF 0x1060 +#define PCS_V2_RV_WINDOW_SELECT 0x1064 /* PCS register entry bit positions and sizes */ #define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index aaf0350076a9..c8e8a4a271ec 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1151,7 +1151,7 @@ static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); mmd_data = XPCS16_IOREAD(pdata, offset); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); @@ -1183,7 +1183,7 @@ static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); XPCS16_IOWRITE(pdata, offset, mmd_data); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index e76b7f65b805..c2730f15bd8b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -265,6 +265,7 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct xgbe_prv_data *pdata; struct device *dev = &pdev->dev; void __iomem * const *iomap_table; + struct pci_dev *rdev; unsigned int ma_lo, ma_hi; unsigned int reg; int bar_mask; @@ -326,8 +327,20 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (netif_msg_probe(pdata)) dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + /* Set the PCS indirect addressing definition registers */ + rdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); + if (rdev && + (rdev->vendor == PCI_VENDOR_ID_AMD) && (rdev->device == 0x15d0)) { + pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; + } else { + pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; + } + pci_dev_put(rdev); + /* Configure the PCS indirect addressing support */ - reg = XPCS32_IOREAD(pdata, PCS_V2_WINDOW_DEF); + reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg); pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); pdata->xpcs_window <<= 6; pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index f52a9bd05bac..00108815b55e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -955,6 +955,8 @@ struct xgbe_prv_data { /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; + unsigned int xpcs_window_def_reg; + unsigned int xpcs_window_sel_reg; unsigned int xpcs_window; unsigned int xpcs_window_size; unsigned int xpcs_window_mask; -- cgit From 738f7f647371ff4cfc9646c99dba5b58ad142db3 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Fri, 20 Jan 2017 12:14:13 -0600 Subject: amd-xgbe: Check xgbe_init() return code The xgbe_init() routine returns a return code indicating success or failure, but the return code is not checked. Add code to xgbe_init() to issue a message when failures are seen and add code to check the xgbe_init() return code. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 4 +++- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index c8e8a4a271ec..a7d16db5c4b2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -3407,8 +3407,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata) /* Flush Tx queues */ ret = xgbe_flush_tx_queues(pdata); - if (ret) + if (ret) { + netdev_err(pdata->netdev, "error flushing TX queues\n"); return ret; + } /* * Initialize DMA related features diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 9943629fcbf9..1c87cc204075 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1070,7 +1070,9 @@ static int xgbe_start(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_start\n"); - hw_if->init(pdata); + ret = hw_if->init(pdata); + if (ret) + return ret; xgbe_napi_enable(pdata, 1); -- cgit From 7f1931b35f0909695543a8c12f72ccd2d20ff241 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jan 2017 13:19:05 +0100 Subject: ARM: imx: hide unused variable in #ifdef A bugfix added a new local variable that is only used inside of an #ifdef section, and unused if CONFIG_PERF_EVENTS is disabled: arch/arm/mach-imx/mmdc.c:63:25: warning: 'cpuhp_mmdc_state' defined but not used [-Wunused-variable] This moves the variable down inside that same ifdef. Fixes: a051f220d6b9 ("ARM/imx/mmcd: Fix broken cpu hotplug handling") Signed-off-by: Arnd Bergmann Acked-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 699157759120..c03bf28d8bbc 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -60,7 +60,6 @@ #define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu) -static enum cpuhp_state cpuhp_mmdc_state; static int ddr_type; struct fsl_mmdc_devtype_data { @@ -82,6 +81,7 @@ static const struct of_device_id imx_mmdc_dt_ids[] = { #ifdef CONFIG_PERF_EVENTS +static enum cpuhp_state cpuhp_mmdc_state; static DEFINE_IDA(mmdc_ida); PMU_EVENT_ATTR_STRING(total-cycles, mmdc_pmu_total_cycles, "event=0x00") -- cgit From c26665ab5c49ad3e142e0f054ca3204f259ba09c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:40 +0100 Subject: x86/microcode/intel: Drop stashed AP patch pointer optimization This was meant to save us the scanning of the microcode containter in the initrd since the first AP had already done that but it can also hurt us: Imagine a single hyperthreaded CPU (Intel(R) Atom(TM) CPU N270, for example) which updates the microcode on the BSP but since the microcode engine is shared between the two threads, the update on CPU1 doesn't happen because it has already happened on CPU0 and we don't find a newer microcode revision on CPU1. Which doesn't set the intel_ucode_patch pointer and at initrd jettisoning time we don't save the microcode patch for later application. Now, when we suspend to RAM, the loaded microcode gets cleared so we need to reload but there's no patch saved in the cache. Removing the optimization fixes this issue and all is fine and dandy. Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading") Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/intel.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 3f329b74e040..8325d8a09ab0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -41,7 +41,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; -/* Current microcode patch used in early patching */ +/* Current microcode patch used in early patching on the APs. */ struct microcode_intel *intel_ucode_patch; static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, @@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void) struct ucode_cpu_info uci; struct cpio_data cp; - /* - * AP loading didn't find any microcode patch, no need to save anything. - */ - if (!intel_ucode_patch || IS_ERR(intel_ucode_patch)) - return 0; - if (!load_builtin_intel_microcode(&cp)) cp = find_microcode_in_initrd(ucode_path, false); @@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void) return 0; } - /* * @res_patch, output: a pointer to the patch we found. */ -- cgit From e9748e0364fe82dc037d22900ff13a62d04518bf Mon Sep 17 00:00:00 2001 From: Ziyuan Xu Date: Tue, 17 Jan 2017 09:22:56 +0800 Subject: mmc: dw_mmc: force setup bus if active slots exist It's necessary to setup bus if any slots are present. - update clock after ctrl reset - if the host has genpd node, we can guarantee the clock is available before starting request. Otherwies, the clock register is reset once power off the pd, and host can't output the active clock during communication. Fixes: e9ed8835e990 ("mmc: dw_mmc: add runtime PM callback") Fixes: df9bcc2bc0a1 ("mmc: dw_mmc: add missing codes for runtime resume") cc: Reported-by: Randy Li Reported-by: S. Gilles Signed-off-by: Ziyuan Xu Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index b44306b886cb..73db08558e4d 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -3354,10 +3354,11 @@ int dw_mci_runtime_resume(struct device *dev) if (!slot) continue; - if (slot->mmc->pm_flags & MMC_PM_KEEP_POWER) { + if (slot->mmc->pm_flags & MMC_PM_KEEP_POWER) dw_mci_set_ios(slot->mmc, &slot->mmc->ios); - dw_mci_setup_bus(slot, true); - } + + /* Force setup bus to guarantee available clock output */ + dw_mci_setup_bus(slot, true); } /* Now that slots are all setup, we can enable card detect */ -- cgit From 4c833368f0bf748d4147bf301b1f95bc8eccb3c0 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 22 Jan 2017 16:50:23 +0800 Subject: x86/fpu: Set the xcomp_bv when we fake up a XSAVES area I got the following calltrace on a Apollo Lake SoC with 32-bit kernel: WARNING: CPU: 2 PID: 261 at arch/x86/include/asm/fpu/internal.h:363 fpu__restore+0x1f5/0x260 [...] Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLIRVPA.X64.0138.B35.1608091058 08/09/2016 Call Trace: dump_stack() __warn() ? fpu__restore() warn_slowpath_null() fpu__restore() __fpu__restore_sig() fpu__restore_sig() restore_sigcontext.isra.9() sys_sigreturn() do_int80_syscall_32() entry_INT80_32() The reason is that a #GP occurs when executing XRSTORS. The root cause is that we forget to set the xcomp_bv when we fake up the XSAVES area in the copyin_to_xsaves() function. Signed-off-by: Kevin Hao Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/1485075023-30161-1-git-send-email-haokexin@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/fpu/xstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..e287b9075527 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1070,6 +1070,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } -- cgit From 7f59b319111bbc3a5f32730c8a43b201e9522f52 Mon Sep 17 00:00:00 2001 From: Sébastien Szymanski Date: Wed, 18 Jan 2017 11:09:51 +0100 Subject: ARM: dts: imx6dl: fix GPIO4 range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPIO4_11 is on pin 152(MX6DL_PAD_KEY_ROW2) and not on pin 151(MX6DL_PAD_KEY_ROW1). I found the error while booting a mainline kernel on APF6S SoM and noticed the following message: [ 2.609337] imx6dl-pinctrl 20e0000.iomuxc: pin MX6DL_PAD_KEY_ROW1 already requested by 20a8000.gpio:105; cannot claim for 20a8000.gpio:107 [ 2.621884] imx6dl-pinctrl 20e0000.iomuxc: pin-151 (20a8000.gpio:107) status -22 [ 2.629303] spi_imx 2008000.ecspi: Can't get CS GPIO 107 With this patch, the message is gone and spi_imx driver probes correctly. Fixes: bb728d662bed ("ARM: dts: add gpio-ranges property to iMX GPIO controllers") Signed-off-by: Sébastien Szymanski Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6dl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi index 1ade1951e620..7aa120fbdc71 100644 --- a/arch/arm/boot/dts/imx6dl.dtsi +++ b/arch/arm/boot/dts/imx6dl.dtsi @@ -137,7 +137,7 @@ &gpio4 { gpio-ranges = <&iomuxc 5 136 1>, <&iomuxc 6 145 1>, <&iomuxc 7 150 1>, <&iomuxc 8 146 1>, <&iomuxc 9 151 1>, <&iomuxc 10 147 1>, - <&iomuxc 11 151 1>, <&iomuxc 12 148 1>, <&iomuxc 13 153 1>, + <&iomuxc 11 152 1>, <&iomuxc 12 148 1>, <&iomuxc 13 153 1>, <&iomuxc 14 149 1>, <&iomuxc 15 154 1>, <&iomuxc 16 39 7>, <&iomuxc 23 56 1>, <&iomuxc 24 61 7>, <&iomuxc 31 46 1>; }; -- cgit From 7941c59e45f3b6d30e07375e9b6713427e0a9f98 Mon Sep 17 00:00:00 2001 From: Jürg Billeter Date: Mon, 10 Oct 2016 18:30:01 +0200 Subject: iwlwifi: fix double hyphen in MODULE_FIRMWARE for 8000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mistakenly, the driver is trying to load the 8000C firmware with an incorrect name (i.e. with two hyphens where there should be only one) and that fails. Fix that by removing the hyphen from the format macro. Fixes: e1ba684f762b ("iwlwifi: 8000: fix MODULE_FIRMWARE input") Signed-off-by: Jürg Billeter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index d02ca1491d16..8d3e53fac1da 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -91,7 +91,7 @@ #define IWL8000_FW_PRE "iwlwifi-8000C-" #define IWL8000_MODULE_FIRMWARE(api) \ - IWL8000_FW_PRE "-" __stringify(api) ".ucode" + IWL8000_FW_PRE __stringify(api) ".ucode" #define IWL8265_FW_PRE "iwlwifi-8265-" #define IWL8265_MODULE_FIRMWARE(api) \ -- cgit From 03c902bff524e0cf664737a33f2365f7837040bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Dec 2016 12:03:36 +0100 Subject: iwlwifi: mvm: avoid crash on restart w/o reserved queues When the firmware restarts in a situation in which any station has no queue reserved anymore because that queue was used, the code will crash trying to access the queue_info array at the offset 255, which is far too big. Fix this by checking that a queue is actually reserved before writing its status. Fixes: 8d98ae6eb0d5 ("iwlwifi: mvm: re-assign old queues after hw restart in dqa mode") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 636c8b03e318..09e9e2e3ed04 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1164,9 +1164,10 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, .frame_limit = IWL_FRAME_LIMIT, }; - /* Make sure reserved queue is still marked as such (or allocated) */ - mvm->queue_info[mvm_sta->reserved_queue].status = - IWL_MVM_QUEUE_RESERVED; + /* Make sure reserved queue is still marked as such (if allocated) */ + if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; -- cgit From d6040764adcb5cb6de1489422411d701c158bb69 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Fri, 13 Jan 2017 11:54:08 +0000 Subject: crypto: api - Clear CRYPTO_ALG_DEAD bit before registering an alg Make sure CRYPTO_ALG_DEAD bit is cleared before proceeding with the algorithm registration. This fixes qat-dh registration when driver is restarted Cc: Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- crypto/algapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index df939b54b09f..1fad2a6b3bbb 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -356,6 +356,7 @@ int crypto_register_alg(struct crypto_alg *alg) struct crypto_larval *larval; int err; + alg->cra_flags &= ~CRYPTO_ALG_DEAD; err = crypto_check_alg(alg); if (err) return err; -- cgit From 11e3b725cfc282efe9d4a354153e99d86a16af08 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Jan 2017 13:46:29 +0000 Subject: crypto: arm64/aes-blk - honour iv_out requirement in CBC and CTR modes Update the ARMv8 Crypto Extensions and the plain NEON AES implementations in CBC and CTR modes to return the next IV back to the skcipher API client. This is necessary for chaining to work correctly. Note that for CTR, this is only done if the request is a round multiple of the block size, since otherwise, chaining is impossible anyway. Cc: # v3.16+ Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 88 +++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index c53dbeae79f2..838dad5c209f 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt) cbz w6, .Lcbcencloop ld1 {v0.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x5 + enc_prepare w3, x2, x6 .Lcbcencloop: ld1 {v1.16b}, [x1], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x5, w6 + encrypt_block v0, w3, x2, x6, w7 st1 {v0.16b}, [x0], #16 subs w4, w4, #1 bne .Lcbcencloop + st1 {v0.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) @@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt) cbz w6, .LcbcdecloopNx ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x5 + dec_prepare w3, x2, x6 .LcbcdecloopNx: #if INTERLEAVE >= 2 @@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt) .Lcbcdecloop: ld1 {v1.16b}, [x1], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x5, w6 + decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next iv */ st1 {v0.16b}, [x0], #16 @@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt) bne .Lcbcdecloop .Lcbcdecout: FRAME_POP + st1 {v7.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_decrypt) @@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt) AES_ENTRY(aes_ctr_encrypt) FRAME_PUSH - cbnz w6, .Lctrfirst /* 1st time around? */ - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 -#if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ - bcs .Lctrinc - add x5, x5, #1 /* increment BE ctr */ - b .LctrincNx -#else - b .Lctrinc -#endif -.Lctrfirst: + cbz w6, .Lctrnotfirst /* 1st time around? */ enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 + +.Lctrnotfirst: + umov x8, v4.d[1] /* keep swabbed ctr in reg */ + rev x8, x8 #if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ + cmn w8, w4 /* 32 bit overflow? */ bcs .Lctrloop .LctrloopNx: subs w4, w4, #INTERLEAVE @@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt) #if INTERLEAVE == 2 mov v0.8b, v4.8b mov v1.8b, v4.8b - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v0.d[1], x7 - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v1.d[1], x7 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ do_encrypt_block2x @@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt) st1 {v0.16b-v1.16b}, [x0], #32 #else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w5 + dup v7.4s, w8 mov v0.16b, v4.16b add v7.4s, v7.4s, v8.4s mov v1.16b, v4.16b @@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt) eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x5, x5, #INTERLEAVE + add x8, x8, #INTERLEAVE #endif - cbz w4, .LctroutNx -.LctrincNx: - rev x7, x5 + rev x7, x8 ins v4.d[1], x7 + cbz w4, .Lctrout b .LctrloopNx -.LctroutNx: - sub x5, x5, #1 - rev x7, x5 - ins v4.d[1], x7 - b .Lctrout .Lctr1x: adds w4, w4, #INTERLEAVE beq .Lctrout @@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrloop: mov v0.16b, v4.16b encrypt_block v0, w3, x2, x6, w7 + + adds x8, x8, #1 /* increment BE ctr */ + rev x7, x8 + ins v4.d[1], x7 + bcs .Lctrcarry /* overflow? */ + +.Lctrcarrydone: subs w4, w4, #1 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 - beq .Lctrout -.Lctrinc: - adds x5, x5, #1 /* increment BE ctr */ - rev x7, x5 - ins v4.d[1], x7 - bcc .Lctrloop /* no overflow? */ - umov x7, v4.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins v4.d[0], x7 - b .Lctrloop + bne .Lctrloop + +.Lctrout: + st1 {v4.16b}, [x5] /* return next CTR value */ + FRAME_POP + ret + .Lctrhalfblock: ld1 {v3.8b}, [x1] eor v3.8b, v0.8b, v3.8b st1 {v3.8b}, [x0] -.Lctrout: FRAME_POP ret + +.Lctrcarry: + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrcarrydone AES_ENDPROC(aes_ctr_encrypt) .ltorg -- cgit From 880a38547ff08715ce4f1daf9a4bb30c87676e68 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 20 Jan 2017 15:21:35 +0200 Subject: userns: Make ucounts lock irq-safe The ucounts_lock is being used to protect various ucounts lifecycle management functionalities. However, those services can also be invoked when a pidns is being freed in an RCU callback (e.g. softirq context). This can lead to deadlocks. There were already efforts trying to prevent similar deadlocks in add7c65ca426 ("pid: fix lockdep deadlock warning due to ucount_lock"), however they just moved the context from hardirq to softrq. Fix this issue once and for all by explictly making the lock disable irqs altogether. Dmitry Vyukov reported: > I've got the following deadlock report while running syzkaller fuzzer > on eec0d3d065bfcdf9cd5f56dd2a36b94d12d32297 of linux-next (on odroid > device if it matters): > > ================================= > [ INFO: inconsistent lock state ] > 4.10.0-rc3-next-20170112-xc2-dirty #6 Not tainted > --------------------------------- > inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. > swapper/2/0 [HC0[0]:SC1[1]:HE1:SE0] takes: > (ucounts_lock){+.?...}, at: [< inline >] spin_lock > ./include/linux/spinlock.h:302 > (ucounts_lock){+.?...}, at: [] > put_ucounts+0x60/0x138 kernel/ucount.c:162 > {SOFTIRQ-ON-W} state was registered at: > [] mark_lock+0x220/0xb60 kernel/locking/lockdep.c:3054 > [< inline >] mark_irqflags kernel/locking/lockdep.c:2941 > [] __lock_acquire+0x388/0x3260 kernel/locking/lockdep.c:3295 > [] lock_acquire+0xa4/0x138 kernel/locking/lockdep.c:3753 > [< inline >] __raw_spin_lock ./include/linux/spinlock_api_smp.h:144 > [] _raw_spin_lock+0x90/0xd0 kernel/locking/spinlock.c:151 > [< inline >] spin_lock ./include/linux/spinlock.h:302 > [< inline >] get_ucounts kernel/ucount.c:131 > [] inc_ucount+0x80/0x6c8 kernel/ucount.c:189 > [< inline >] inc_mnt_namespaces fs/namespace.c:2818 > [] alloc_mnt_ns+0x78/0x3a8 fs/namespace.c:2849 > [] create_mnt_ns+0x28/0x200 fs/namespace.c:2959 > [< inline >] init_mount_tree fs/namespace.c:3199 > [] mnt_init+0x258/0x384 fs/namespace.c:3251 > [] vfs_caches_init+0x6c/0x80 fs/dcache.c:3626 > [] start_kernel+0x414/0x460 init/main.c:648 > [] __primary_switched+0x6c/0x70 arch/arm64/kernel/head.S:456 > irq event stamp: 2316924 > hardirqs last enabled at (2316924): [< inline >] rcu_do_batch > kernel/rcu/tree.c:2911 > hardirqs last enabled at (2316924): [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > hardirqs last enabled at (2316924): [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > hardirqs last enabled at (2316924): [] > rcu_process_callbacks+0x7a4/0xc28 kernel/rcu/tree.c:3166 > hardirqs last disabled at (2316923): [< inline >] rcu_do_batch > kernel/rcu/tree.c:2900 > hardirqs last disabled at (2316923): [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > hardirqs last disabled at (2316923): [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > hardirqs last disabled at (2316923): [] > rcu_process_callbacks+0x210/0xc28 kernel/rcu/tree.c:3166 > softirqs last enabled at (2316912): [] > _local_bh_enable+0x4c/0x80 kernel/softirq.c:155 > softirqs last disabled at (2316913): [< inline >] > do_softirq_own_stack ./include/linux/interrupt.h:488 > softirqs last disabled at (2316913): [< inline >] > invoke_softirq kernel/softirq.c:371 > softirqs last disabled at (2316913): [] > irq_exit+0x264/0x308 kernel/softirq.c:405 > > other info that might help us debug this: > Possible unsafe locking scenario: > > CPU0 > ---- > lock(ucounts_lock); > > lock(ucounts_lock); > > *** DEADLOCK *** > > 1 lock held by swapper/2/0: > #0: (rcu_callback){......}, at: [< inline >] __rcu_reclaim > kernel/rcu/rcu.h:108 > #0: (rcu_callback){......}, at: [< inline >] rcu_do_batch > kernel/rcu/tree.c:2919 > #0: (rcu_callback){......}, at: [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > #0: (rcu_callback){......}, at: [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > #0: (rcu_callback){......}, at: [] > rcu_process_callbacks+0x720/0xc28 kernel/rcu/tree.c:3166 > > stack backtrace: > CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.10.0-rc3-next-20170112-xc2-dirty #6 > Hardware name: Hardkernel ODROID-C2 (DT) > Call trace: > [] dump_backtrace+0x0/0x440 arch/arm64/kernel/traps.c:500 > [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:225 > [] dump_stack+0x110/0x168 > [] print_usage_bug.part.27+0x49c/0x4bc > kernel/locking/lockdep.c:2387 > [< inline >] print_usage_bug kernel/locking/lockdep.c:2357 > [< inline >] valid_state kernel/locking/lockdep.c:2400 > [< inline >] mark_lock_irq kernel/locking/lockdep.c:2617 > [] mark_lock+0x934/0xb60 kernel/locking/lockdep.c:3065 > [< inline >] mark_irqflags kernel/locking/lockdep.c:2923 > [] __lock_acquire+0x640/0x3260 kernel/locking/lockdep.c:3295 > [] lock_acquire+0xa4/0x138 kernel/locking/lockdep.c:3753 > [< inline >] __raw_spin_lock ./include/linux/spinlock_api_smp.h:144 > [] _raw_spin_lock+0x90/0xd0 kernel/locking/spinlock.c:151 > [< inline >] spin_lock ./include/linux/spinlock.h:302 > [] put_ucounts+0x60/0x138 kernel/ucount.c:162 > [] dec_ucount+0xf4/0x158 kernel/ucount.c:214 > [< inline >] dec_pid_namespaces kernel/pid_namespace.c:89 > [] delayed_free_pidns+0x40/0xe0 kernel/pid_namespace.c:156 > [< inline >] __rcu_reclaim kernel/rcu/rcu.h:118 > [< inline >] rcu_do_batch kernel/rcu/tree.c:2919 > [< inline >] invoke_rcu_callbacks kernel/rcu/tree.c:3182 > [< inline >] __rcu_process_callbacks kernel/rcu/tree.c:3149 > [] rcu_process_callbacks+0x768/0xc28 kernel/rcu/tree.c:3166 > [] __do_softirq+0x324/0x6e0 kernel/softirq.c:284 > [< inline >] do_softirq_own_stack ./include/linux/interrupt.h:488 > [< inline >] invoke_softirq kernel/softirq.c:371 > [] irq_exit+0x264/0x308 kernel/softirq.c:405 > [] __handle_domain_irq+0xc0/0x150 kernel/irq/irqdesc.c:636 > [] gic_handle_irq+0x68/0xd8 > Exception stack(0xffff8000648e7dd0 to 0xffff8000648e7f00) > 7dc0: ffff8000648d4b3c 0000000000000007 > 7de0: 0000000000000000 1ffff0000c91a967 1ffff0000c91a967 1ffff0000c91a967 > 7e00: ffff20000a4b6b68 0000000000000001 0000000000000007 0000000000000001 > 7e20: 1fffe4000149ae90 ffff200009d35000 0000000000000000 0000000000000002 > 7e40: 0000000000000000 0000000000000000 0000000002624a1a 0000000000000000 > 7e60: 0000000000000000 ffff200009cbcd88 000060006d2ed000 0000000000000140 > 7e80: ffff200009cff000 ffff200009cb6000 ffff200009cc2020 ffff200009d2159d > 7ea0: 0000000000000000 ffff8000648d4380 0000000000000000 ffff8000648e7f00 > 7ec0: ffff20000820a478 ffff8000648e7f00 ffff20000820a47c 0000000010000145 > 7ee0: 0000000000000140 dfff200000000000 ffffffffffffffff ffff20000820a478 > [] el1_irq+0xb8/0x130 arch/arm64/kernel/entry.S:486 > [< inline >] arch_local_irq_restore > ./arch/arm64/include/asm/irqflags.h:81 > [] rcu_idle_exit+0x64/0xa8 kernel/rcu/tree.c:1030 > [< inline >] cpuidle_idle_call kernel/sched/idle.c:200 > [] do_idle+0x1dc/0x2d0 kernel/sched/idle.c:243 > [] cpu_startup_entry+0x24/0x28 kernel/sched/idle.c:345 > [] secondary_start_kernel+0x2cc/0x358 > arch/arm64/kernel/smp.c:276 > [<000000000279f1a4>] 0x279f1a4 Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Fixes: add7c65ca426 ("pid: fix lockdep deadlock warning due to ucount_lock") Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces") Cc: stable@vger.kernel.org Link: https://www.spinics.net/lists/kernel/msg2426637.html Signed-off-by: Nikolay Borisov Signed-off-by: Eric W. Biederman --- kernel/ucount.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 9d20d5dd298a..4bbd38ec3788 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -128,10 +128,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (!ucounts) { - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) @@ -141,7 +141,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->uid = uid; atomic_set(&new->count, 0); - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (ucounts) { kfree(new); @@ -152,16 +152,18 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) } if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) ucounts = NULL; - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); return ucounts; } static void put_ucounts(struct ucounts *ucounts) { + unsigned long flags; + if (atomic_dec_and_test(&ucounts->count)) { - spin_lock(&ucounts_lock); + spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); - spin_unlock(&ucounts_lock); + spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); } -- cgit From 9f427a0e474a67b454420c131709600d44850486 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 20 Jan 2017 12:58:34 -0800 Subject: net: mpls: Fix multipath selection for LSR use case MPLS multipath for LSR is broken -- always selecting the first nexthop in the one label case. For example: $ ip -f mpls ro ls 100 nexthop as to 200 via inet 172.16.2.2 dev virt12 nexthop as to 300 via inet 172.16.3.2 dev virt13 101 nexthop as to 201 via inet6 2000:2::2 dev virt12 nexthop as to 301 via inet6 2000:3::2 dev virt13 In this example incoming packets have a single MPLS labels which means BOS bit is set. The BOS bit is passed from mpls_forward down to mpls_multipath_hash which never processes the hash loop because BOS is 1. Update mpls_multipath_hash to process the entire label stack. mpls_hdr_len tracks the total mpls header length on each pass (on pass N mpls_hdr_len is N * sizeof(mpls_shim_hdr)). When the label is found with the BOS set it verifies the skb has sufficient header for ipv4 or ipv6, and find the IPv4 and IPv6 header by using the last mpls_hdr pointer and adding 1 to advance past it. With these changes I have verified the code correctly sees the label, BOS, IPv4 and IPv6 addresses in the network header and icmp/tcp/udp traffic for ipv4 and ipv6 are distributed across the nexthops. Fixes: 1c78efa8319ca ("mpls: flow-based multipath selection") Acked-by: Robert Shearman Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 15fe97644ffe..5b77377e5a15 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static u32 mpls_multipath_hash(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) { struct mpls_entry_decoded dec; + unsigned int mpls_hdr_len = 0; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; u32 hash = 0; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS; label_index++) { - if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + mpls_hdr_len += sizeof(*hdr); + if (!pskb_may_pull(skb, mpls_hdr_len)) break; /* Read and decode the current label */ @@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, eli_seen = true; } - bos = dec.bos; - if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct iphdr))) { + if (!dec.bos) + continue; + + /* found bottom label; does skb have room for a header? */ + if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) { const struct iphdr *v4hdr; - v4hdr = (const struct iphdr *)(mpls_hdr(skb) + - label_index); + v4hdr = (const struct iphdr *)(hdr + 1); if (v4hdr->version == 4) { hash = jhash_3words(ntohl(v4hdr->saddr), ntohl(v4hdr->daddr), v4hdr->protocol, hash); } else if (v4hdr->version == 6 && - pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct ipv6hdr))) { + pskb_may_pull(skb, mpls_hdr_len + + sizeof(struct ipv6hdr))) { const struct ipv6hdr *v6hdr; - v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + - label_index); - + v6hdr = (const struct ipv6hdr *)(hdr + 1); hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); hash = jhash_1word(v6hdr->nexthdr, hash); } } + + break; } return hash; } static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) + struct sk_buff *skb) { int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; @@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive <= 0) return NULL; - hash = mpls_multipath_hash(rt, skb, bos); + hash = mpls_multipath_hash(rt, skb); nh_index = hash % alive; if (alive == rt->rt_nhn) goto out; @@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr); - /* Pop the label */ - skb_pull(skb, sizeof(*hdr)); - skb_reset_network_header(skb); - - skb_orphan(skb); - rt = mpls_route_input_rcu(net, dec.label); if (!rt) goto drop; - nh = mpls_select_multipath(rt, skb, dec.bos); + nh = mpls_select_multipath(rt, skb); if (!nh) goto drop; @@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!mpls_output_possible(out_dev)) goto drop; + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + if (skb_warn_if_lro(skb)) goto drop; -- cgit From d0e287a401d9acf67b75180b26e2d62b7d482652 Mon Sep 17 00:00:00 2001 From: Rask Ingemann Lambertsen Date: Sat, 21 Jan 2017 17:11:43 +0100 Subject: regulator: axp20x: AXP806: Fix dcdcb being set instead of dcdce A typo or copy-paste bug means that the register access intended for regulator dcdce goes to dcdcb instead. This patch corrects it. Fixes: 2ca342d391e3 (regulator: axp20x: Support AXP806 variant) Signed-off-by: Rask Ingemann Lambertsen Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/axp20x-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index e6a512ebeae2..a3ade9e4ef47 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -272,7 +272,7 @@ static const struct regulator_desc axp806_regulators[] = { 64, AXP806_DCDCD_V_CTRL, 0x3f, AXP806_PWR_OUT_CTRL1, BIT(3)), AXP_DESC(AXP806, DCDCE, "dcdce", "vine", 1100, 3400, 100, - AXP806_DCDCB_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), + AXP806_DCDCE_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), AXP_DESC(AXP806, ALDO1, "aldo1", "aldoin", 700, 3300, 100, AXP806_ALDO1_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(5)), AXP_DESC(AXP806, ALDO2, "aldo2", "aldoin", 700, 3400, 100, -- cgit From b32728ffef7f233dbdabb3f11814bdf692aaf501 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 23 Jan 2017 08:12:19 -0700 Subject: xen-blkfront: feature flags handling adjustments Don't truncate the "feature-persistent" value read from xenstore: Any non-zero value is supposed to enable the feature, just like is already being done for feature_secdiscard. Just like the other feature_* fields, feature_flush and feature_fua are boolean flags, and hence fit well into a single bit. Keep all bit fields together to limit gaps. Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b2bdfa81f929..386b17208605 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -197,13 +197,13 @@ struct blkfront_info /* Number of pages per ring buffer. */ unsigned int nr_ring_pages; struct request_queue *rq; - unsigned int feature_flush; - unsigned int feature_fua; + unsigned int feature_flush:1; + unsigned int feature_fua:1; unsigned int feature_discard:1; unsigned int feature_secdiscard:1; + unsigned int feature_persistent:1; unsigned int discard_granularity; unsigned int discard_alignment; - unsigned int feature_persistent:1; /* Number of 4KB segments handled */ unsigned int max_indirect_segments; int is_ready; @@ -2323,8 +2323,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) blkfront_setup_discard(info); info->feature_persistent = - xenbus_read_unsigned(info->xbdev->otherend, - "feature-persistent", 0); + !!xenbus_read_unsigned(info->xbdev->otherend, + "feature-persistent", 0); indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); -- cgit From 3b4f18843e511193e7eb616710e838f5852e661d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 23 Jan 2017 08:11:37 -0700 Subject: xen-blkfront: correct maximum segment accounting Making use of "max_indirect_segments=" has issues: - blkfront_setup_indirect() may end up with zero psegs when PAGE_SIZE is sufficiently much larger than XEN_PAGE_SIZE - the variable driven by the command line option (xen_blkif_max_segments) has a somewhat different purpose, and hence should namely never end up being zero - as long as the specified value is lower than the legacy default, we better don't use indirect segments at all (or we'd in fact lower throughput) Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 386b17208605..265f1a7072e9 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2223,7 +2223,7 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) } else grants = info->max_indirect_segments; - psegs = grants / GRANTS_PER_PSEG; + psegs = DIV_ROUND_UP(grants, GRANTS_PER_PSEG); err = fill_grant_buffer(rinfo, (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info)); @@ -2328,8 +2328,11 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); - info->max_indirect_segments = min(indirect_segments, - xen_blkif_max_segments); + if (indirect_segments > xen_blkif_max_segments) + indirect_segments = xen_blkif_max_segments; + if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST) + indirect_segments = 0; + info->max_indirect_segments = indirect_segments; } /* @@ -2652,6 +2655,9 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) + xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER); -- cgit From d2b3964a0780d2d2994eba57f950d6c9fe489ed8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 20 Jan 2017 09:31:54 -0800 Subject: xfs: fix COW writeback race Due to the way how xfs_iomap_write_allocate tries to convert the whole found extents from delalloc to real space we can run into a race condition with multiple threads doing writes to this same extent. For the non-COW case that is harmless as the only thing that can happen is that we call xfs_bmapi_write on an extent that has already been converted to a real allocation. For COW writes where we move the extent from the COW to the data fork after I/O completion the race is, however, not quite as harmless. In the worst case we are now calling xfs_bmapi_write on a region that contains hole in the COW work, which will trip up an assert in debug builds or lead to file system corruption in non-debug builds. This seems to be reproducible with workloads of small O_DSYNC write, although so far I've not managed to come up with a with an isolated reproducer. The fix for the issue is relatively simple: tell xfs_bmapi_write that we are only asked to convert delayed allocations and skip holes in that case. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 44 ++++++++++++++++++++++++++++++++------------ fs/xfs/libxfs/xfs_bmap.h | 6 +++++- fs/xfs/xfs_iomap.c | 2 +- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 44773c9eb957..ab82dd4a4980 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4514,8 +4514,6 @@ xfs_bmapi_write( int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4603,22 +4601,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cecd094404cc..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0d147428971e..1aa3abd67b36 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -681,7 +681,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = 0; + int flags = XFS_BMAPI_DELALLOC; int nres; if (whichfork == XFS_COW_FORK) -- cgit From eab127717a6af54401ba534790c793ec143cd1fc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Jan 2017 15:31:52 -0800 Subject: net: phy: Avoid deadlock during phy_error() phy_error() is called in the PHY state machine workqueue context, and calls phy_trigger_machine() which does a cancel_delayed_work_sync() of the workqueue we execute from, causing a deadlock situation. Augment phy_trigger_machine() machine with a sync boolean indicating whether we should use cancel_*_sync() or just cancel_*_work(). Fixes: 3c293f4e08b5 ("net: phy: Trigger state machine on state change and not polling.") Reported-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 48da6e93c3f7..e687a9cb4a37 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -649,14 +649,18 @@ void phy_start_machine(struct phy_device *phydev) * phy_trigger_machine - trigger the state machine to run * * @phydev: the phy_device struct + * @sync: indicate whether we should wait for the workqueue cancelation * * Description: There has been a change in state which requires that the * state machine runs. */ -static void phy_trigger_machine(struct phy_device *phydev) +static void phy_trigger_machine(struct phy_device *phydev, bool sync) { - cancel_delayed_work_sync(&phydev->state_queue); + if (sync) + cancel_delayed_work_sync(&phydev->state_queue); + else + cancel_delayed_work(&phydev->state_queue); queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); } @@ -693,7 +697,7 @@ static void phy_error(struct phy_device *phydev) phydev->state = PHY_HALTED; mutex_unlock(&phydev->lock); - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, false); } /** @@ -840,7 +844,7 @@ void phy_change(struct phy_device *phydev) } /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, true); return; ignore: @@ -942,7 +946,7 @@ void phy_start(struct phy_device *phydev) if (do_resume) phy_resume(phydev); - phy_trigger_machine(phydev); + phy_trigger_machine(phydev, true); } EXPORT_SYMBOL(phy_start); -- cgit From 4078b76cac68e50ccf1f76a74e7d3d5788aec3fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Jan 2017 16:05:05 -0800 Subject: net: dsa: Check return value of phy_connect_direct() We need to check the return value of phy_connect_direct() in dsa_slave_phy_connect() otherwise we may be continuing the initialization of a slave network device with a PHY that already attached somewhere else and which will soon be in error because the PHY device is in error. The conditions for such an error to occur are that we have a port of our switch that is not disabled, and has the same port number as a PHY address (say both 5) that can be probed using the DSA slave MII bus. We end-up having this slave network device find a PHY at the same address as our port number, and we try to attach to it. A slave network (e.g: port 0) has already attached to our PHY device, and we try to re-attach it with a different network device, but since we ignore the error we would end-up initializating incorrect device references by the time the slave network interface is opened. The code has been (re)organized several times, making it hard to provide an exact Fixes tag, this is a bugfix nonetheless. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 68c9eea00518..ba1b6b9630d2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1105,10 +1105,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) p->phy_interface = p->phy->interface; - phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); - - return 0; + return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); } static int dsa_slave_phy_setup(struct dsa_slave_priv *p, -- cgit From c5f21c9f878b8dcd54d0b9739c025ca73cb4c091 Mon Sep 17 00:00:00 2001 From: Ding Pixel Date: Wed, 18 Jan 2017 17:26:38 +0800 Subject: drm/amdgpu: check ring being ready before using MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return success when the ring is properly initialized, otherwise return failure. Tonga SRIOV VF doesn't have UVD and VCE engines, the initialization of these IPs is bypassed. The system crashes if application submit IB to their rings which are not ready to use. It could be a common issue if IP having ring buffer is disabled for some reason on specific ASIC, so it should check the ring being ready to use. Bug: amdgpu_test crashes system on Tonga VF. Signed-off-by: Ding Pixel Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 29d6d84d1c28..41e41f90265d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -83,6 +83,13 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } break; } + + if (!(*out_ring && (*out_ring)->adev)) { + DRM_ERROR("Ring %d is not initialized on IP %d\n", + ring, ip_type); + return -EINVAL; + } + return 0; } -- cgit From 3a1d19a29670aa7eb58576a31883d0aa9fb77549 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Thu, 19 Jan 2017 09:57:41 +0800 Subject: drm/amdgpu: fix unload driver issue for virtual display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtual display doesn't allocate amdgpu_encoder when initializing, so will get invaild pointer if try to free amdgpu_encoder when unloading driver. Signed-off-by: Xiangliang Yu Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 762f8e82ceb7..e9a176891e13 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -627,11 +627,8 @@ static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - - kfree(amdgpu_encoder->enc_priv); drm_encoder_cleanup(encoder); - kfree(amdgpu_encoder); + kfree(encoder); } static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { -- cgit From b2fbd04498789def80ceba3d5bbc5af7f2f70a5f Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 20 Jan 2017 21:03:03 +0800 Subject: netfilter: nf_tables: validate the name size when possible Currently, if the user add a stateful object with the name size exceed NFT_OBJ_MAXNAMELEN - 1 (i.e. 31), we truncate it down to 31 silently. This is not friendly, furthermore, this will cause duplicated stateful objects when the first 31 characters of the name is same. So limit the stateful object's name size to NFT_OBJ_MAXNAMELEN - 1. After apply this patch, error message will be printed out like this: # name_32=$(printf "%0.sQ" {1..32}) # nft add counter filter $name_32 :1:1-52: Error: Could not process rule: Numerical result out of range add counter filter QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Also this patch cleans up the codes which missing the name size limit validation in nftables. Fixes: e50092404c1b ("netfilter: nf_tables: add stateful objects") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 21 ++++++++++++++------- net/netfilter/nft_dynset.c | 3 ++- net/netfilter/nft_lookup.c | 3 ++- net/netfilter/nft_objref.c | 6 ++++-- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 091d2dcc63b2..b84c7b25219b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -928,7 +928,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, } static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { - [NFTA_CHAIN_TABLE] = { .type = NLA_STRING }, + [NFTA_CHAIN_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, @@ -1854,7 +1855,8 @@ static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, } static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { - [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_RULE_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, @@ -2443,7 +2445,8 @@ nft_select_set_ops(const struct nlattr * const nla[], } static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { - [NFTA_SET_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_FLAGS] = { .type = NLA_U32 }, @@ -3192,8 +3195,10 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { - [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, - [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; @@ -4032,8 +4037,10 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { - [NFTA_OBJ_TABLE] = { .type = NLA_STRING }, - [NFTA_OBJ_NAME] = { .type = NLA_STRING }, + [NFTA_OBJ_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_OBJ_NAME] = { .type = NLA_STRING, + .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 7de2f46734a4..049ad2d9ee66 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -98,7 +98,8 @@ out: } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { - [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING }, + [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, [NFTA_DYNSET_OP] = { .type = NLA_U32 }, [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index d4f97fa7e21d..e21aea7e5ec8 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -49,7 +49,8 @@ static void nft_lookup_eval(const struct nft_expr *expr, } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { - [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SET] = { .type = NLA_STRING, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 415a65ba2b85..1ae8c49ca4a1 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -193,10 +193,12 @@ nft_objref_select_ops(const struct nft_ctx *ctx, } static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { - [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING, + .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, - [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING, + .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, }; -- cgit From 8ac092519ad91931c96d306c4bfae2c6587c325f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 Jan 2017 22:44:12 -0500 Subject: NFSv4.1: Fix a deadlock in layoutget We cannot call nfs4_handle_exception() without first ensuring that the slot has been freed. If not, we end up deadlocking with the process waiting for recovery to complete, and recovery waiting for the slot table to drain. Fixes: 2e80dbe7ac51 ("NFSv4.1: Close callback races for OPEN, LAYOUTGET...") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ecc151697fd4..59bb574d7d7c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8490,6 +8490,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) -- cgit From af2b7fa17eb92e52b65f96604448ff7a2a89ee99 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Mon, 23 Jan 2017 19:42:54 +0000 Subject: powerpc: Add missing error check to prom_find_boot_cpu() prom_init.c calls 'instance-to-package' twice, but the return is not checked during prom_find_boot_cpu(). The result is then passed to prom_getprop(), which could be PROM_ERROR. Add a return check to prevent this. This was found on a pasemi system, where CFE doesn't have a working 'instance-to package' prom call. Before Commit 5c0484e25ec0 ('powerpc: Endian safe trampoline') the area around addr 0 was mostly 0's and this doesn't cause a problem. Once the macro 'FIXUP_ENDIAN' has been added to head_64.S, the low memory area now has non-zero values, which cause the prom_getprop() call to hang. mpe: Also confirmed that under SLOF if 'instance-to-package' did fail with PROM_ERROR we would crash in SLOF. So the bug is not specific to CFE, it's just that other open firmwares don't trigger it because they have a working 'instance-to-package'. Fixes: 5c0484e25ec0 ("powerpc: Endian safe trampoline") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Darren Stevens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ec47a939cbdd..ac83eb04a8b8 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2834,6 +2834,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); -- cgit From 9dce990d2cf57b5ed4e71a9cdbd7eae4335111ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Jan 2017 08:05:52 +0100 Subject: s390/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. convert_vx_to_fp() is adapted to handle only a specified number of registers rather than unconditionally handling all of them: other callers of this function are adapted appropriately. Based on an initial patch by Dave Martin. Cc: stable@vger.kernel.org Reported-by: Dave Martin Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 7447ba509c30..12020b55887b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -963,6 +963,11 @@ static int s390_fpregs_set(struct task_struct *target, if (target == current) save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp(fprs, target->thread.fpu.vxrs); + else + memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); + /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; @@ -1067,6 +1072,9 @@ static int s390_vxrs_low_set(struct task_struct *target, if (target == current) save_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) -- cgit From dffba9a31c7769be3231c420d4b364c92ba3f1ac Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 23 Jan 2017 14:54:44 -0800 Subject: x86/fpu/xstate: Fix xcomp_bv in XSAVES header The compacted-format XSAVES area is determined at boot time and never changed after. The field xsave.header.xcomp_bv indicates which components are in the fixed XSAVES format. In fpstate_init() we did not set xcomp_bv to reflect the XSAVES format since at the time there is no valid data. However, after we do copy_init_fpstate_to_fpregs() in fpu__clear(), as in commit: b22cbe404a9c x86/fpu: Fix invalid FPU ptrace state after execve() and when __fpu_restore_sig() does fpu__restore() for a COMPAT-mode app, a #GP occurs. This can be easily triggered by doing valgrind on a COMPAT-mode "Hello World," as reported by Joakim Tjernlund and others: https://bugzilla.kernel.org/show_bug.cgi?id=190061 Fix it by setting xcomp_bv correctly. This patch also moves the xcomp_bv initialization to the proper place, which was in copyin_to_xsaves() as of: 4c833368f0bf x86/fpu: Set the xcomp_bv when we fake up a XSAVES area which fixed the bug too, but it's more efficient and cleaner to initialize things once per boot, not for every signal handling operation. Reported-by: Kevin Hao Reported-by: Joakim Tjernlund Signed-off-by: Yu-cheng Yu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V. Shankar Cc: Thomas Gleixner Cc: haokexin@gmail.com Link: http://lkml.kernel.org/r/1485212084-4418-1-git-send-email-yu-cheng.yu@intel.com [ Combined it with 4c833368f0bf. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 4 +++- arch/x86/kernel/fpu/xstate.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e4e97a5355ce..de7234401275 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state) * it will #GP. Make sure it is replaced after the memset(). */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | + xfeatures_mask; if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e287b9075527..1d7770447b3e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1070,7 +1070,6 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } -- cgit From f05fea5b3574a5926c53865eea27139bb40b2f2b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Jan 2017 10:10:16 +1100 Subject: powerpc/eeh: Fix wrong flag passed to eeh_unfreeze_pe() In __eeh_clear_pe_frozen_state(), we should pass the flag's value instead of its address to eeh_unfreeze_pe(). The isolated flag is cleared if no error returned from __eeh_clear_pe_frozen_state(). We never observed the error from the function. So the isolated flag should have been always cleared, no real issue is caused because of the misused @flag. This fixes the code by passing the value of @flag to eeh_unfreeze_pe(). Fixes: 5cfb20b96f6 ("powerpc/eeh: Emulate EEH recovery for VFIO devices") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d88573bdd090..b94887165a10 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) -- cgit From f2574030b0e33263b8a1c28fa3c4fa9292283799 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jan 2017 21:37:20 +1100 Subject: powerpc: Revert the initial stack protector support Unfortunately the stack protector support we merged recently only works on some toolchains. If the toolchain is built without glibc support everything works fine, but if glibc is built then it leads to a panic at boot. The solution is not rc5 material, so revert the support for now. This reverts commits: 6533b7c16ee5 ("powerpc: Initial stack protector (-fstack-protector) support") 902e06eb86cd ("powerpc/32: Change the stack protector canary value per task") Fixes: 6533b7c16ee5 ("powerpc: Initial stack protector (-fstack-protector) support") Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 - arch/powerpc/include/asm/stackprotector.h | 40 ------------------------------- arch/powerpc/kernel/Makefile | 4 ---- arch/powerpc/kernel/asm-offsets.c | 3 --- arch/powerpc/kernel/entry_32.S | 6 +---- arch/powerpc/kernel/process.c | 6 ----- 6 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 arch/powerpc/include/asm/stackprotector.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a8ee573fe610..a46d1c0d14d3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -164,7 +164,6 @@ config PPC select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select HAVE_ARCH_HARDENED_USERCOPY select HAVE_KERNEL_GZIP - select HAVE_CC_STACKPROTECTOR config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h deleted file mode 100644 index 6720190eabec..000000000000 --- a/arch/powerpc/include/asm/stackprotector.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * GCC stack protector support. - * - * Stack protector works by putting predefined pattern at the start of - * the stack frame and verifying that it hasn't been overwritten when - * returning from the function. The pattern is called stack canary - * and gcc expects it to be defined by a global variable called - * "__stack_chk_guard" on PPC. This unfortunately means that on SMP - * we cannot have a different canary value per task. - */ - -#ifndef _ASM_STACKPROTECTOR_H -#define _ASM_STACKPROTECTOR_H - -#include -#include -#include - -extern unsigned long __stack_chk_guard; - -/* - * Initialize the stackprotector canary value. - * - * NOTE: this must only be called from functions that never return, - * and it must always be inlined. - */ -static __always_inline void boot_init_stack_canary(void) -{ - unsigned long canary; - - /* Try to get a semi random initial value. */ - get_random_bytes(&canary, sizeof(canary)); - canary ^= mftb(); - canary ^= LINUX_VERSION_CODE; - - current->stack_canary = canary; - __stack_chk_guard = current->stack_canary; -} - -#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 23f8082d7bfa..f4c2b52e58b3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -19,10 +19,6 @@ CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -# -fstack-protector triggers protection checks in this code, -# but it is being used too early to link to meaningful stack_chk logic. -CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) - ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0601e6a7297c..195a9fc8f81c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,9 +91,6 @@ int main(void) DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); #endif -#ifdef CONFIG_CC_STACKPROTECTOR - DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); -#endif DEFINE(KSP, offsetof(struct thread_struct, ksp)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5742dbdbee46..3841d749a430 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -674,11 +674,7 @@ BEGIN_FTR_SECTION mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ END_FTR_SECTION_IFSET(CPU_FTR_SPE) #endif /* CONFIG_SPE */ -#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - lwz r0,TSK_STACK_CANARY(r2) - lis r4,__stack_chk_guard@ha - stw r0,__stack_chk_guard@l(r4) -#endif + lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 04885cec24df..5dd056df0baa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -64,12 +64,6 @@ #include #include -#ifdef CONFIG_CC_STACKPROTECTOR -#include -unsigned long __stack_chk_guard __read_mostly; -EXPORT_SYMBOL(__stack_chk_guard); -#endif - /* Transactional Memory debug */ #ifdef TM_DEBUG_SW #define TM_DEBUG(x...) printk(KERN_INFO x) -- cgit From 5d03a2fd2292e71936c4235885c35ccc3c94695b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Tue, 24 Jan 2017 10:31:18 +0100 Subject: USB: serial: option: add device ID for HP lt2523 (Novatel E371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yet another laptop vendor rebranded Novatel E371. Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7ce31a4c7e7f..42cc72e54c05 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2007,6 +2007,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit From a971c5545c3d45a1e33fda6e57913bb75aaa20c9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 23 Jan 2017 14:54:10 -0200 Subject: ARM: dts: imx: Pass 'chosen' and 'memory' nodes Commit 7f107887d199 ("ARM: dts: imx: Remove skeleton.dtsi") causes boot issues when the bootloader does not create a 'chosen' node if such node is not present in the dtb. The reason for the boot failure is well explained by Javier Martinez Canillas: "the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info." , so pass an empty 'chosen' node to fix the boot problem. This issue has been seen in the kernelci reports with Barebox as bootloader. Also pass the 'memory' node in order to fix boot issues on the SolidRun iMX6 platforms. Fixes: 7f107887d199 ("ARM: dts: imx: Remove skeleton.dtsi") Reported-by: kernelci.org bot Reported-by: Russell King Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx1.dtsi | 8 ++++++++ arch/arm/boot/dts/imx23.dtsi | 8 ++++++++ arch/arm/boot/dts/imx25.dtsi | 8 ++++++++ arch/arm/boot/dts/imx27.dtsi | 8 ++++++++ arch/arm/boot/dts/imx28.dtsi | 8 ++++++++ arch/arm/boot/dts/imx31.dtsi | 8 ++++++++ arch/arm/boot/dts/imx35.dtsi | 8 ++++++++ arch/arm/boot/dts/imx50.dtsi | 8 ++++++++ arch/arm/boot/dts/imx51.dtsi | 8 ++++++++ arch/arm/boot/dts/imx53.dtsi | 8 ++++++++ arch/arm/boot/dts/imx6qdl.dtsi | 8 ++++++++ arch/arm/boot/dts/imx6sl.dtsi | 8 ++++++++ arch/arm/boot/dts/imx6sx.dtsi | 8 ++++++++ arch/arm/boot/dts/imx6ul.dtsi | 8 ++++++++ arch/arm/boot/dts/imx7s.dtsi | 8 ++++++++ 15 files changed, 120 insertions(+) diff --git a/arch/arm/boot/dts/imx1.dtsi b/arch/arm/boot/dts/imx1.dtsi index b792eee3899b..2ee40bc9ec21 100644 --- a/arch/arm/boot/dts/imx1.dtsi +++ b/arch/arm/boot/dts/imx1.dtsi @@ -18,6 +18,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { gpio0 = &gpio1; diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index ac2a9da62b6c..43ccbbf754a3 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -16,6 +16,14 @@ #size-cells = <1>; interrupt-parent = <&icoll>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { gpio0 = &gpio0; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 831d09a28155..acd475659156 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -14,6 +14,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index 9d8b5969ee3b..b397384248f4 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -19,6 +19,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 3aabf65a6a52..d6a2190b60ef 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -17,6 +17,14 @@ #size-cells = <1>; interrupt-parent = <&icoll>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &mac0; diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 85cd8be22f71..23b0d2cf9acd 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -12,6 +12,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { serial0 = &uart1; diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index 9f40e6229189..d0496c65cea2 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -13,6 +13,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx50.dtsi b/arch/arm/boot/dts/imx50.dtsi index fe0221e4cbf7..ceae909e2201 100644 --- a/arch/arm/boot/dts/imx50.dtsi +++ b/arch/arm/boot/dts/imx50.dtsi @@ -17,6 +17,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index 33526cade735..1ee1d542d9ad 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -19,6 +19,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index ca51dc03e327..2e516f4985e4 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -19,6 +19,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 89b834f3fa17..e7d30f45b161 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -16,6 +16,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 19cbd879c448..cc9572ea2860 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -14,6 +14,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 10f333016197..dd4ec85ecbaa 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -15,6 +15,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { can0 = &flexcan1; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 39845a7e0463..53d3f8e41e9b 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -15,6 +15,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { ethernet0 = &fec1; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 8ff2cbdd8f0d..be33dfc86838 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -50,6 +50,14 @@ / { #address-cells = <1>; #size-cells = <1>; + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + * Also for U-Boot there must be a pre-existing /memory node. + */ + chosen {}; + memory { device_type = "memory"; reg = <0 0>; }; aliases { gpio0 = &gpio1; -- cgit From 690e5325b8c7d5db05fc569c0f7b888bb4248272 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Jan 2017 14:50:19 +0100 Subject: block: fix use after free in __blkdev_direct_IO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't dereference the dio structure after submitting the last bio for this request, as I/O completion might have happened before the code is run. Introduce a local is_sync variable instead. Fixes: 542ff7bf ("block: new direct I/O implementation") Signed-off-by: Christoph Hellwig Reported-by: Matias Bjørling Tested-by: Matias Bjørling Signed-off-by: Jens Axboe --- fs/block_dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 5db5d1340d69..3c47614a4b32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,7 +331,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; - bool is_read = (iov_iter_rw(iter) == READ); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; int ret; @@ -344,7 +344,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); - dio->is_sync = is_sync_kiocb(iocb); + dio->is_sync = is_sync = is_sync_kiocb(iocb); if (dio->is_sync) dio->waiter = current; else @@ -398,7 +398,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } blk_finish_plug(&plug); - if (!dio->is_sync) + if (!is_sync) return -EIOCBQUEUED; for (;;) { -- cgit From 0d6da872d3e4a60f43c295386d7ff9a4cdcd57e9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 23 Jan 2017 22:59:44 +0100 Subject: s390/mm: Fix cmma unused transfer from pgste into pte The last pgtable rework silently disabled the CMMA unused state by setting a local pte variable (a parameter) instead of propagating it back into the caller. Fix it. Fixes: ebde765c0e85 ("s390/mm: uninline ptep_xxx functions from pgtable.h") Cc: stable@vger.kernel.org # v4.6+ Cc: Martin Schwidefsky Cc: Claudio Imbrenda Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7a1897c51c54..d56ef26d4681 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -202,7 +202,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm, return pgste; } -static inline void ptep_xchg_commit(struct mm_struct *mm, +static inline pte_t ptep_xchg_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pgste_t pgste, pte_t old, pte_t new) { @@ -220,6 +220,7 @@ static inline void ptep_xchg_commit(struct mm_struct *mm, } else { *ptep = new; } + return old; } pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, @@ -231,7 +232,7 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } @@ -246,7 +247,7 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } -- cgit From 115865fa0826ed18ca04717cf72d0fe874c0fe7f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jan 2017 09:29:09 +0100 Subject: mac80211: don't try to sleep in rate_control_rate_init() In my previous patch, I missed that rate_control_rate_init() is called from some places that cannot sleep, so it cannot call ieee80211_recalc_min_chandef(). Remove that call for now to fix the context bug, we'll have to find a different way to fix the minimum channel width issue. Fixes: 96aa2e7cf126 ("mac80211: calculate min channel width correctly") Reported-by: Xiaolong Ye (via lkp-robot) Signed-off-by: Johannes Berg --- net/mac80211/rate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 9e2641d45587..206698bc93f4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -40,8 +40,6 @@ void rate_control_rate_init(struct sta_info *sta) ieee80211_sta_set_rx_nss(sta); - ieee80211_recalc_min_chandef(sta->sdata); - if (!ref) return; -- cgit From ad8e66b4a80182174f73487ed25fd2140cf43361 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 28 Dec 2016 12:48:28 +0200 Subject: IB/srp: fix mr allocation when the device supports sg gaps If the device support arbitrary sg list mapping (device cap IB_DEVICE_SG_GAPS_REG set) we allocate the memory regions with IB_MR_TYPE_SG_GAPS. Fixes: 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Cc: # 4.7+ Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8ddc07123193..0f67cf909462 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -371,6 +371,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_desc *d; struct ib_mr *mr; int i, ret = -EINVAL; + enum ib_mr_type mr_type; if (pool_size <= 0) goto err; @@ -384,9 +385,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); + if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, - max_page_list_len); + mr = ib_alloc_mr(pd, mr_type, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); if (ret == -ENOMEM) -- cgit From 0a475ef4226e305bdcffe12b401ca1eab06c4913 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 4 Jan 2017 15:59:37 +0200 Subject: IB/srp: fix invalid indirect_sg_entries parameter value After setting indirect_sg_entries module_param to huge value (e.g 500,000), srp_alloc_req_data() fails to allocate indirect descriptors for the request ring (kmalloc fails). This commit enforces the maximum value of indirect_sg_entries to be SG_MAX_SEGMENTS as signified in module param description. Fixes: 65e8617fba17 (scsi: rename SCSI_MAX_{SG, SG_CHAIN}_SEGMENTS) Fixes: c07d424d6118 (IB/srp: add support for indirect tables that don't fit in SRP_CMD) Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Laurence Oberman Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 0f67cf909462..79bf48477ddb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3699,6 +3699,12 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + if (indirect_sg_entries > SG_MAX_SEGMENTS) { + pr_warn("Clamping indirect_sg_entries to %u\n", + SG_MAX_SEGMENTS); + indirect_sg_entries = SG_MAX_SEGMENTS; + } + srp_remove_wq = create_workqueue("srp_remove"); if (!srp_remove_wq) { ret = -ENOMEM; -- cgit From 7b9e1d89e1b6a3b99a8fdd949aa0f98dd5bf2f6b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Jan 2017 17:22:01 +0200 Subject: MAINTAINERS: Add myself to X86 PLATFORM DRIVERS as a co-maintainer For last few months Darren and I are co-maintaining PDx86 subsystem. Make this fact official by updating MAINTAINERS database. Acked-by: Darren Hart Signed-off-by: Andy Shevchenko --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c36976d3bd1a..235d66237938 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13436,6 +13436,7 @@ F: arch/x86/ X86 PLATFORM DRIVERS M: Darren Hart +M: Andy Shevchenko L: platform-driver-x86@vger.kernel.org T: git git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git S: Maintained -- cgit From 1e5db6c31ade4150c2e2b1a21e39f776c38fea39 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 18 Jan 2017 00:40:39 +0200 Subject: IB/iser: Fix sg_tablesize calculation For devices that can register page list that is bigger than USHRT_MAX, we actually take the wrong value for sg_tablesize. E.g: for CX4 max_fast_reg_page_list_len is 65536 (bigger than USHRT_MAX) so we set sg_tablesize to 0 by mistake. Therefore, each IO that is bigger than 4k splitted to "< 4k" chunks that cause performance degredation. Remove wrong sg_tablesize assignment, and use the value that was set during address resolution handler with the needed casting. Cc: # v4.5+ Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9104e6b8cac9..1c911876556e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -651,13 +651,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } - /* - * Limit the sg_tablesize and max_sectors based on the device - * max fastreg page list length. - */ - shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, - ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len); - if (iscsi_host_add(shost, ib_conn->device->ib_device->dma_device)) { mutex_unlock(&iser_conn->state_mutex); -- cgit From 83236f0157feec0f01bf688a1474b889bdcc5ad0 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 18 Jan 2017 00:40:40 +0200 Subject: IB/iser: remove unused variable from iser_conn struct max_sectors calculation was fixed in commit: 9c674815d346 ("IB/iser: Fix max_sectors calculation"). Thus, iser_conn variable scsi_max_sectors is not needed anymore. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Tested-by: Raju Rangoju Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ++++ drivers/infiniband/ulp/iser/iscsi_iser.h | 2 -- drivers/infiniband/ulp/iser/iser_verbs.c | 13 +------------ 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1c911876556e..e71af717e71b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -672,6 +672,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9; shost->max_sectors = min(iser_max_sectors, max_fr_sectors); + iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n", + iser_conn, shost->sg_tablesize, + shost->max_sectors); + if (cmds_max > max_cmds) { iser_info("cmds_max changed from %u to %u\n", cmds_max, max_cmds); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0be6a7c5ddb5..9d0b22ad58c1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -496,7 +496,6 @@ struct ib_conn { * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize - * @scsi_max_sectors: scsi host max sectors */ struct iser_conn { struct ib_conn ib_conn; @@ -519,7 +518,6 @@ struct iser_conn { struct iser_rx_desc *rx_descs; u32 num_rx_descs; unsigned short scsi_sg_tablesize; - unsigned int scsi_max_sectors; bool snd_w_inv; }; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 8ae7a3beddb7..6a9d1cb548ee 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -707,18 +707,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, device->ib_device->attrs.max_fast_reg_page_list_len); - if (sg_tablesize > sup_sg_tablesize) { - sg_tablesize = sup_sg_tablesize; - iser_conn->scsi_max_sectors = sg_tablesize * SIZE_4K / 512; - } else { - iser_conn->scsi_max_sectors = max_sectors; - } - - iser_conn->scsi_sg_tablesize = sg_tablesize; - - iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n", - iser_conn, iser_conn->scsi_sg_tablesize, - iser_conn->scsi_max_sectors); + iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize); } /** -- cgit From bd00fdf198e2da475a2f4265a83686ab42d998a8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 24 Jan 2017 17:50:26 +0100 Subject: vfio/spapr: fail tce_iommu_attach_group() when iommu_data is null The recently added mediated VFIO driver doesn't know about powerpc iommu. It thus doesn't register a struct iommu_table_group in the iommu group upon device creation. The iommu_data pointer hence remains null. This causes a kernel oops when userspace tries to set the iommu type of a container associated with a mediated device to VFIO_SPAPR_TCE_v2_IOMMU. [ 82.585440] mtty mtty: MDEV: Registered [ 87.655522] iommu: Adding device 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 to group 10 [ 87.655527] vfio_mdev 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001: MDEV: group_id = 10 [ 116.297184] Unable to handle kernel paging request for data at address 0x00000030 [ 116.297389] Faulting instruction address: 0xd000000007870524 [ 116.297465] Oops: Kernel access of bad area, sig: 11 [#1] [ 116.297611] SMP NR_CPUS=2048 [ 116.297611] NUMA [ 116.297627] PowerNV ... [ 116.297954] CPU: 33 PID: 7067 Comm: qemu-system-ppc Not tainted 4.10.0-rc5-mdev-test #8 [ 116.297993] task: c000000e7718b680 task.stack: c000000e77214000 [ 116.298025] NIP: d000000007870524 LR: d000000007870518 CTR: 0000000000000000 [ 116.298064] REGS: c000000e77217990 TRAP: 0300 Not tainted (4.10.0-rc5-mdev-test) [ 116.298103] MSR: 9000000000009033 [ 116.298107] CR: 84004444 XER: 00000000 [ 116.298154] CFAR: c00000000000888c DAR: 0000000000000030 DSISR: 40000000 SOFTE: 1 GPR00: d000000007870518 c000000e77217c10 d00000000787b0ed c000000eed2103c0 GPR04: 0000000000000000 0000000000000000 c000000eed2103e0 0000000f24320000 GPR08: 0000000000000104 0000000000000001 0000000000000000 d0000000078729b0 GPR12: c00000000025b7e0 c00000000fe08400 0000000000000001 000001002d31d100 GPR16: 000001002c22c850 00003ffff315c750 0000000043145680 0000000043141bc0 GPR20: ffffffffffffffed fffffffffffff000 0000000020003b65 d000000007706018 GPR24: c000000f16cf0d98 d000000007706000 c000000003f42980 c000000003f42980 GPR28: c000000f1575ac00 c000000003f429c8 0000000000000000 c000000eed2103c0 [ 116.298504] NIP [d000000007870524] tce_iommu_attach_group+0x10c/0x360 [vfio_iommu_spapr_tce] [ 116.298555] LR [d000000007870518] tce_iommu_attach_group+0x100/0x360 [vfio_iommu_spapr_tce] [ 116.298601] Call Trace: [ 116.298610] [c000000e77217c10] [d000000007870518] tce_iommu_attach_group+0x100/0x360 [vfio_iommu_spapr_tce] (unreliable) [ 116.298671] [c000000e77217cb0] [d0000000077033a0] vfio_fops_unl_ioctl+0x278/0x3e0 [vfio] [ 116.298713] [c000000e77217d40] [c0000000002a3ebc] do_vfs_ioctl+0xcc/0x8b0 [ 116.298745] [c000000e77217de0] [c0000000002a4700] SyS_ioctl+0x60/0xc0 [ 116.298782] [c000000e77217e30] [c00000000000b220] system_call+0x38/0xfc [ 116.298812] Instruction dump: [ 116.298828] 7d3f4b78 409effc8 3d220000 e9298020 3c800140 38a00018 608480c0 e8690028 [ 116.298869] 4800249d e8410018 7c7f1b79 41820230 2fa90000 419e0114 e9090020 [ 116.298914] ---[ end trace 1e10b0ced08b9120 ]--- This patch fixes the oops. Reported-by: Vaibhav Jain Signed-off-by: Greg Kurz Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index c8823578a1b2..128d10282d16 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1270,6 +1270,10 @@ static int tce_iommu_attach_group(void *iommu_data, /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ table_group = iommu_group_get_iommudata(iommu_group); + if (!table_group) { + ret = -ENODEV; + goto unlock_exit; + } if (tce_groups_attached(container) && (!table_group->ops || !table_group->ops->take_ownership || -- cgit From b1a27eac7fefff33ccf6acc919fc0725bf9815fb Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 22 Jan 2017 14:41:22 +0100 Subject: IB/cxgb3: fix misspelling in header guard Use CXGB3_... instead of CXBG3_... Fixes: a85fb3383340 ("IB/cxgb3: Move user vendor structures") Cc: stable@vger.kernel.org # 4.9 Signed-off-by: Nicolas Iooss Reviewed-by: Leon Romanovsky Acked-by: Steve Wise Signed-off-by: Doug Ledford --- include/uapi/rdma/cxgb3-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h index 48a19bda071b..d24eee12128f 100644 --- a/include/uapi/rdma/cxgb3-abi.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -30,7 +30,7 @@ * SOFTWARE. */ #ifndef CXGB3_ABI_USER_H -#define CXBG3_ABI_USER_H +#define CXGB3_ABI_USER_H #include -- cgit From f39aac7e839368e3895dff952f3bfa0a22e20060 Mon Sep 17 00:00:00 2001 From: Jingju Hou Date: Sun, 22 Jan 2017 18:20:56 +0800 Subject: net: phy: marvell: Add Wake from LAN support for 88E1510 PHY Signed-off-by: Jingju Hou Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 0b78210c0fa7..ed0d235cf850 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1679,6 +1679,8 @@ static struct phy_driver marvell_drivers[] = { .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .did_interrupt = &m88e1121_did_interrupt, + .get_wol = &m88e1318_get_wol, + .set_wol = &m88e1318_set_wol, .resume = &marvell_resume, .suspend = &marvell_suspend, .get_sset_count = marvell_get_sset_count, -- cgit From 059aa734824165507c65fd30a55ff000afd14983 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Jan 2017 14:04:29 -0500 Subject: nfs: Don't increment lock sequence ID after NFS4ERR_MOVED Xuan Qi reports that the Linux NFSv4 client failed to lock a file that was migrated. The steps he observed on the wire: 1. The client sent a LOCK request to the source server 2. The source server replied NFS4ERR_MOVED 3. The client switched to the destination server 4. The client sent the same LOCK request to the destination server with a bumped lock sequence ID 5. The destination server rejected the LOCK request with NFS4ERR_BAD_SEQID RFC 3530 section 8.1.5 provides a list of NFS errors which do not bump a lock sequence ID. However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section 9.1.7, this list has been updated by the addition of NFS4ERR_MOVED. Reported-by: Xuan Qi Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bca536341d1a..1b1ca04820a3 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -282,7 +282,7 @@ enum nfsstat4 { static inline bool seqid_mutating_err(u32 err) { - /* rfc 3530 section 8.1.5: */ + /* See RFC 7530, section 9.1.7 */ switch (err) { case NFS4ERR_STALE_CLIENTID: case NFS4ERR_STALE_STATEID: @@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_BADXDR: case NFS4ERR_RESOURCE: case NFS4ERR_NOFILEHANDLE: + case NFS4ERR_MOVED: return false; }; return true; -- cgit From a430607b2ef7c3be090f88c71cfcb1b3988aa7c0 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 24 Jan 2017 11:34:20 -0500 Subject: NFSv4.0: always send mode in SETATTR after EXCLUSIVE4 Some nfsv4.0 servers may return a mode for the verifier following an open with EXCLUSIVE4 createmode, but this does not mean the client should skip setting the mode in the following SETATTR. It should only do that for EXCLUSIVE4_1 or UNGAURDED createmode. Fixes: 5334c5bdac92 ("NFS: Send attributes in OPEN request for NFS4_CREATE_EXCLUSIVE4_1") Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 59bb574d7d7c..0a0eaecf9676 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2700,7 +2700,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; /* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) -- cgit From 92fdb527eecff7e5eb945a3fbf4743110f5c1171 Mon Sep 17 00:00:00 2001 From: Yuriy Kolerov Date: Wed, 28 Dec 2016 11:46:26 +0300 Subject: ARCv2: MCIP: Deprecate setting of affinity in Device Tree Ignore value of interrupt distribution mode for common interrupts in IDU since setting of affinity using value from Device Tree is deprecated in ARC. Originally it is done in idu_irq_xlate() function and it is semantically wrong and does not guaranty that an affinity value will be set properly. idu_irq_enable() function is better place for initialization of common interrupts. By default send all common interrupts to all available online CPUs. The affinity of common interrupts in IDU must be set manually since in some cases the kernel will not call irq_set_affinity() by itself: 1. When the kernel is not configured with support of SMP. 2. When the kernel is configured with support of SMP but upper interrupt controllers does not support setting of the affinity and cannot propagate it to IDU. Signed-off-by: Yuriy Kolerov Signed-off-by: Vineet Gupta --- .../interrupt-controller/snps,archs-idu-intc.txt | 3 ++ arch/arc/kernel/mcip.c | 52 +++++++++------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt index 0dcb7c7d3e40..944657684d73 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt @@ -15,6 +15,9 @@ Properties: Second cell specifies the irq distribution mode to cores 0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + The second cell in interrupts property is deprecated and may be ignored by + the kernel. + intc accessed via the special ARC AUX register interface, hence "reg" property is not specified. diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 9274f8ade8c7..9988b427a1e0 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -175,7 +175,6 @@ static void idu_irq_unmask(struct irq_data *data) raw_spin_unlock_irqrestore(&mcip_lock, flags); } -#ifdef CONFIG_SMP static int idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, bool force) @@ -205,12 +204,27 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, return IRQ_SET_MASK_OK; } -#endif + +static void idu_irq_enable(struct irq_data *data) +{ + /* + * By default send all common interrupts to all available online CPUs. + * The affinity of common interrupts in IDU must be set manually since + * in some cases the kernel will not call irq_set_affinity() by itself: + * 1. When the kernel is not configured with support of SMP. + * 2. When the kernel is configured with support of SMP but upper + * interrupt controllers does not support setting of the affinity + * and cannot propagate it to IDU. + */ + idu_irq_set_affinity(data, cpu_online_mask, false); + idu_irq_unmask(data); +} static struct irq_chip idu_irq_chip = { .name = "MCIP IDU Intc", .irq_mask = idu_irq_mask, .irq_unmask = idu_irq_unmask, + .irq_enable = idu_irq_enable, #ifdef CONFIG_SMP .irq_set_affinity = idu_irq_set_affinity, #endif @@ -243,36 +257,14 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type) { - irq_hw_number_t hwirq = *out_hwirq = intspec[0]; - int distri = intspec[1]; - unsigned long flags; - + /* + * Ignore value of interrupt distribution mode for common interrupts in + * IDU which resides in intspec[1] since setting an affinity using value + * from Device Tree is deprecated in ARC. + */ + *out_hwirq = intspec[0]; *out_type = IRQ_TYPE_NONE; - /* XXX: validate distribution scheme again online cpu mask */ - if (distri == 0) { - /* 0 - Round Robin to all cpus, otherwise 1 bit per core */ - raw_spin_lock_irqsave(&mcip_lock, flags); - idu_set_dest(hwirq, BIT(num_online_cpus()) - 1); - idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); - raw_spin_unlock_irqrestore(&mcip_lock, flags); - } else { - /* - * DEST based distribution for Level Triggered intr can only - * have 1 CPU, so generalize it to always contain 1 cpu - */ - int cpu = ffs(distri); - - if (cpu != fls(distri)) - pr_warn("IDU irq %lx distri mode set to cpu %x\n", - hwirq, cpu); - - raw_spin_lock_irqsave(&mcip_lock, flags); - idu_set_dest(hwirq, cpu); - idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST); - raw_spin_unlock_irqrestore(&mcip_lock, flags); - } - return 0; } -- cgit From 3f5c34c6d4688b3b7e1dbc7bbc68a2f03a0d6b0c Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Tue, 24 Jan 2017 10:12:00 -0800 Subject: Input: wm97xx - make missing platform data non-fatal Commit 6480af4915d6 ("power_supply: wm97xx_battery: use power_supply_get_drvdata") made wm97xx platform data mandatory, although it's still optional. This patch fixes an oops during driver probe on one of my MIPS boards with a wm9712. Signed-off-by: Manuel Lauss Reviewed-by: Robert Jarzmik Acked-by: Charles Keepax Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm97xx-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 83cf11312fd9..c9d1c91e1887 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -682,7 +682,7 @@ static int wm97xx_probe(struct device *dev) } platform_set_drvdata(wm->battery_dev, wm); wm->battery_dev->dev.parent = dev; - wm->battery_dev->dev.platform_data = pdata->batt_pdata; + wm->battery_dev->dev.platform_data = pdata ? pdata->batt_pdata : NULL; ret = platform_device_add(wm->battery_dev); if (ret < 0) goto batt_reg_err; -- cgit From 7630ea4bda18df2ee1c64dfdca1724a9cc32f920 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 17:41:32 +0100 Subject: Documentation: net: phy: improve explanation when to specify the PHY ID The old description basically read like "ethernet-phy-idAAAA.BBBB" can be specified when you know the actual PHY ID. However, specifying this has a side-effect: it forces Linux to bind to a certain PHY driver (the one that matches the ID given in the compatible string), ignoring the ID which is reported by the actual PHY. Whenever a device is shipped with (multiple) different PHYs during it's production lifetime then explicitly specifying "ethernet-phy-idAAAA.BBBB" could break certain revisions of that device. Signed-off-by: Martin Blumenstingl Reviewed-by: Andrew Lunn Acked-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/phy.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index ff1bc4b1bb3b..fb5056b22685 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -19,8 +19,9 @@ Optional Properties: specifications. If neither of these are specified, the default is to assume clause 22. - If the phy's identifier is known then the list may contain an entry - of the form: "ethernet-phy-idAAAA.BBBB" where + If the PHY reports an incorrect ID (or none at all) then the + "compatible" list may contain an entry with the correct PHY ID in the + form: "ethernet-phy-idAAAA.BBBB" where AAAA - The value of the 16 bit Phy Identifier 1 register as 4 hex digits. This is the chip vendor OUI bits 3:18 BBBB - The value of the 16 bit Phy Identifier 2 register as -- cgit From 6a0b76c04ec157c88ca943debf78a8ee58469f2d Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 23 Jan 2017 14:18:43 +0800 Subject: r8152: don't execute runtime suspend if the tx is not empty Runtime suspend shouldn't be executed if the tx queue is not empty, because the device is not idle. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0e99af090734..e1466b4d2b6c 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "6" +#define NET_VERSION "7" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -3574,6 +3574,8 @@ static bool delay_autosuspend(struct r8152 *tp) */ if (!sw_linking && tp->rtl_ops.in_nway(tp)) return true; + else if (!skb_queue_empty(&tp->tx_queue)) + return true; else return false; } -- cgit From a59b7e0246774e28193126fe7fdbbd0ae9c67dcc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 23 Jan 2017 11:11:42 +0100 Subject: mlxsw: spectrum_router: Correctly reallocate adjacency entries mlxsw_sp_nexthop_group_mac_update() is called in one of two cases: 1) When the MAC of a nexthop needs to be updated 2) When the size of a nexthop group has changed In the second case the adjacency entries for the nexthop group need to be reallocated from the adjacency table. In this case we must write to the entries the MAC addresses of all the nexthops that should be offloaded and not only those whose MAC changed. Otherwise, these entries would be filled with garbage data, resulting in packet loss. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 01d0efa9c5c7..9e494a446b7e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1172,7 +1172,8 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) + struct mlxsw_sp_nexthop_group *nh_grp, + bool reallocate) { u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; @@ -1187,7 +1188,7 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, continue; } - if (nh->update) { + if (nh->update || reallocate) { err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, adj_index, nh); if (err) @@ -1248,7 +1249,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. */ - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, + false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1276,7 +1278,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; -- cgit From 36425cd67052e3becf325fd4d3ba5691791ef7e4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 24 Jan 2017 10:23:42 -0800 Subject: ARC: udelay: fix inline assembler by adding LP_COUNT to clobber list commit 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint") modified the inline assembly to setup LP_COUNT register manually and NOT rely on gcc to do it (with the +l inline assembler contraint hint, now being retired in the compiler) However the fix was flawed as we didn't add LP_COUNT to asm clobber list, meaning gcc doesn't know that LP_COUNT or zero-delay-loops are in action in the inline asm. This resulted in some fun - as nested ZOL loops were being generared | mov lp_count,250000 ;16 # tmp235, | lp .L__GCC__LP14 # <======= OUTER LOOP (gcc generated) | .L14: | ld r2, [r5] # MEM[(volatile u32 *)prephitmp_43], w | dmb 1 | breq r2, -1, @.L21 #, w,, | bbit0 r2,1,@.L13 # w,, | ld r4,[r7] ;25 # loops_per_jiffy, loops_per_jiffy | mpymu r3,r4,r6 #, loops_per_jiffy, tmp234 | | mov lp_count, r3 # <====== INNER LOOP (from inline asm) | lp 1f | nop | 1: | nop_s | .L__GCC__LP14: ; loop end, start is @.L14 #, This caused issues with drivers relying on sane behaviour of udelay friends. With LP_COUNT added to clobber list, gcc doesn't generate the outer loop in say above case. Addresses STAR 9001146134 Reported-by: Joao Pinto Fixes: 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint") Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/delay.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index a36e8601114d..d5da2115d78a 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -26,7 +26,9 @@ static inline void __delay(unsigned long loops) " lp 1f \n" " nop \n" "1: \n" - : : "r"(loops)); + : + : "r"(loops) + : "lp_count"); } extern void __bad_udelay(void); -- cgit From 517e7610d2ce04d1b8d8b6c6d1a36dcce5cac6ab Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 19 Jan 2017 17:05:00 -0800 Subject: ARCv2: MCIP: update the BCR per current changes Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 3 +-- include/soc/arc/mcip.h | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 9988b427a1e0..9f6b68fd4f3b 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -93,11 +93,10 @@ static void mcip_probe_n_setup(void) READ_BCR(ARC_REG_MCIP_BCR, mp); sprintf(smp_cpuinfo_buf, - "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n", + "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n", mp.ver, mp.num_cores, IS_AVAIL1(mp.ipi, "IPI "), IS_AVAIL1(mp.idu, "IDU "), - IS_AVAIL1(mp.llm, "LLM "), IS_AVAIL1(mp.dbg, "DEBUG "), IS_AVAIL1(mp.gfrc, "GFRC")); diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index 6902c2a8bd23..4b6b489a8d7c 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -55,17 +55,17 @@ struct mcip_cmd { struct mcip_bcr { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad3:8, - idu:1, llm:1, num_cores:6, - iocoh:1, gfrc:1, dbg:1, pad2:1, - msg:1, sem:1, ipi:1, pad:1, + unsigned int pad4:6, pw_dom:1, pad3:1, + idu:1, pad2:1, num_cores:6, + pad:1, gfrc:1, dbg:1, pw:1, + msg:1, sem:1, ipi:1, slv:1, ver:8; #else unsigned int ver:8, - pad:1, ipi:1, sem:1, msg:1, - pad2:1, dbg:1, gfrc:1, iocoh:1, - num_cores:6, llm:1, idu:1, - pad3:8; + slv:1, ipi:1, sem:1, msg:1, + pw:1, dbg:1, gfrc:1, pad:1, + num_cores:6, pad2:1, idu:1, + pad3:1, pw_dom:1, pad4:6; #endif }; -- cgit From bf02454a741b58682a82c314a9a46bed930ed2f7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Jan 2017 14:30:29 -0800 Subject: ARC: smp-boot: Decouple Non masters waiting API from jump to entry point For run-on-reset SMP configs, non master cores call a routine which waits until Master gives it a "go" signal (currently using a shared mem flag). The same routine then jumps off the well known entry point of all non Master cores i.e. @first_lines_of_secondary This patch moves out the last part into one single place in early boot code. This is better in terms of absraction (the wait API only waits) and returns, leaving out the "jump off to" part. In actual implementation this requires some restructuring of the early boot code as well as Master now jumps to BSS setup explicitly, vs. falling thru into it before. Technically this patch doesn't cause any functional change, it just moves the ugly #ifdef'ry from assembly code to "C" Signed-off-by: Vineet Gupta --- arch/arc/kernel/head.S | 14 +++++++------- arch/arc/kernel/smp.c | 6 ++++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 689dd867fdff..8b90d25a15cc 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -71,14 +71,14 @@ ENTRY(stext) GET_CPU_ID r5 cmp r5, 0 mov.nz r0, r5 -#ifdef CONFIG_ARC_SMP_HALT_ON_RESET - ; Non-Master can proceed as system would be booted sufficiently - jnz first_lines_of_secondary -#else + bz .Lmaster_proceed + ; Non-Masters wait for Master to boot enough and bring them up - jnz arc_platform_smp_wait_to_boot -#endif - ; Master falls thru + ; when they resume, tail-call to entry point + mov blink, @first_lines_of_secondary + j arc_platform_smp_wait_to_boot + +.Lmaster_proceed: #endif ; Clear BSS before updating any globals diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 88674d972c9d..44a0d21ed342 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -98,14 +98,16 @@ static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) void arc_platform_smp_wait_to_boot(int cpu) { + /* for halt-on-reset, we've waited already */ + if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET)) + return; + while (wake_flag != cpu) ; wake_flag = 0; - __asm__ __volatile__("j @first_lines_of_secondary \n"); } - const char *arc_platform_smp_cpuinfo(void) { return plat_smp_ops.info ? : ""; -- cgit From 7d211c81e97ef8505610ef82e14e302ab415bad1 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Thu, 19 Jan 2017 13:20:39 -0800 Subject: IB/vmw_pvrdma: Don't leak info from alloc_ucontext Clear out the user response struct correctly. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Reported-by: Dan Carpenter Signed-off-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 54891370d18a..c2aa52638dcb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -306,7 +306,7 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_uc *cmd = &req.create_uc; struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp; - struct pvrdma_alloc_ucontext_resp uresp; + struct pvrdma_alloc_ucontext_resp uresp = {0}; int ret; void *ptr; -- cgit From ff89b070b7c98eb6782361310ca7a15186f15b2c Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Thu, 19 Jan 2017 13:20:40 -0800 Subject: IB/vmw_pvrdma: Fix incorrect cleanup on pvrdma_pci_probe error path If the interrupt allocation failed we should start freeing the CQ rings rather than unregistering the netdev notifier. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 231a1ce1f4be..bd8fbd3d2032 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1029,7 +1029,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, if (ret) { dev_err(&pdev->dev, "failed to allocate interrupts\n"); ret = -ENOMEM; - goto err_netdevice; + goto err_free_cq_ring; } /* Allocate UAR table. */ @@ -1092,8 +1092,6 @@ err_free_uar_table: err_free_intrs: pvrdma_free_irq(dev); pvrdma_disable_msi_all(dev); -err_netdevice: - unregister_netdevice_notifier(&dev->nb_netdev); err_free_cq_ring: pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); err_free_async_ring: -- cgit From d46d29f072accb069cb42b5fbebcc77d9094a785 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 11 Jan 2017 13:38:52 -0800 Subject: md/raid5-cache: delete meaningless code sector_t is unsigned long, it's never < 0 Reported-by: Julia Lawall Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 0e8ed2c327b0..e0d6dd1835d0 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1393,8 +1393,6 @@ static void r5l_do_reclaim(struct r5l_log *log) next_checkpoint = r5c_calculate_new_cp(conf); spin_unlock_irq(&log->io_list_lock); - BUG_ON(reclaimable < 0); - if (reclaimable == 0 || !write_super) return; -- cgit From 86aa1397ddfde563b3692adadb8b8e32e97b4e5e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Jan 2017 17:22:41 -0800 Subject: md/r5cache: read data into orig_page for prexor of cached data With write back cache, we use orig_page to do prexor. This patch makes sure we read data into orig_page for it. Flag R5_OrigPageUPTDODATE is added to show whether orig_page has the latest data from raid disk. We introduce a helper function uptodate_for_rmw() to simplify the a couple conditions in handle_stripe_dirtying(). Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 2 ++ drivers/md/raid5.c | 44 +++++++++++++++++++++++++++++++++++--------- drivers/md/raid5.h | 5 +++++ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index e0d6dd1835d0..95dcaa022e1f 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2349,6 +2349,8 @@ void r5c_release_extra_page(struct stripe_head *sh) struct page *p = sh->dev[i].orig_page; sh->dev[i].orig_page = sh->dev[i].page; + clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); + if (!using_disk_info_extra_page) put_page(p); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 36c13e4be9c9..7780ae44f355 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1015,7 +1015,17 @@ again: if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); - sh->dev[i].vec.bv_page = sh->dev[i].page; + + if (!op_is_write(op) && + test_bit(R5_InJournal, &sh->dev[i].flags)) + /* + * issuing read for a page in journal, this + * must be preparing for prexor in rmw; read + * the data into orig_page + */ + sh->dev[i].vec.bv_page = sh->dev[i].orig_page; + else + sh->dev[i].vec.bv_page = sh->dev[i].page; bi->bi_vcnt = 1; bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_offset = 0; @@ -2380,6 +2390,13 @@ static void raid5_end_read_request(struct bio * bi) } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); + if (test_bit(R5_InJournal, &sh->dev[i].flags)) + /* + * end read for a page in journal, this + * must be preparing for prexor in rmw + */ + set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); + if (atomic_read(&rdev->read_errors)) atomic_set(&rdev->read_errors, 0); } else { @@ -3594,6 +3611,21 @@ unhash: break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); } +/* + * For RMW in write back cache, we need extra page in prexor to store the + * old data. This page is stored in dev->orig_page. + * + * This function checks whether we have data for prexor. The exact logic + * is: + * R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE) + */ +static inline bool uptodate_for_rmw(struct r5dev *dev) +{ + return (test_bit(R5_UPTODATE, &dev->flags)) && + (!test_bit(R5_InJournal, &dev->flags) || + test_bit(R5_OrigPageUPTDODATE, &dev->flags)); +} + static int handle_stripe_dirtying(struct r5conf *conf, struct stripe_head *sh, struct stripe_head_state *s, @@ -3625,9 +3657,7 @@ static int handle_stripe_dirtying(struct r5conf *conf, if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx || test_bit(R5_InJournal, &dev->flags)) && !test_bit(R5_LOCKED, &dev->flags) && - !((test_bit(R5_UPTODATE, &dev->flags) && - (!test_bit(R5_InJournal, &dev->flags) || - dev->page != dev->orig_page)) || + !(uptodate_for_rmw(dev) || test_bit(R5_Wantcompute, &dev->flags))) { if (test_bit(R5_Insync, &dev->flags)) rmw++; @@ -3639,7 +3669,6 @@ static int handle_stripe_dirtying(struct r5conf *conf, i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_InJournal, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags))) { if (test_bit(R5_Insync, &dev->flags)) rcw++; @@ -3693,9 +3722,7 @@ static int handle_stripe_dirtying(struct r5conf *conf, i == sh->pd_idx || i == sh->qd_idx || test_bit(R5_InJournal, &dev->flags)) && !test_bit(R5_LOCKED, &dev->flags) && - !((test_bit(R5_UPTODATE, &dev->flags) && - (!test_bit(R5_InJournal, &dev->flags) || - dev->page != dev->orig_page)) || + !(uptodate_for_rmw(dev) || test_bit(R5_Wantcompute, &dev->flags)) && test_bit(R5_Insync, &dev->flags)) { if (test_bit(STRIPE_PREREAD_ACTIVE, @@ -3722,7 +3749,6 @@ static int handle_stripe_dirtying(struct r5conf *conf, i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_InJournal, &dev->flags) || test_bit(R5_Wantcompute, &dev->flags))) { rcw++; if (test_bit(R5_Insync, &dev->flags) && diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index ed8e1362ab36..461df197d157 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -322,6 +322,11 @@ enum r5dev_flags { * data and parity being written are in the journal * device */ + R5_OrigPageUPTDODATE, /* with write back cache, we read old data into + * dev->orig_page for prexor. When this flag is + * set, orig_page contains latest data in the + * raid disk. + */ }; /* -- cgit From ba02684daf7fb4a827580f909b7c7db61c05ae7d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 12 Jan 2017 17:22:42 -0800 Subject: md/raid5: move comment of fetch_block to right location Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7780ae44f355..13d76767c2cf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3312,13 +3312,6 @@ static int want_replace(struct stripe_head *sh, int disk_idx) return rv; } -/* fetch_block - checks the given member device to see if its data needs - * to be read or computed to satisfy a request. - * - * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill to continue - */ - static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, int disk_idx, int disks) { @@ -3409,6 +3402,12 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, return 0; } +/* fetch_block - checks the given member device to see if its data needs + * to be read or computed to satisfy a request. + * + * Returns 1 when no more member devices need to be checked, otherwise returns + * 0 to tell the loop in handle_stripe_fill to continue + */ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, int disk_idx, int disks) { -- cgit From a85dd7b8df52e35d8ee3794c65cac5c39128fd80 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Jan 2017 17:12:57 -0800 Subject: md/r5cache: flush data only stripes in r5l_recovery_log() For safer operation, all arrays start in write-through mode, which has been better tested and is more mature. And actually the write-through/write-mode isn't persistent after array restarted, so we always start array in write-through mode. However, if recovery found data-only stripes before the shutdown (from previous write-back mode), it is not safe to start the array in write-through mode, as write-through mode can not handle stripes with data in write-back cache. To solve this problem, we flush all data-only stripes in r5l_recovery_log(). When r5l_recovery_log() returns, the array starts with empty cache in write-through mode. This logic is implemented in r5c_recovery_flush_data_only_stripes(): 1. enable write back cache 2. flush all stripes 3. wake up conf->mddev->thread 4. wait for all stripes get flushed (reuse wait_for_quiescent) 5. disable write back cache The wait in 4 will be waked up in release_inactive_stripe_list() when conf->active_stripes reaches 0. It is safe to wake up mddev->thread here because all the resource required for the thread has been initialized. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/md.c | 5 +++++ drivers/md/raid5-cache.c | 56 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 82821ee0d57f..01175dac0db6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev) if (start_readonly && mddev->ro == 0) mddev->ro = 2; /* read-only, but switch on first write */ + /* + * NOTE: some pers->run(), for example r5l_recovery_log(), wakes + * up mddev->thread. It is important to initialize critical + * resources for mddev->thread BEFORE calling pers->run(). + */ err = pers->run(mddev); if (err) pr_warn("md: pers->run() failed ...\n"); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 95dcaa022e1f..3d7dda85494c 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2060,7 +2060,7 @@ static int r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, struct r5l_recovery_ctx *ctx) { - struct stripe_head *sh, *next; + struct stripe_head *sh; struct mddev *mddev = log->rdev->mddev; struct page *page; sector_t next_checkpoint = MaxSector; @@ -2074,7 +2074,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, WARN_ON(list_empty(&ctx->cached_list)); - list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { + list_for_each_entry(sh, &ctx->cached_list, lru) { struct r5l_meta_block *mb; int i; int offset; @@ -2124,14 +2124,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, ctx->pos = write_pos; ctx->seq += 1; next_checkpoint = sh->log_start; - list_del_init(&sh->lru); - raid5_release_stripe(sh); } log->next_checkpoint = next_checkpoint; __free_page(page); return 0; } +static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log, + struct r5l_recovery_ctx *ctx) +{ + struct mddev *mddev = log->rdev->mddev; + struct r5conf *conf = mddev->private; + struct stripe_head *sh, *next; + + if (ctx->data_only_stripes == 0) + return; + + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK; + + list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { + r5c_make_stripe_write_out(sh); + set_bit(STRIPE_HANDLE, &sh->state); + list_del_init(&sh->lru); + raid5_release_stripe(sh); + } + + md_wakeup_thread(conf->mddev->thread); + /* reuse conf->wait_for_quiescent in recovery */ + wait_event(conf->wait_for_quiescent, + atomic_read(&conf->active_stripes) == 0); + + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; +} + static int r5l_recovery_log(struct r5l_log *log) { struct mddev *mddev = log->rdev->mddev; @@ -2158,32 +2183,31 @@ static int r5l_recovery_log(struct r5l_log *log) pos = ctx.pos; ctx.seq += 10000; - if (ctx.data_only_stripes == 0) { - log->next_checkpoint = ctx.pos; - r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); - ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); - } if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) pr_debug("md/raid:%s: starting from clean shutdown\n", mdname(mddev)); - else { + else pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", mdname(mddev), ctx.data_only_stripes, ctx.data_parity_stripes); - if (ctx.data_only_stripes > 0) - if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { - pr_err("md/raid:%s: failed to rewrite stripes to journal\n", - mdname(mddev)); - return -EIO; - } + if (ctx.data_only_stripes == 0) { + log->next_checkpoint = ctx.pos; + r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++); + ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); + } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { + pr_err("md/raid:%s: failed to rewrite stripes to journal\n", + mdname(mddev)); + return -EIO; } log->log_start = ctx.pos; log->seq = ctx.seq; log->last_checkpoint = pos; r5l_write_super(log, pos); + + r5c_recovery_flush_data_only_stripes(log, &ctx); return 0; } -- cgit From 07e83364845e1e1c7e189a01206a9d7d33831568 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 23 Jan 2017 17:12:58 -0800 Subject: md/r5cache: shift complex rmw from read path to write path Write back cache requires a complex RMW mechanism, where old data is read into dev->orig_page for prexor, and then xor is done with dev->page. This logic is already implemented in the write path. However, current read path is not awared of this requirement. When the array is optimal, the RMW is not required, as the data are read from raid disks. However, when the target stripe is degraded, complex RMW is required to generate right data. To keep read path as clean as possible, we handle read path by flushing degraded, in-journal stripes before processing reads to missing dev. Specifically, when there is read requests to a degraded stripe with data in journal, handle_stripe_fill() calls r5c_make_stripe_write_out() and exits. Then handle_stripe_dirtying() will do the complex RMW and flush the stripe to RAID disks. After that, read requests are handled. There is one more corner case when there is non-overwrite bio for the missing (or out of sync) dev. handle_stripe_dirtying() will not be able to process the non-overwrite bios without constructing the data in handle_stripe_fill(). This is fixed by delaying non-overwrite bios in handle_stripe_dirtying(). So handle_stripe_fill() works on these bios after the stripe is flushed to raid disks. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 49 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 13d76767c2cf..dc83da69ca7c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2897,6 +2897,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) return r_sector; } +/* + * There are cases where we want handle_stripe_dirtying() and + * schedule_reconstruction() to delay towrite to some dev of a stripe. + * + * This function checks whether we want to delay the towrite. Specifically, + * we delay the towrite when: + * + * 1. degraded stripe has a non-overwrite to the missing dev, AND this + * stripe has data in journal (for other devices). + * + * In this case, when reading data for the non-overwrite dev, it is + * necessary to handle complex rmw of write back cache (prexor with + * orig_page, and xor with page). To keep read path simple, we would + * like to flush data in journal to RAID disks first, so complex rmw + * is handled in the write patch (handle_stripe_dirtying). + * + */ +static inline bool delay_towrite(struct r5dev *dev, + struct stripe_head_state *s) +{ + return !test_bit(R5_OVERWRITE, &dev->flags) && + !test_bit(R5_Insync, &dev->flags) && s->injournal; +} + static void schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int rcw, int expand) @@ -2917,7 +2941,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->towrite) { + if (dev->towrite && !delay_towrite(dev, s)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantdrain, &dev->flags); if (!expand) @@ -3494,10 +3518,26 @@ static void handle_stripe_fill(struct stripe_head *sh, * midst of changing due to a write */ if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && - !sh->reconstruct_state) + !sh->reconstruct_state) { + + /* + * For degraded stripe with data in journal, do not handle + * read requests yet, instead, flush the stripe to raid + * disks first, this avoids handling complex rmw of write + * back cache (prexor with orig_page, and then xor with + * page) in the read path + */ + if (s->injournal && s->failed) { + if (test_bit(STRIPE_R5C_CACHING, &sh->state)) + r5c_make_stripe_write_out(sh); + goto out; + } + for (i = disks; i--; ) if (fetch_block(sh, s, i, disks)) break; + } +out: set_bit(STRIPE_HANDLE, &sh->state); } @@ -3653,7 +3693,8 @@ static int handle_stripe_dirtying(struct r5conf *conf, } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ struct r5dev *dev = &sh->dev[i]; - if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx || + if (((dev->towrite && !delay_towrite(dev, s)) || + i == sh->pd_idx || i == sh->qd_idx || test_bit(R5_InJournal, &dev->flags)) && !test_bit(R5_LOCKED, &dev->flags) && !(uptodate_for_rmw(dev) || @@ -3717,7 +3758,7 @@ static int handle_stripe_dirtying(struct r5conf *conf, for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if ((dev->towrite || + if (((dev->towrite && !delay_towrite(dev, s)) || i == sh->pd_idx || i == sh->qd_idx || test_bit(R5_InJournal, &dev->flags)) && !test_bit(R5_LOCKED, &dev->flags) && -- cgit From 2e38a37f23c98d7fad87ff022670060b8a0e2bf5 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 24 Jan 2017 10:45:30 -0800 Subject: md/r5cache: disable write back for degraded array write-back cache in degraded mode introduces corner cases to the array. Although we try to cover all these corner cases, it is safer to just disable write-back cache when the array is in degraded mode. In this patch, we disable writeback cache for degraded mode: 1. On device failure, if the array enters degraded mode, raid5_error() will submit async job r5c_disable_writeback_async to disable writeback; 2. In r5c_journal_mode_store(), it is invalid to enable writeback in degraded mode; 3. In r5c_try_caching_write(), stripes with s->failed>0 will be handled in write-through mode. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/raid5.c | 15 ++++++++------- drivers/md/raid5.h | 2 ++ 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 3d7dda85494c..302dea3296ba 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -162,6 +162,8 @@ struct r5l_log { /* to submit async io_units, to fulfill ordering of flush */ struct work_struct deferred_io_work; + /* to disable write back during in degraded mode */ + struct work_struct disable_writeback_work; }; /* @@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work) r5l_do_submit_io(log, io); } +static void r5c_disable_writeback_async(struct work_struct *work) +{ + struct r5l_log *log = container_of(work, struct r5l_log, + disable_writeback_work); + struct mddev *mddev = log->rdev->mddev; + + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) + return; + pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n", + mdname(mddev)); + mddev_suspend(mddev); + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; + mddev_resume(mddev); +} + static void r5l_submit_current_io(struct r5l_log *log) { struct r5l_io_unit *io = log->current_io; @@ -2269,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev, val > R5C_JOURNAL_MODE_WRITE_BACK) return -EINVAL; + if (raid5_calc_degraded(conf) > 0 && + val == R5C_JOURNAL_MODE_WRITE_BACK) + return -EINVAL; + mddev_suspend(mddev); conf->log->r5c_journal_mode = val; mddev_resume(mddev); @@ -2323,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf, set_bit(STRIPE_R5C_CACHING, &sh->state); } + /* + * When run in degraded mode, array is set to write-through mode. + * This check helps drain pending write safely in the transition to + * write-through mode. + */ + if (s->failed) { + r5c_make_stripe_write_out(sh); + return -EAGAIN; + } + for (i = disks; i--; ) { dev = &sh->dev[i]; /* if non-overwrite, use writing-out phase */ @@ -2579,6 +2610,19 @@ ioerr: return ret; } +void r5c_update_on_rdev_error(struct mddev *mddev) +{ + struct r5conf *conf = mddev->private; + struct r5l_log *log = conf->log; + + if (!log) + return; + + if (raid5_calc_degraded(conf) > 0 && + conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK) + schedule_work(&log->disable_writeback_work); +} + int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { struct request_queue *q = bdev_get_queue(rdev->bdev); @@ -2651,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) spin_lock_init(&log->no_space_stripes_lock); INIT_WORK(&log->deferred_io_work, r5l_submit_io_async); + INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async); log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; INIT_LIST_HEAD(&log->stripe_in_journal_list); @@ -2683,6 +2728,7 @@ io_kc: void r5l_exit_log(struct r5l_log *log) { + flush_work(&log->disable_writeback_work); md_unregister_thread(&log->reclaim_thread); mempool_destroy(log->meta_pool); bioset_free(log->bs); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dc83da69ca7c..3c7e106c12a2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -556,7 +556,7 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, * of the two sections, and some non-in_sync devices may * be insync in the section most affected by failed devices. */ -static int calc_degraded(struct r5conf *conf) +int raid5_calc_degraded(struct r5conf *conf) { int degraded, degraded2; int i; @@ -619,7 +619,7 @@ static int has_failed(struct r5conf *conf) if (conf->mddev->reshape_position == MaxSector) return conf->mddev->degraded > conf->max_degraded; - degraded = calc_degraded(conf); + degraded = raid5_calc_degraded(conf); if (degraded > conf->max_degraded) return 1; return 0; @@ -2555,7 +2555,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) spin_lock_irqsave(&conf->device_lock, flags); clear_bit(In_sync, &rdev->flags); - mddev->degraded = calc_degraded(conf); + mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irqrestore(&conf->device_lock, flags); set_bit(MD_RECOVERY_INTR, &mddev->recovery); @@ -2569,6 +2569,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) bdevname(rdev->bdev, b), mdname(mddev), conf->raid_disks - mddev->degraded); + r5c_update_on_rdev_error(mddev); } /* @@ -7091,7 +7092,7 @@ static int raid5_run(struct mddev *mddev) /* * 0 for a fully functional array, 1 or 2 for a degraded array. */ - mddev->degraded = calc_degraded(conf); + mddev->degraded = raid5_calc_degraded(conf); if (has_failed(conf)) { pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n", @@ -7338,7 +7339,7 @@ static int raid5_spare_active(struct mddev *mddev) } } spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = calc_degraded(conf); + mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irqrestore(&conf->device_lock, flags); print_raid5_conf(conf); return count; @@ -7698,7 +7699,7 @@ static int raid5_start_reshape(struct mddev *mddev) * pre and post number of devices. */ spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = calc_degraded(conf); + mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irqrestore(&conf->device_lock, flags); } mddev->raid_disks = conf->raid_disks; @@ -7786,7 +7787,7 @@ static void raid5_finish_reshape(struct mddev *mddev) } else { int d; spin_lock_irq(&conf->device_lock); - mddev->degraded = calc_degraded(conf); + mddev->degraded = raid5_calc_degraded(conf); spin_unlock_irq(&conf->device_lock); for (d = conf->raid_disks ; d < conf->raid_disks - mddev->delta_disks; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 461df197d157..1440fa26e296 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -758,6 +758,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, extern struct stripe_head * raid5_get_active_stripe(struct r5conf *conf, sector_t sector, int previous, int noblock, int noquiesce); +extern int raid5_calc_degraded(struct r5conf *conf); extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev); extern void r5l_exit_log(struct r5l_log *log); extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh); @@ -786,4 +787,5 @@ extern void r5c_flush_cache(struct r5conf *conf, int num); extern void r5c_check_stripe_cache_usage(struct r5conf *conf); extern void r5c_check_cached_full_stripe(struct r5conf *conf); extern struct md_sysfs_entry r5c_journal_mode; +extern void r5c_update_on_rdev_error(struct mddev *mddev); #endif -- cgit From 0fb44559ffd67de8517098b81f675fa0210f13f0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 23 Jan 2017 11:17:35 -0800 Subject: af_unix: move unix_mknod() out of bindlock Dmitry reported a deadlock scenario: unix_bind() path: u->bindlock ==> sb_writer do_splice() path: sb_writer ==> pipe->mutex ==> u->bindlock In the unix_bind() code path, unix_mknod() does not have to be done with u->bindlock held, since it is a pure fs operation, so we can just move unix_mknod() out. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Rainer Weikusat Cc: Al Viro Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/unix/af_unix.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 127656ebe7be..cef79873b09d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; + struct path path = { NULL, NULL }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; + if (sun_path[0]) { + umode_t mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current_umask()); + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + goto out; + } + } + err = mutex_lock_interruptible(&u->bindlock); if (err) - goto out; + goto out_put; err = -EINVAL; if (u->addr) @@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - struct path path; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; - } addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); @@ -1065,6 +1067,9 @@ out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock); +out_put: + if (err) + path_put(&path); out: return err; } -- cgit From d0fa28f00052391b5df328f502fbbdd4444938b7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 23 Jan 2017 21:37:52 +0200 Subject: virtio_net: fix PAGE_SIZE > 64k I don't have any guests with PAGE_SIZE > 64k but the code seems to be clearly broken in that case as PAGE_SIZE / MERGEABLE_BUFFER_ALIGN will need more than 8 bit and so the code in mergeable_ctx_to_buf_address does not give us the actual true size. Cc: John Fastabend Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 347424351ade..3d1519ea0669 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,8 +48,16 @@ module_param(gso, bool, 0444); */ DECLARE_EWMA(pkt_len, 1, 64) +/* With mergeable buffers we align buffer address and use the low bits to + * encode its true size. Buffer size is up to 1 page so we need to align to + * square root of page size to ensure we reserve enough bits to encode the true + * size. + */ +#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2) + /* Minimum alignment for mergeable packet buffers. */ -#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256) +#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \ + 1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT) #define VIRTNET_DRIVER_VERSION "1.0.0" -- cgit From 21b995a9cb093fff33ec91d7cb3822b882a90a1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2017 16:43:05 -0800 Subject: ip6_tunnel: must reload ipv6h in ip6ip6_tnl_xmit() Since ip6_tnl_parse_tlv_enc_lim() can call pskb_may_pull(), we must reload any pointer that was related to skb->head (or skb->data), or risk use after free. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Eric Dumazet Cc: Dmitry Kozlov Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ net/ipv6/ip6_tunnel.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 75b6108234dd..558631860d91 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -582,6 +582,9 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); + if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 753d6d0860fb..02923f956ac8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1303,6 +1303,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowlabel = key->label; } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; -- cgit From fbfa743a9d2a0ffa24251764f10afc13eb21e739 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2017 16:43:06 -0800 Subject: ipv6: fix ip6_tnl_parse_tlv_enc_lim() This function suffers from multiple issues. First one is that pskb_may_pull() may reallocate skb->head, so the 'raw' pointer needs either to be reloaded or not used at all. Second issue is that NEXTHDR_DEST handling does not validate that the options are present in skb->data, so we might read garbage or access non existent memory. With help from Willem de Bruijn. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 02923f956ac8..ff8ee06491c3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -400,18 +400,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; - __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof(*ipv6h); + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { - __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof(*hdr) > skb->data && - !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; - hdr = (struct ipv6_opt_hdr *) (raw + off); + hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) @@ -422,20 +423,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } else { optlen = ipv6_optlen(hdr); } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { - __u16 i = off + 2; + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. */ + if (!pskb_may_pull(skb, off + optlen)) + break; + while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ - if (i + sizeof (*tel) > off + optlen) + if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; + tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) - return i; + return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; @@ -443,7 +453,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = hdr->nexthdr; + nexthdr = next; off += optlen; } return 0; -- cgit From 8b3f9337e17aaf710c79e65fd0a3c572a075f498 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 23 Jan 2017 20:44:32 -0800 Subject: vxlan: don't flush static fdb entries on admin down This patch skips flushing static fdb entries in ndo_stop, but flushes all fdb entries during vxlan device delete. This is consistent with the bridge driver fdb Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 8a79cfcac889..746a819f0a41 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2354,7 +2354,7 @@ static int vxlan_open(struct net_device *dev) } /* Purge the forwarding table */ -static void vxlan_flush(struct vxlan_dev *vxlan) +static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) { unsigned int h; @@ -2364,6 +2364,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan) hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { struct vxlan_fdb *f = container_of(p, struct vxlan_fdb, hlist); + if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP))) + continue; /* the all_zeros_mac entry is deleted at vxlan_uninit */ if (!is_zero_ether_addr(f->eth_addr)) vxlan_fdb_destroy(vxlan, f); @@ -2385,7 +2387,7 @@ static int vxlan_stop(struct net_device *dev) del_timer_sync(&vxlan->age_timer); - vxlan_flush(vxlan); + vxlan_flush(vxlan, false); vxlan_sock_release(vxlan); return ret; @@ -3058,6 +3060,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + vxlan_flush(vxlan, true); + spin_lock(&vn->sock_lock); if (!hlist_unhashed(&vxlan->hlist)) hlist_del_rcu(&vxlan->hlist); -- cgit From efb5f68f32995c146944a9d4257c3cf8eae2c4a1 Mon Sep 17 00:00:00 2001 From: Balakrishnan Raman Date: Mon, 23 Jan 2017 20:44:33 -0800 Subject: vxlan: do not age static remote mac entries Mac aging is applicable only for dynamically learnt remote mac entries. Check for user configured static remote mac entries and skip aging. Signed-off-by: Balakrishnan Raman Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 746a819f0a41..50b62db213b0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2268,7 +2268,7 @@ static void vxlan_cleanup(unsigned long arg) = container_of(p, struct vxlan_fdb, hlist); unsigned long timeout; - if (f->state & NUD_PERMANENT) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) continue; timeout = f->used + vxlan->cfg.age_interval * HZ; -- cgit From 23d28a859fb847fd7fcfbd31acb3b160abb5d6ae Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 24 Jan 2017 07:28:41 +0100 Subject: ibmveth: Add a proper check for the availability of the checksum features When using the ibmveth driver in a KVM/QEMU based VM, it currently always prints out a scary error message like this when it is started: ibmveth 71000003 (unregistered net_device): unable to change checksum offload settings. 1 rc=-2 ret_attr=71000003 This happens because the driver always tries to enable the checksum offloading without checking for the availability of this feature first. QEMU does not support checksum offloading for the spapr-vlan device, thus we always get the error message here. According to the LoPAPR specification, the "ibm,illan-options" property of the corresponding device tree node should be checked first to see whether the H_ILLAN_ATTRIUBTES hypercall and thus the checksum offloading feature is available. Thus let's do this in the ibmveth driver, too, so that the error message is really only limited to cases where something goes wrong, and does not occur if the feature is just missing. Signed-off-by: Thomas Huth Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmveth.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index a831f947ca8c..309f5c66083c 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1601,8 +1601,11 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + netdev->hw_features = NETIF_F_SG; + if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM; + } netdev->features |= netdev->hw_features; -- cgit From 83d230eb5c638949350f4761acdfc0af5cb1bc00 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 23 Jan 2017 19:43:00 -0800 Subject: xfs: verify dirblocklog correctly sb_dirblklog is added to sb_blocklog to compute the directory block size in bytes. Therefore, we must compare the sum of both those values against XFS_MAX_BLOCKSIZE_LOG, not just dirblklog. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || -- cgit From 5b9f57516337b523f7466a53939aaaea7b78141b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Tue, 24 Jan 2017 10:45:38 +0100 Subject: qmi_wwan/cdc_ether: add device ID for HP lt2523 (Novatel E371) WWAN card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Another rebranded Novatel E371. qmi_wwan should drive this device, while cdc_ether should ignore it. Even though the USB descriptors are plain CDC-ETHER that USB interface is a QMI interface. Ref commit 7fdb7846c9ca ("qmi_wwan/cdc_ether: add device IDs for Dell 5804 (Novatel E371) WWAN card") Cc: Dan Williams Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/qmi_wwan.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index fe7b2886cb6b..86144f9a80ee 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -531,6 +531,7 @@ static const struct driver_info wwan_info = { #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 +#define HP_VENDOR_ID 0x03f0 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -677,6 +678,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* HP lt2523 (Novatel E371) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6fe1cdb0174f..24d5272cdce5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -654,6 +654,13 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* HP lt2523 (Novatel E371) */ + USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, { /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, -- cgit From f1db5c101cd48b5555ed9e061dcc49ed329812ea Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 24 Jan 2017 14:34:22 +0100 Subject: alx: fix wrong condition to free descriptor memory The condition to free the descriptor memory is wrong, we want to free the memory if it is set and not if it is unset. Invert the test to fix this issue. Fixes: b0999223f224b ("alx: add ability to allocate and free alx_napi structures") Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c8f525574d68..765306bd78c2 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -703,7 +703,7 @@ static void alx_free_rings(struct alx_priv *alx) if (alx->qnapi[0] && alx->qnapi[0]->rxq) kfree(alx->qnapi[0]->rxq->bufs); - if (!alx->descmem.virt) + if (alx->descmem.virt) dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size, alx->descmem.virt, -- cgit From 37187a016c37d7e550544544dba25399ce4589c9 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 24 Jan 2017 14:34:23 +0100 Subject: alx: fix fallback to msi or legacy interrupts If requesting msi-x interrupts fails we should fall back to msi or legacy interrupts. However alx_realloc_ressources don't call alx_init_intr, so we fail to set the right number of tx queues. This results in watchdog timeouts and a nonfunctional adapter. Fixes: d768319cd427 ("alx: enable multiple tx queues") Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 765306bd78c2..75cbd46e429d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -984,6 +984,7 @@ static int alx_realloc_resources(struct alx_priv *alx) alx_free_rings(alx); alx_free_napis(alx); alx_disable_advanced_intr(alx); + alx_init_intr(alx, false); err = alx_alloc_napis(alx); if (err) -- cgit From 185aceefd80f98dc5b9d73eb6cbb70739a5ce4ea Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Tue, 24 Jan 2017 14:34:24 +0100 Subject: alx: work around hardware bug in interrupt fallback path If requesting msi-x interrupts fails in alx_request_irq we fall back to a single tx queue and msi or legacy interrupts. Currently the adapter stops working in this case and we get tx watchdog timeouts. For reasons unknown the adapter gets confused when we load the dma adresses to the chip in alx_init_ring_ptrs twice: the first time with multiple queues and the second time in the fallback case with a single queue. To fix this move the the call to alx_reinit_rings (which calls alx_init_ring_ptrs) after alx_request_irq. At this time it is clear how much tx queues we have and which dma addresses we use. Fixes: d768319cd427 ("alx: enable multiple tx queues") Signed-off-by: Tobias Regnery Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 75cbd46e429d..7dcc907a449d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -685,8 +685,6 @@ static int alx_alloc_rings(struct alx_priv *alx) return -ENOMEM; } - alx_reinit_rings(alx); - return 0; } @@ -1242,6 +1240,12 @@ static int __alx_open(struct alx_priv *alx, bool resume) if (err) goto out_free_rings; + /* must be called after alx_request_irq because the chip stops working + * if we copy the dma addresses in alx_init_ring_ptrs twice when + * requesting msi-x interrupts failed + */ + alx_reinit_rings(alx); + netif_set_real_num_tx_queues(alx->dev, alx->num_txq); netif_set_real_num_rx_queues(alx->dev, alx->num_rxq); -- cgit From c929ea0b910355e1876c64431f3d5802f95b3d75 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 20 Jan 2017 16:48:39 +0800 Subject: SUNRPC: cleanup ida information when removing sunrpc module After removing sunrpc module, I get many kmemleak information as, unreferenced object 0xffff88003316b1e0 (size 544): comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc+0x15e/0x1f0 [] ida_pre_get+0xaa/0x150 [] ida_simple_get+0xad/0x180 [] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd] [] lockd+0x4d/0x270 [lockd] [] param_set_timeout+0x55/0x100 [lockd] [] svc_defer+0x114/0x3f0 [sunrpc] [] svc_defer+0x2d7/0x3f0 [sunrpc] [] rpc_show_info+0x8a/0x110 [sunrpc] [] proc_reg_write+0x7f/0xc0 [] __vfs_write+0xdf/0x3c0 [] vfs_write+0xef/0x240 [] SyS_write+0xad/0x130 [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [] 0xffffffffffffffff I found, the ida information (dynamic memory) isn't cleanup. Signed-off-by: Kinglong Mee Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt") Cc: stable@vger.kernel.org # v3.12+ Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 5 +++++ net/sunrpc/sunrpc_syms.c | 1 + 3 files changed, 7 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 85cc819676e8..333ad11b3dd9 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_cleanup_clids(void); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1efbe48e794f..1dc9f3bac099 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -336,6 +336,11 @@ out: static DEFINE_IDA(rpc_clids); +void rpc_cleanup_clids(void) +{ + ida_destroy(&rpc_clids); +} + static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index d1c330a7953a..c73de181467a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -119,6 +119,7 @@ out: static void __exit cleanup_sunrpc(void) { + rpc_cleanup_clids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); -- cgit From d3f4aadd614c4627244452ad64eaf351179f2c31 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Dec 2016 08:40:57 +0200 Subject: RDMA/core: Add the function ib_mtu_int_to_enum As the functionality to convert the MTU from a number to enum_ib_mtu is ubiquitous, define a dedicated function and remove the duplicated code. Signed-off-by: Ram Amrani Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 11 +---------- drivers/infiniband/hw/cxgb4/provider.c | 11 +---------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 11 +---------- drivers/infiniband/hw/nes/nes_verbs.c | 12 +----------- include/rdma/ib_verbs.h | 14 ++++++++++++++ 5 files changed, 18 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 9d5fe1853da4..6262dc035f3c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1135,16 +1135,7 @@ static int iwch_query_port(struct ib_device *ibdev, memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - if (netdev->mtu >= 4096) - props->active_mtu = IB_MTU_4096; - else if (netdev->mtu >= 2048) - props->active_mtu = IB_MTU_2048; - else if (netdev->mtu >= 1024) - props->active_mtu = IB_MTU_1024; - else if (netdev->mtu >= 512) - props->active_mtu = IB_MTU_512; - else - props->active_mtu = IB_MTU_256; + props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); if (!netif_carrier_ok(netdev)) props->state = IB_PORT_DOWN; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index fa64f5d93b11..3345e1c312f7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -373,16 +373,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - if (netdev->mtu >= 4096) - props->active_mtu = IB_MTU_4096; - else if (netdev->mtu >= 2048) - props->active_mtu = IB_MTU_2048; - else if (netdev->mtu >= 1024) - props->active_mtu = IB_MTU_1024; - else if (netdev->mtu >= 512) - props->active_mtu = IB_MTU_512; - else - props->active_mtu = IB_MTU_256; + props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); if (!netif_carrier_ok(netdev)) props->state = IB_PORT_DOWN; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 29e97df9e1a7..4c000d60d5c6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -100,16 +100,7 @@ static int i40iw_query_port(struct ib_device *ibdev, memset(props, 0, sizeof(*props)); props->max_mtu = IB_MTU_4096; - if (netdev->mtu >= 4096) - props->active_mtu = IB_MTU_4096; - else if (netdev->mtu >= 2048) - props->active_mtu = IB_MTU_2048; - else if (netdev->mtu >= 1024) - props->active_mtu = IB_MTU_1024; - else if (netdev->mtu >= 512) - props->active_mtu = IB_MTU_512; - else - props->active_mtu = IB_MTU_256; + props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); props->lid = 1; if (netif_carrier_ok(iwdev->netdev)) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index aff9fb14768b..5a31f3c6a421 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -478,17 +478,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr memset(props, 0, sizeof(*props)); props->max_mtu = IB_MTU_4096; - - if (netdev->mtu >= 4096) - props->active_mtu = IB_MTU_4096; - else if (netdev->mtu >= 2048) - props->active_mtu = IB_MTU_2048; - else if (netdev->mtu >= 1024) - props->active_mtu = IB_MTU_1024; - else if (netdev->mtu >= 512) - props->active_mtu = IB_MTU_512; - else - props->active_mtu = IB_MTU_256; + props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); props->lid = 1; props->lmc = 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 958a24d8fae7..b567e4452a47 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -352,6 +352,20 @@ static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) } } +static inline enum ib_mtu ib_mtu_int_to_enum(int mtu) +{ + if (mtu >= 4096) + return IB_MTU_4096; + else if (mtu >= 2048) + return IB_MTU_2048; + else if (mtu >= 1024) + return IB_MTU_1024; + else if (mtu >= 512) + return IB_MTU_512; + else + return IB_MTU_256; +} + enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, -- cgit From 097b615965fb1af714fbc2311f68839b1086ebcb Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Dec 2016 08:40:58 +0200 Subject: RDMA/qedr: Fix MTU returned from QP query MTU value returned from QP query should include overhead. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 57c8de208077..84dcd73bd57b 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2016,7 +2016,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->qp_state = qedr_get_ibqp_state(params.state); qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); - qp_attr->path_mtu = iboe_get_mtu(params.mtu); + qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); qp_attr->path_mig_state = IB_MIG_MIGRATED; qp_attr->rq_psn = params.rq_psn; qp_attr->sq_psn = params.sq_psn; -- cgit From 20f5e10ef8bcf29a915642245b66e5a132e38fc4 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Tue, 24 Jan 2017 12:01:31 +0200 Subject: RDMA/qedr: Add uapi header qedr-abi.h Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- include/uapi/rdma/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 82bdf5626859..bb68cb1b04ed 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -16,3 +16,4 @@ header-y += nes-abi.h header-y += ocrdma-abi.h header-y += hns-abi.h header-y += vmw_pvrdma-abi.h +header-y += qedr-abi.h -- cgit From 865cea40b69741c3da2574176876463233b2b67c Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:34 +0200 Subject: RDMA/qedr: Return success when not changing QP state If the user is requesting us to change the QP state to the same state that it is already in, return success instead of failure. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 84dcd73bd57b..27d90a82d731 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1657,7 +1657,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, int status = 0; if (new_state == qp->state) - return 1; + return 0; switch (qp->state) { case QED_ROCE_QP_STATE_RESET: -- cgit From 59e8970b3798e4cbe575ed9cf4d53098760a2a86 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:35 +0200 Subject: RDMA/qedr: Return max inline data in QP query result Return the maximum supported amount of inline data, not the qp's current configured inline data size, when filling out the results of a query qp call. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 27d90a82d731..bb907b14c988 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2028,7 +2028,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_wr; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = qp->max_inline_data; + qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; qp_init_attr->cap = qp_attr->cap; memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], ¶ms.dgid.bytes[0], -- cgit From 91bff997db2ec04f9ba761a55c21642f9803b06c Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:36 +0200 Subject: RDMA/qedr: Remove CQ spinlock from CM completion handlers There is only a single event queue that triggers the completion events for the RDMA CM and it is being processed serially. This means that inherently there can no parallelism of CQ completion handler callbacks, hence the lock is redundant. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr.h | 3 --- drivers/infiniband/hw/qedr/qedr_cm.c | 10 ++-------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 620badd7d4fb..94319abb0df2 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -251,9 +251,6 @@ struct qedr_cq { u16 icid; - /* Lock to protect completion handler */ - spinlock_t comp_handler_lock; - /* Lock to protect multiplem CQ's */ spinlock_t cq_lock; u8 arm_flags; diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 63890ebb72bd..00361f310d15 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -87,11 +87,8 @@ void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt) qedr_inc_sw_gsi_cons(&qp->sq); spin_unlock_irqrestore(&qp->q_lock, flags); - if (cq->ibcq.comp_handler) { - spin_lock_irqsave(&cq->comp_handler_lock, flags); + if (cq->ibcq.comp_handler) (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); - spin_unlock_irqrestore(&cq->comp_handler_lock, flags); - } } void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt, @@ -113,11 +110,8 @@ void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt, spin_unlock_irqrestore(&qp->q_lock, flags); - if (cq->ibcq.comp_handler) { - spin_lock_irqsave(&cq->comp_handler_lock, flags); + if (cq->ibcq.comp_handler) (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); - spin_unlock_irqrestore(&cq->comp_handler_lock, flags); - } } static void qedr_destroy_gsi_cq(struct qedr_dev *dev, -- cgit From c78c31496111f497b4a03f955c100091185da8b6 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:37 +0200 Subject: RDMA/qedr: Don't spam dmesg if QP is in error state It is normal to flush CQEs if the QP is in error state. Hence there's no use in printing a message per CQE to dmesg. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index bb907b14c988..0688dce54f92 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3234,9 +3234,10 @@ static int qedr_poll_cq_req(struct qedr_dev *dev, IB_WC_SUCCESS, 0); break; case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: - DP_ERR(dev, - "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", - cq->icid, qp->icid); + if (qp->state != QED_ROCE_QP_STATE_ERR) + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, IB_WC_WR_FLUSH_ERR, 1); break; -- cgit From 933e6dcaa0f65eb2f624ad760274020874a1f35e Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:50:38 +0200 Subject: RDMA/qedr: Don't reset QP when queues aren't flushed Fail QP state transition from error to reset if SQ/RQ are not empty and still in the process of flushing out the queued work entries. Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 0688dce54f92..3dae9641f821 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1733,6 +1733,14 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* ERR->XXX */ switch (new_state) { case QED_ROCE_QP_STATE_RESET: + if ((qp->rq.prod != qp->rq.cons) || + (qp->sq.prod != qp->sq.cons)) { + DP_NOTICE(dev, + "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n", + qp->rq.prod, qp->rq.cons, qp->sq.prod, + qp->sq.cons); + status = -EINVAL; + } break; default: status = -EINVAL; -- cgit From 27a4b1a6d6fcf09314359bacefa1e106927ae21b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:39 +0200 Subject: RDMA/qedr: Mark three functions as static mark qedr_get_state_from_ibqp(), __qedr_alloc_mr() and __qedr_post_send() as static since they are only used in the same file. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3dae9641f821..1b9d55965479 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1600,7 +1600,7 @@ err0: return ERR_PTR(-EFAULT); } -enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) +static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) { switch (qp_state) { case QED_ROCE_QP_STATE_RESET: @@ -1621,7 +1621,8 @@ enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) return IB_QPS_ERR; } -enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state) +static enum qed_roce_qp_state qedr_get_state_from_ibqp( + enum ib_qp_state qp_state) { switch (qp_state) { case IB_QPS_RESET: @@ -2310,7 +2311,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr) return rc; } -struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len) +static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, + int max_page_list_len) { struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_dev *dev = get_qedr_dev(ibpd->device); @@ -2712,7 +2714,7 @@ static int qedr_prepare_reg(struct qedr_qp *qp, return 0; } -enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) +static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -2737,7 +2739,7 @@ enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) } } -inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) +static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) { int wq_is_full, err_wr, pbl_is_full; struct qedr_dev *dev = qp->dev; @@ -2774,7 +2776,7 @@ inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) return true; } -int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, +static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); -- cgit From 1a59075197976611bacaa383a6673f9e57e9e98b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:40 +0200 Subject: RDMA/qedr: Fix formatting Remove standalone ';'. List function's parameters in a single line. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 3 +-- drivers/infiniband/hw/qedr/verbs.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 7b74d09a8217..eac0bfc1fc99 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -576,8 +576,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev) return 0; } -void qedr_unaffiliated_event(void *context, - u8 event_code) +void qedr_unaffiliated_event(void *context, u8 event_code) { pr_err("unaffiliated event not implemented yet\n"); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 1b9d55965479..c4f4c2131a45 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1874,7 +1874,6 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n", qp_params.remote_mac_addr); -; qp_params.mtu = qp->mtu; qp_params.lb_indication = false; -- cgit From af2b14b8b8ae21b0047a52c767ac8b44f435a280 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:41 +0200 Subject: RDMA/qedr: Fix RDMA CM loopback The loopback logic in RDMA CM packets compares Ethernet addresses and was accidently inverse. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 00361f310d15..a9a8d8745d2e 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -398,9 +398,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, } if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) - packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; - else packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; + else + packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; packet->roce_mode = roce_mode; memcpy(packet->header.vaddr, ud_header_buffer, header_size); -- cgit From 9c1e0228ab35e52d30abf4b5629c28350833fbcb Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:42 +0200 Subject: RDMA/qedr: Fix and simplify memory leak in PD alloc Free the PD if no internal resources were available. Move userspace code under the relevant 'if'. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index c4f4c2131a45..c7d6c9a783bd 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -471,8 +471,6 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibdev); - struct qedr_ucontext *uctx = NULL; - struct qedr_alloc_pd_uresp uresp; struct qedr_pd *pd; u16 pd_id; int rc; @@ -489,21 +487,33 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, if (!pd) return ERR_PTR(-ENOMEM); - dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); + rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); + if (rc) + goto err; - uresp.pd_id = pd_id; pd->pd_id = pd_id; if (udata && context) { + struct qedr_alloc_pd_uresp uresp; + + uresp.pd_id = pd_id; + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); - if (rc) + if (rc) { DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); - uctx = get_qedr_ucontext(context); - uctx->pd = pd; - pd->uctx = uctx; + dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); + goto err; + } + + pd->uctx = get_qedr_ucontext(context); + pd->uctx->pd = pd; } return &pd->ibpd; + +err: + kfree(pd); + return ERR_PTR(rc); } int qedr_dealloc_pd(struct ib_pd *ibpd) -- cgit From f449c7a2d822c2d81b5bcb2c50eec80796766726 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 24 Jan 2017 13:51:43 +0200 Subject: RDMA/qedr: Dispatch port active event from qedr_add Relying on qede to trigger qedr on startup is problematic. When probing both if qedr loads slowly then qede can assume qedr is missing and not trigger it. This patch adds a triggering from qedr and protects against a race via an atomic bit. Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 20 ++++++++++++++------ drivers/infiniband/hw/qedr/qedr.h | 5 +++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index eac0bfc1fc99..3ac8aa5ef37d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -791,6 +791,9 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, if (device_create_file(&dev->ibdev.dev, qedr_attributes[i])) goto sysfs_err; + if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state)) + qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE); + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n"); return dev; @@ -823,11 +826,10 @@ static void qedr_remove(struct qedr_dev *dev) ib_dealloc_device(&dev->ibdev); } -static int qedr_close(struct qedr_dev *dev) +static void qedr_close(struct qedr_dev *dev) { - qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); - - return 0; + if (test_and_clear_bit(QEDR_ENET_STATE_BIT, &dev->enet_state)) + qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ERR); } static void qedr_shutdown(struct qedr_dev *dev) @@ -836,6 +838,12 @@ static void qedr_shutdown(struct qedr_dev *dev) qedr_remove(dev); } +static void qedr_open(struct qedr_dev *dev) +{ + if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state)) + qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE); +} + static void qedr_mac_address_change(struct qedr_dev *dev) { union ib_gid *sgid = &dev->sgid_tbl[0]; @@ -862,7 +870,7 @@ static void qedr_mac_address_change(struct qedr_dev *dev) ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr); - qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE); + qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_GID_CHANGE); if (rc) DP_ERR(dev, "Error updating mac filter\n"); @@ -876,7 +884,7 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event) { switch (event) { case QEDE_UP: - qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + qedr_open(dev); break; case QEDE_DOWN: qedr_close(dev); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 94319abb0df2..bb32e4792ec9 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -113,6 +113,8 @@ struct qedr_device_attr { struct qed_rdma_events events; }; +#define QEDR_ENET_STATE_BIT (0) + struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -153,6 +155,8 @@ struct qedr_dev { struct qedr_cq *gsi_sqcq; struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; + + unsigned long enet_state; }; #define QEDR_MAX_SQ_PBL (0x8000) @@ -188,6 +192,7 @@ struct qedr_dev { #define QEDR_ROCE_MAX_CNQ_SIZE (0x4000) #define QEDR_MAX_PORT (1) +#define QEDR_PORT (1) #define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) -- cgit From 828f6fa65ce7e80f77f5ab12942e44eb3d9d174e Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Thu, 5 Jan 2017 15:00:05 +0800 Subject: IB/umem: Release pid in error and ODP flow 1. Release pid before enter odp flow 2. Release pid when fail to allocate memory Fixes: 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Fixes: 8ada2c1c0c1d ("IB/core: Add support for on demand paging regions") Signed-off-by: Kenneth Lee Reviewed-by: Haggai Eran Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 1e62a5f0cb28..4609b921f899 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -134,6 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { + put_pid(umem->pid); ret = ib_umem_odp_get(context, umem); if (ret) { kfree(umem); @@ -149,6 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { + put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } -- cgit From 5ce6b04ce96896e8a79e6f60740ced911eaac7a4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 22 Jan 2017 22:10:32 +0800 Subject: netfilter: nft_log: restrict the log prefix length to 127 First, log prefix will be truncated to NF_LOG_PREFIXLEN-1, i.e. 127, at nf_log_packet(), so the extra part is useless. Second, after adding a log rule with a very very long prefix, we will fail to dump the nft rules after this _special_ one, but acctually, they do exist. For example: # name_65000=$(printf "%0.sQ" {1..65000}) # nft add rule filter output log prefix "$name_65000" # nft add rule filter output counter # nft add rule filter output counter # nft list chain filter output table ip filter { chain output { type filter hook output priority 0; policy accept; } } So now, restrict the log prefix length to NF_LOG_PREFIXLEN-1. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_log.h | 2 ++ net/netfilter/nf_log.c | 1 - net/netfilter/nft_log.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h index 8be21e02387d..d0b5fa91ff54 100644 --- a/include/uapi/linux/netfilter/nf_log.h +++ b/include/uapi/linux/netfilter/nf_log.h @@ -9,4 +9,6 @@ #define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ #define NF_LOG_MASK 0x2f +#define NF_LOG_PREFIXLEN 128 + #endif /* _NETFILTER_NF_LOG_H */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3dca90dc24ad..ffb9e8ada899 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -13,7 +13,6 @@ /* Internal logging interface, which relies on the real LOG target modules */ -#define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 6271e40a3dd6..6f6e64423643 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -39,7 +39,8 @@ static void nft_log_eval(const struct nft_expr *expr, static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_GROUP] = { .type = NLA_U16 }, - [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING, + .len = NF_LOG_PREFIXLEN - 1 }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, -- cgit From 35d0ac9070ef619e3bf44324375878a1c540387b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Jan 2017 00:51:32 +0100 Subject: netfilter: nf_tables: fix set->nelems counting with no NLM_F_EXCL If the element exists and no NLM_F_EXCL is specified, do not bump set->nelems, otherwise we leak one set element slot. This problem amplifies if the set is full since the abort path always decrements the counter for the -ENFILE case too, giving one spare extra slot. Fix this by moving set->nelems update to nft_add_set_elem() after successful element insertion. Moreover, remove the element if the set is full so there is no need to rely on the abort path to undo things anymore. Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b84c7b25219b..831a9a16f563 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3745,10 +3745,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err5; } + if (set->size && + !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) { + err = -ENFILE; + goto err6; + } + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; +err6: + set->ops->remove(set, &elem); err5: kfree(trans); err4: @@ -3795,15 +3803,9 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - if (set->size && - !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) - return -ENFILE; - err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); - if (err < 0) { - atomic_dec(&set->nelems); + if (err < 0) break; - } } return err; } -- cgit From de70185de0333783154863278ac87bfbbc54e384 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Jan 2017 00:51:41 +0100 Subject: netfilter: nf_tables: deconstify walk callback function The flush operation needs to modify set and element objects, so let's deconstify this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +++--- net/netfilter/nf_tables_api.c | 24 ++++++++++++------------ net/netfilter/nft_set_hash.c | 2 +- net/netfilter/nft_set_rbtree.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 924325c46aab..7dfdb517f0be 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -207,9 +207,9 @@ struct nft_set_iter { unsigned int skip; int err; int (*fn)(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, const struct nft_set_iter *iter, - const struct nft_set_elem *elem); + struct nft_set_elem *elem); }; /** @@ -301,7 +301,7 @@ struct nft_set_ops { void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, struct nft_set_iter *iter); unsigned int (*privsize)(const struct nlattr * const nla[]); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 831a9a16f563..5bd0068320fb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3087,9 +3087,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, } static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, const struct nft_set_iter *iter, - const struct nft_set_elem *elem) + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; @@ -3308,9 +3308,9 @@ struct nft_set_dump_args { }; static int nf_tables_dump_setelem(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, const struct nft_set_iter *iter, - const struct nft_set_elem *elem) + struct nft_set_elem *elem) { struct nft_set_dump_args *args; @@ -3322,7 +3322,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); u8 genmask = nft_genmask_cur(net); - const struct nft_set *set; + struct nft_set *set; struct nft_set_dump_args args; struct nft_ctx ctx; struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; @@ -3890,9 +3890,9 @@ err1: } static int nft_flush_set(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, const struct nft_set_iter *iter, - const struct nft_set_elem *elem) + struct nft_set_elem *elem) { struct nft_trans *trans; int err; @@ -3907,8 +3907,8 @@ static int nft_flush_set(const struct nft_ctx *ctx, goto err1; } - nft_trans_elem_set(trans) = (struct nft_set *)set; - nft_trans_elem(trans) = *((struct nft_set_elem *)elem); + nft_trans_elem_set(trans) = set; + nft_trans_elem(trans) = *elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -5019,9 +5019,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, const struct nft_chain *chain); static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, const struct nft_set_iter *iter, - const struct nft_set_elem *elem) + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); const struct nft_data *data; @@ -5045,7 +5045,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, { const struct nft_rule *rule; const struct nft_expr *expr, *last; - const struct nft_set *set; + struct nft_set *set; struct nft_set_binding *binding; struct nft_set_iter iter; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 1e20e2bbb6d9..e36069fb76ae 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -212,7 +212,7 @@ static void nft_hash_remove(const struct nft_set *set, rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); } -static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, +static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_hash *priv = nft_set_priv(set); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 08376e50f6cd..f06f55ee516d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -221,7 +221,7 @@ static void *nft_rbtree_deactivate(const struct net *net, } static void nft_rbtree_walk(const struct nft_ctx *ctx, - const struct nft_set *set, + struct nft_set *set, struct nft_set_iter *iter) { const struct nft_rbtree *priv = nft_set_priv(set); -- cgit From b2c11e4b9536ebab6b39929e1fe15f57039ab445 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Jan 2017 00:51:48 +0100 Subject: netfilter: nf_tables: bump set->ndeact on set flush Add missing set->ndeact update on each deactivated element from the set flush path. Otherwise, sets with fixed size break after flush since accounting breaks. # nft add set x y { type ipv4_addr\; size 2\; } # nft add element x y { 1.1.1.1 } # nft add element x y { 1.1.1.2 } # nft flush set x y # nft add element x y { 1.1.1.1 } :1:1-28: Error: Could not process rule: Too many open files in system Fixes: 8411b6442e59 ("netfilter: nf_tables: support for set flushing") Reported-by: Elise Lennion Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5bd0068320fb..1b913760f205 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3906,6 +3906,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, err = -ENOENT; goto err1; } + set->ndeact++; nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; -- cgit From 93f955aad4bacee5acebad141d1a03cd51f27b4e Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:43 +0100 Subject: tipc: fix nametbl_lock soft lockup at node/link events We trigger a soft lockup as we grab nametbl_lock twice if the node has a pending node up/down or link up/down event while: - we process an incoming named message in tipc_named_rcv() and perform an tipc_update_nametbl(). - we have pending backlog items in the name distributor queue during a nametable update using tipc_nametbl_publish() or tipc_nametbl_withdraw(). The following are the call chain associated: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_node_subscribe()/unsubscribe() tipc_node_write_unlock() << lockup occurs if an outstanding node/link event exits, as we grabs nametbl_lock again >> tipc_nametbl_withdraw() Grab nametbl_lock tipc_named_process_backlog() tipc_update_nametbl() << rest as above >> The function tipc_node_write_unlock(), in addition to releasing the lock processes the outstanding node/link up/down events. To do this, we need to grab the nametbl_lock again leading to the lockup. In this commit we fix the soft lockup by introducing a fast variant of node_unlock(), where we just release the lock. We adapt the node_subscribe()/node_unsubscribe() to use the fast variants. Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/node.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9d2f4c2b08ab..27753325e06e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n) write_lock_bh(&n->lock); } +static void tipc_node_write_unlock_fast(struct tipc_node *n) +{ + write_unlock_bh(&n->lock); +} + static void tipc_node_write_unlock(struct tipc_node *n) { struct net *net = n->net; @@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_add_tail(subscr, &n->publ_list); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } @@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } -- cgit From d094c4d5f5c7e1b225e94227ca3f007be3adc4e8 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:44 +0100 Subject: tipc: add subscription refcount to avoid invalid delete Until now, the subscribers keep track of the subscriptions using reference count at subscriber level. At subscription cancel or subscriber delete, we delete the subscription only if the timer was pending for the subscription. This approach is incorrect as: 1. del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 is scheduled, it deletes an invalid subscription. 2. We export tipc_subscrp_report_overlap(), which accesses the subscription pointer multiple times. Meanwhile the subscription timer can expire thereby freeing the subscription and we might continue to access the subscription pointer leading to memory violations. In this commit, we introduce subscription refcount to avoid deleting an invalid subscription. Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/subscr.c | 124 ++++++++++++++++++++++++++++++------------------------ net/tipc/subscr.h | 1 + 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..9d94e65d0894 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,8 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); +static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; + tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); + tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; - struct tipc_subscriber *subscriber = sub->subscriber; /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(&subscriber->lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(&subscriber->lock); - - tipc_subscrb_put(subscriber); + tipc_subscrp_put(sub); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(&subscriber->kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub = container_of(kref, + struct tipc_subscription, + kref); + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + atomic_dec(&tn->subscription_count); + spin_unlock_bh(&subscriber->lock); + kfree(sub); + tipc_subscrb_put(subscriber); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(&subscription->kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(&subscription->kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. + */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct list_head *subscription_list = &subscriber->subscrp_list; + struct tipc_subscription *sub, *temp; + + spin_lock_bh(&subscriber->lock); + list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { + if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) + continue; + + tipc_subscrp_get(sub); + spin_unlock_bh(&subscriber->lock); + tipc_subscrp_delete(sub); + tipc_subscrp_put(sub); + spin_lock_bh(&subscriber->lock); + + if (s) + break; + } + spin_unlock_bh(&subscriber->lock); +} + static struct tipc_subscriber *tipc_subscrb_create(int conid) { struct tipc_subscriber *subscriber; @@ -177,8 +226,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) pr_warn("Subscriber rejected, no memory\n"); return NULL; } - kref_init(&subscriber->kref); INIT_LIST_HEAD(&subscriber->subscrp_list); + kref_init(&subscriber->kref); subscriber->conid = conid; spin_lock_init(&subscriber->lock); @@ -187,55 +236,22 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { - struct tipc_subscription *sub, *temp; - u32 timeout; - - spin_lock_bh(&subscriber->lock); - /* Destroy any existing subscriptions for subscriber */ - list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, - subscrp_list) { - timeout = htohl(sub->evt.s.timeout, sub->swap); - if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) { - tipc_subscrp_delete(sub); - tipc_subscrb_put(subscriber); - } - } - spin_unlock_bh(&subscriber->lock); - + tipc_subscrb_subscrp_delete(subscriber, NULL); tipc_subscrb_put(subscriber); } static void tipc_subscrp_delete(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - kfree(sub); - atomic_dec(&tn->subscription_count); + if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) + tipc_subscrp_put(sub); } static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { - struct tipc_subscription *sub, *temp; - u32 timeout; - - spin_lock_bh(&subscriber->lock); - /* Find first matching subscription, exit if not found */ - list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, - subscrp_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - timeout = htohl(sub->evt.s.timeout, sub->swap); - if ((timeout == TIPC_WAIT_FOREVER) || - del_timer(&sub->timer)) { - tipc_subscrp_delete(sub); - tipc_subscrb_put(subscriber); - } - break; - } - } - spin_unlock_bh(&subscriber->lock); + tipc_subscrb_subscrp_delete(subscriber, s); } static struct tipc_subscription *tipc_subscrp_create(struct net *net, @@ -272,6 +288,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); + kref_init(&sub->kref); return sub; } @@ -288,17 +305,16 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); - tipc_subscrb_get(subscriber); sub->subscriber = subscriber; tipc_nametbl_subscribe(sub); + tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); + setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); timeout = htohl(sub->evt.s.timeout, swap); - if (timeout == TIPC_WAIT_FOREVER) - return; - setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); - mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); + if (timeout != TIPC_WAIT_FOREVER) + mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); } /* Handle one termination request for the subscriber */ diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index be60103082c9..ffdc214c117a 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -57,6 +57,7 @@ struct tipc_subscriber; * @evt: template for events generated by subscription */ struct tipc_subscription { + struct kref kref; struct tipc_subscriber *subscriber; struct net *net; struct timer_list timer; -- cgit From fc0adfc8fd18b61b6f7a3f28b429e134d6f3a008 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:45 +0100 Subject: tipc: fix connection refcount error Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to an new subscriber leading to inconsistent reference count. We have another refcount warning we grab the refcount in tipc_conn_lookup() for connections with flag with CF_CONNECTED not set. This usually occurs at shutdown when the we stop the topology server and withdraw TIPC_CFG_SRV publication thereby triggering a withdraw message to subscribers. In this commit, we: 1. remove the connection from the server list at recount cleanup. 2. grab the refcount for a connection only if CF_CONNECTED is set. Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/server.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..2e803601aa99 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); } tipc_clean_outqueues(con); @@ -128,8 +134,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); - if (con) + if (con && test_bit(CF_CONNECTED, &con->flags)) conn_get(con); + else + con = NULL; spin_unlock_bh(&s->idr_lock); return con; } @@ -198,15 +206,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- cgit From 9dc3abdd1f7ea524e8552e0a3ef01219892ed1f4 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:46 +0100 Subject: tipc: fix nametbl_lock soft lockup at module exit Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/server.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2e803601aa99..826cde2c401e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- cgit From 4c887aa65d38633885010277f3482400681be719 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:47 +0100 Subject: tipc: ignore requests when the connection state is not CONNECTED In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Acked-by: Ying Xue Acked-by: Jon Maloy Tested-by: John Thompson Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/server.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 826cde2c401e..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -453,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, &con->flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -466,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) { - if (!queue_work(s->send_wq, &con->swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - } return 0; } @@ -495,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(&con->outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, &con->flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == &con->outqueue) -- cgit From 35e22e49a5d6a741ebe7f2dd280b2052c3003ef7 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 24 Jan 2017 13:00:48 +0100 Subject: tipc: fix cleanup at module unload In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Acked-by: Ying Xue Acked-by: Jon Maloy Tested-by: John Thompson Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 04ff441b8065..3cd6402e812c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -619,14 +619,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(&s->idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); if (con) { - total++; spin_unlock_bh(&s->idr_lock); tipc_close_conn(con); spin_lock_bh(&s->idr_lock); -- cgit From f39f775218a7520e3700de2003c84a042c3b5972 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Jan 2017 15:25:58 +0200 Subject: IB/rxe: Fix rxe dev insertion to rxe_dev_list The first argument of list_add_tail is the new item and the second is the head of the list. Fix the code to pass arguments in the right order, otherwise not all the rxe devices will be removed during teardown. Fixes: 8700e3e7c4857 ('Soft RoCE driver') Signed-off-by: Maor Gottlieb Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 342e78163613..4abdeb359fb4 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -555,7 +555,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) } spin_lock_bh(&dev_list_lock); - list_add_tail(&rxe_dev_list, &rxe->list); + list_add_tail(&rxe->list, &rxe_dev_list); spin_unlock_bh(&dev_list_lock); return rxe; } -- cgit From 2d4b21e0a2913612274a69a3ba1bfee4cffc6e77 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Thu, 19 Jan 2017 15:25:59 +0200 Subject: IB/rxe: Prevent from completer to operate on non valid QP On UD QP completer tasklet is scheduled for each packet sent. If it is followed by a destroy_qp(), the kernel panic will happen as the completer tries to operate on a destroyed QP. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 486d576e55bc..44b2108253bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -813,8 +813,7 @@ void rxe_qp_destroy(struct rxe_qp *qp) del_timer_sync(&qp->rnr_nak_timer); rxe_cleanup_task(&qp->req.task); - if (qp_type(qp) == IB_QPT_RC) - rxe_cleanup_task(&qp->comp.task); + rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ __rxe_do_task(&qp->req.task); -- cgit From 88ff7334f25909802140e690c0e16433e485b0a0 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 24 Jan 2017 16:26:47 +0000 Subject: net: Specify the owning module for lwtunnel ops Modules implementing lwtunnel ops should not be allowed to unload while there is state alive using those ops, so specify the owning module for all lwtunnel ops. Signed-off-by: Robert Shearman Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 2 ++ net/core/lwt_bpf.c | 1 + net/ipv4/ip_tunnel_core.c | 2 ++ net/ipv6/ila/ila_lwt.c | 1 + net/ipv6/seg6_iptunnel.c | 1 + net/mpls/mpls_iptunnel.c | 1 + 6 files changed, 8 insertions(+) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 0b585f1fd340..73dd87647460 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -44,6 +44,8 @@ struct lwtunnel_encap_ops { int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); + + struct module *owner; }; #ifdef CONFIG_LWTUNNEL diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 71bb3e2eca08..b3eef90b2df9 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -386,6 +386,7 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = { .fill_encap = bpf_fill_encap_info, .get_encap_size = bpf_encap_nlsize, .cmp_encap = bpf_encap_cmp, + .owner = THIS_MODULE, }; static int __init bpf_lwt_init(void) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fed3d29f9eb3..0fd1976ab63b 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { @@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; void __init ip_tunnel_core_init(void) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index a7bc54ab46e2..13b5e85fe0d5 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -238,6 +238,7 @@ static const struct lwtunnel_encap_ops ila_encap_ops = { .fill_encap = ila_fill_encap_info, .get_encap_size = ila_encap_nlsize, .cmp_encap = ila_encap_cmp, + .owner = THIS_MODULE, }; int ila_lwt_init(void) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 1d60cb132835..c46f8cbf5ab5 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -422,6 +422,7 @@ static const struct lwtunnel_encap_ops seg6_iptun_ops = { .fill_encap = seg6_fill_encap_info, .get_encap_size = seg6_encap_nlsize, .cmp_encap = seg6_encap_cmp, + .owner = THIS_MODULE, }; int __init seg6_iptunnel_init(void) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 2f7ccd934416..1d281c1ff7c1 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -215,6 +215,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = { .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, + .owner = THIS_MODULE, }; static int __init mpls_iptunnel_init(void) -- cgit From 85c814016ce3b371016c2c054a905fa2492f5a65 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 24 Jan 2017 16:26:48 +0000 Subject: lwtunnel: Fix oops on state free after encap module unload When attempting to free lwtunnel state after the module for the encap has been unloaded an oops occurs: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: lwtstate_free+0x18/0x40 [..] task: ffff88003e372380 task.stack: ffffc900001fc000 RIP: 0010:lwtstate_free+0x18/0x40 RSP: 0018:ffff88003fd83e88 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88002bbb3380 RCX: ffff88000c91a300 [..] Call Trace: free_fib_info_rcu+0x195/0x1a0 ? rt_fibinfo_free+0x50/0x50 rcu_process_callbacks+0x2d3/0x850 ? rcu_process_callbacks+0x296/0x850 __do_softirq+0xe4/0x4cb irq_exit+0xb0/0xc0 smp_apic_timer_interrupt+0x3d/0x50 apic_timer_interrupt+0x93/0xa0 [..] Code: e8 6e c6 fc ff 89 d8 5b 5d c3 bb de ff ff ff eb f4 66 90 66 66 66 66 90 55 48 89 e5 53 0f b7 07 48 89 fb 48 8b 04 c5 00 81 d5 81 <48> 8b 40 08 48 85 c0 74 13 ff d0 48 8d 7b 20 be 20 00 00 00 e8 The problem is after the module for the encap can be unloaded the corresponding ops is removed and is thus NULL here. Modules implementing lwtunnel ops should not be allowed to unload while there is state alive using those ops, so grab the module reference for the ops on creating lwtunnel state and of course release the reference when freeing the state. Fixes: 1104d9ba443a ("lwtunnel: Add destroy state operation") Signed-off-by: Robert Shearman Signed-off-by: David S. Miller --- net/core/lwtunnel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 47b1dd65947b..c23465005f2f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -115,8 +115,11 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); - if (likely(ops && ops->build_state)) + if (likely(ops && ops->build_state && try_module_get(ops->owner))) { ret = ops->build_state(dev, encap, family, cfg, lws); + if (ret) + module_put(ops->owner); + } rcu_read_unlock(); return ret; @@ -194,6 +197,7 @@ void lwtstate_free(struct lwtunnel_state *lws) } else { kfree(lws); } + module_put(ops->owner); } EXPORT_SYMBOL(lwtstate_free); -- cgit From 27873de99f2fecca0f6b257316489ef2a1d86ffd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Jan 2017 08:34:45 -0800 Subject: scsi: qla2xxx: Fix a recently introduced memory leak qla2x00_probe_one() allocates IRQs before it initializes rsp_q_map so IRQs must be freed even if rsp_q_map allocation did not occur. This was detected by kmemleak. Fixes: 4fa183455988 ("scsi: qla2xxx: Utilize pci_alloc_irq_vectors/pci_free_irq_vectors calls") Signed-off-by: Bart Van Assche Cc: Michael Hernandez Cc: Himanshu Madhani Cc: Christoph Hellwig Cc: Reviewed-by: Christoph Hellwig Acked-By: Himanshu Madhani Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 5815403d1d65..3116bf390b06 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3237,7 +3237,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha) * from a probe failure context. */ if (!ha->rsp_q_map || !ha->rsp_q_map[0]) - return; + goto free_irqs; rsp = ha->rsp_q_map[0]; if (ha->flags.msix_enabled) { @@ -3257,6 +3257,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha) free_irq(pci_irq_vector(ha->pdev, 0), rsp); } +free_irqs: pci_free_irq_vectors(ha->pdev); } -- cgit From 78f824d4312a8944f5340c6b161bba3bf2c81096 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 21 Jun 2016 14:24:33 +0530 Subject: ARCv2: smp-boot: wake_flag polling by non-Masters needs to be uncached This is needed on HS38 cores, for setting up IO-Coherency aperture properly The polling could perturb the caches and coherecy fabric which could be wrong in the small window when Master is setting up IOC aperture etc in arc_cache_init() We do it only for ARCv2 based builds to not affect EZChip ARCompact based platform. Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 44a0d21ed342..2afbafadb6ab 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -90,10 +90,23 @@ void __init smp_cpus_done(unsigned int max_cpus) */ static volatile int wake_flag; +#ifdef CONFIG_ISA_ARCOMPACT + +#define __boot_read(f) f +#define __boot_write(f, v) f = v + +#else + +#define __boot_read(f) arc_read_uncached_32(&f) +#define __boot_write(f, v) arc_write_uncached_32(&f, v) + +#endif + static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) { BUG_ON(cpu == 0); - wake_flag = cpu; + + __boot_write(wake_flag, cpu); } void arc_platform_smp_wait_to_boot(int cpu) @@ -102,10 +115,10 @@ void arc_platform_smp_wait_to_boot(int cpu) if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET)) return; - while (wake_flag != cpu) + while (__boot_read(wake_flag) != cpu) ; - wake_flag = 0; + __boot_write(wake_flag, 0); } const char *arc_platform_smp_cpuinfo(void) -- cgit From 0516ffd88fa0d006ee80389ce14a9ca5ae45e845 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 19 Jan 2017 10:43:53 +0000 Subject: vhost/vsock: handle vhost_vq_init_access() error Propagate the error when vhost_vq_init_access() fails and set vq->private_data to NULL. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index bbbf588540ed..ce5e63d2c66a 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -373,6 +373,7 @@ static void vhost_vsock_handle_rx_kick(struct vhost_work *work) static int vhost_vsock_start(struct vhost_vsock *vsock) { + struct vhost_virtqueue *vq; size_t i; int ret; @@ -383,19 +384,20 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) goto err; for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { - struct vhost_virtqueue *vq = &vsock->vqs[i]; + vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); if (!vhost_vq_access_ok(vq)) { ret = -EFAULT; - mutex_unlock(&vq->mutex); goto err_vq; } if (!vq->private_data) { vq->private_data = vsock; - vhost_vq_init_access(vq); + ret = vhost_vq_init_access(vq); + if (ret) + goto err_vq; } mutex_unlock(&vq->mutex); @@ -405,8 +407,11 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) return 0; err_vq: + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { - struct vhost_virtqueue *vq = &vsock->vqs[i]; + vq = &vsock->vqs[i]; mutex_lock(&vq->mutex); vq->private_data = NULL; -- cgit From f7f6634d23830ff74335734fbdb28ea109c1f349 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 10 Jan 2017 17:51:17 +0000 Subject: virtio_mmio: Set DMA masks appropriately Once DMA API usage is enabled, it becomes apparent that virtio-mmio is inadvertently relying on the default 32-bit DMA mask, which leads to problems like rapidly exhausting SWIOTLB bounce buffers. Ensure that we set the appropriate 64-bit DMA mask whenever possible, with the coherent mask suitably limited for the legacy vring as per a0be1db4304f ("virtio_pci: Limit DMA mask to 44 bits for legacy virtio devices"). Cc: Andy Lutomirski Cc: Michael S. Tsirkin Reported-by: Jean-Philippe Brucker Fixes: b42111382f0e ("virtio_mmio: Use the DMA API if enabled") Signed-off-by: Robin Murphy Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index d47a2fcef818..c71fde5fe835 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -59,6 +59,7 @@ #define pr_fmt(fmt) "virtio-mmio: " fmt #include +#include #include #include #include @@ -498,6 +499,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) struct virtio_mmio_device *vm_dev; struct resource *mem; unsigned long magic; + int rc; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem) @@ -547,9 +549,25 @@ static int virtio_mmio_probe(struct platform_device *pdev) } vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); - if (vm_dev->version == 1) + if (vm_dev->version == 1) { writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + /* + * In the legacy case, ensure our coherently-allocated virtio + * ring will be at an address expressable as a 32-bit PFN. + */ + if (!rc) + dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32 + PAGE_SHIFT)); + } else { + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + } + if (rc) + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) + dev_warn(&pdev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); + platform_set_drvdata(pdev, vm_dev); return register_virtio_device(&vm_dev->vdev); -- cgit From c7070619f3408d9a0dffbed9149e6f00479cf43b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 20 Jan 2017 10:33:32 +0000 Subject: vring: Force use of DMA API for ARM-based systems with legacy devices Booting Linux on an ARM fastmodel containing an SMMU emulation results in an unexpected I/O page fault from the legacy virtio-blk PCI device: [ 1.211721] arm-smmu-v3 2b400000.smmu: event 0x10 received: [ 1.211800] arm-smmu-v3 2b400000.smmu: 0x00000000fffff010 [ 1.211880] arm-smmu-v3 2b400000.smmu: 0x0000020800000000 [ 1.211959] arm-smmu-v3 2b400000.smmu: 0x00000008fa081002 [ 1.212075] arm-smmu-v3 2b400000.smmu: 0x0000000000000000 [ 1.212155] arm-smmu-v3 2b400000.smmu: event 0x10 received: [ 1.212234] arm-smmu-v3 2b400000.smmu: 0x00000000fffff010 [ 1.212314] arm-smmu-v3 2b400000.smmu: 0x0000020800000000 [ 1.212394] arm-smmu-v3 2b400000.smmu: 0x00000008fa081000 [ 1.212471] arm-smmu-v3 2b400000.smmu: 0x0000000000000000 This is because the legacy virtio-blk device is behind an SMMU, so we have consequently swizzled its DMA ops and configured the SMMU to translate accesses. This then requires the vring code to use the DMA API to establish translations, otherwise all transactions will result in fatal faults and termination. Given that ARM-based systems only see an SMMU if one is really present (the topology is all described by firmware tables such as device-tree or IORT), then we can safely use the DMA API for all legacy virtio devices. Modern devices can advertise the prescense of an IOMMU using the VIRTIO_F_IOMMU_PLATFORM feature flag. Cc: Andy Lutomirski Cc: Michael S. Tsirkin Cc: Fixes: 876945dbf649 ("arm64: Hook up IOMMU dma_ops") Signed-off-by: Will Deacon Signed-off-by: Michael S. Tsirkin Acked-by: Marc Zyngier --- drivers/virtio/virtio_ring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 409aeaa49246..7e38ed79c3fc 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -159,6 +159,13 @@ static bool vring_use_dma_api(struct virtio_device *vdev) if (xen_domain()) return true; + /* + * On ARM-based machines, the DMA ops will do the right thing, + * so always use them with legacy devices. + */ + if (IS_ENABLED(CONFIG_ARM) || IS_ENABLED(CONFIG_ARM64)) + return !virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + return false; } -- cgit From 8a1f780e7f28c7c1d640118242cf68d528c456cd Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 24 Jan 2017 15:17:45 -0800 Subject: memory_hotplug: make zone_can_shift() return a boolean value online_{kernel|movable} is used to change the memory zone to ZONE_{NORMAL|MOVABLE} and online the memory. To check that memory zone can be changed, zone_can_shift() is used. Currently the function returns minus integer value, plus integer value and 0. When the function returns minus or plus integer value, it means that the memory zone can be changed to ZONE_{NORNAL|MOVABLE}. But when the function returns 0, there are two meanings. One of the meanings is that the memory zone does not need to be changed. For example, when memory is in ZONE_NORMAL and onlined by online_kernel the memory zone does not need to be changed. Another meaning is that the memory zone cannot be changed. When memory is in ZONE_NORMAL and onlined by online_movable, the memory zone may not be changed to ZONE_MOVALBE due to memory online limitation(see Documentation/memory-hotplug.txt). In this case, memory must not be onlined. The patch changes the return type of zone_can_shift() so that memory online operation fails when memory zone cannot be changed as follows: Before applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 8388608 managed 8388608 online_movable operation succeeded. But memory is onlined as ZONE_NORMAL, not ZONE_MOVABLE. After applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state bash: echo: write error: Invalid argument # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 online_movable operation failed because of failure of changing the memory zone from ZONE_NORMAL to ZONE_MOVABLE Fixes: df429ac03936 ("memory-hotplug: more general validation of zone during online") Link: http://lkml.kernel.org/r/2f9c3837-33d7-b6e5-59c0-6ca4372b2d84@gmail.com Signed-off-by: Yasuaki Ishimatsu Reviewed-by: Reza Arbab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 4 ++-- include/linux/memory_hotplug.h | 4 ++-- mm/memory_hotplug.c | 28 +++++++++++++++++----------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8ab8ea1253e6..dacb6a8418aa 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -408,14 +408,14 @@ static ssize_t show_valid_zones(struct device *dev, sprintf(buf, "%s", zone->name); /* MMOP_ONLINE_KERNEL */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); + zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); } /* MMOP_ONLINE_MOVABLE */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); + zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01033fadea47..c1784c0b4f35 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target); +extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e43142c15631..ca2723d47338 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg) node_set_state(node, N_MEMORY); } -int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target) +bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift) { struct zone *zone = page_zone(pfn_to_page(pfn)); enum zone_type idx = zone_idx(zone); int i; + *zone_shift = 0; + if (idx < target) { /* pages must be at end of current zone */ if (pfn + nr_pages != zone_end_pfn(zone)) - return 0; + return false; /* no zones in use between current zone and target */ for (i = idx + 1; i < target; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } if (target < idx) { /* pages must be at beginning of current zone */ if (pfn != zone->zone_start_pfn) - return 0; + return false; /* no zones in use between current zone and target */ for (i = target + 1; i < idx; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } - return target - idx; + *zone_shift = target - idx; + return true; } /* Must be protected by mem_hotplug_begin() */ @@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ !can_online_high_movable(zone)) return -EINVAL; - if (online_type == MMOP_ONLINE_KERNEL) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL); - else if (online_type == MMOP_ONLINE_MOVABLE) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE); + if (online_type == MMOP_ONLINE_KERNEL) { + if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift)) + return -EINVAL; + } else if (online_type == MMOP_ONLINE_MOVABLE) { + if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift)) + return -EINVAL; + } zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); if (!zone) -- cgit From 8310d48b125d19fcd9521d83b8293e63eb1646aa Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 24 Jan 2017 15:17:48 -0800 Subject: mm/huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp In commit 19be0eaffa3a ("mm: remove gup_flags FOLL_WRITE games from __get_user_pages()"), the mm code was changed from unsetting FOLL_WRITE after a COW was resolved to setting the (newly introduced) FOLL_COW instead. Simultaneously, the check in gup.c was updated to still allow writes with FOLL_FORCE set if FOLL_COW had also been set. However, a similar check in huge_memory.c was forgotten. As a result, remote memory writes to ro regions of memory backed by transparent huge pages cause an infinite loop in the kernel (handle_mm_fault sets FOLL_COW and returns 0 causing a retry, but follow_trans_huge_pmd bails out immidiately because `(flags & FOLL_WRITE) && !pmd_write(*pmd)` is true. While in this state the process is stil SIGKILLable, but little else works (e.g. no ptrace attach, no other signals). This is easily reproduced with the following code (assuming thp are set to always): #include #include #include #include #include #include #include #include #include #include #define TEST_SIZE 5 * 1024 * 1024 int main(void) { int status; pid_t child; int fd = open("/proc/self/mem", O_RDWR); void *addr = mmap(NULL, TEST_SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr != MAP_FAILED); pid_t parent_pid = getpid(); if ((child = fork()) == 0) { void *addr2 = mmap(NULL, TEST_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr2 != MAP_FAILED); memset(addr2, 'a', TEST_SIZE); pwrite(fd, addr2, TEST_SIZE, (uintptr_t)addr); return 0; } assert(child == waitpid(child, &status, 0)); assert(WIFEXITED(status) && WEXITSTATUS(status) == 0); return 0; } Fix this by updating follow_trans_huge_pmd in huge_memory.c analogously to the update in gup.c in the original commit. The same pattern exists in follow_devmap_pmd. However, we should not be able to reach that check with FOLL_COW set, so add WARN_ONCE to make sure we notice if we ever do. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170106015025.GA38411@juliacomputing.com Signed-off-by: Keno Fischer Acked-by: Kirill A. Shutemov Cc: Greg Thelen Cc: Nicholas Piggin Cc: Willy Tarreau Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Michal Hocko Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9a6bd6c8d55a..5f3ad65c85de 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pmd_lockptr(mm, pmd)); + /* + * When we COW a devmap PMD entry, we split it into PTEs, so we should + * not be in this function with `flags & FOLL_COW` set. + */ + WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); + if (flags & FOLL_WRITE && !pmd_write(*pmd)) return NULL; @@ -1128,6 +1134,16 @@ out_unlock: return ret; } +/* + * FOLL_FORCE can write to even unwritable pmd's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) +{ + return pmd_write(pmd) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); +} + struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, assert_spin_locked(pmd_lockptr(mm, pmd)); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) goto out; /* Avoid dumping huge zero page */ -- cgit From 6affb9d7b137fc93d86c926a5587e77b8bc64255 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 24 Jan 2017 15:17:51 -0800 Subject: dax: fix build warnings with FS_DAX and !FS_IOMAP As reported by Arnd: https://lkml.org/lkml/2017/1/10/756 Compiling with the following configuration: # CONFIG_EXT2_FS is not set # CONFIG_EXT4_FS is not set # CONFIG_XFS_FS is not set # CONFIG_FS_IOMAP depends on the above filesystems, as is not set CONFIG_FS_DAX=y generates build warnings about unused functions in fs/dax.c: fs/dax.c:878:12: warning: `dax_insert_mapping' defined but not used [-Wunused-function] static int dax_insert_mapping(struct address_space *mapping, ^~~~~~~~~~~~~~~~~~ fs/dax.c:572:12: warning: `copy_user_dax' defined but not used [-Wunused-function] static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, ^~~~~~~~~~~~~ fs/dax.c:542:12: warning: `dax_load_hole' defined but not used [-Wunused-function] static int dax_load_hole(struct address_space *mapping, void **entry, ^~~~~~~~~~~~~ fs/dax.c:312:14: warning: `grab_mapping_entry' defined but not used [-Wunused-function] static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index, ^~~~~~~~~~~~~~~~~~ Now that the struct buffer_head based DAX fault paths and I/O path have been removed we really depend on iomap support being present for DAX. Make this explicit by selecting FS_IOMAP if we compile in DAX support. This allows us to remove conditional selections of FS_IOMAP when FS_DAX was present for ext2 and ext4, and to remove an #ifdef in fs/dax.c. Link: http://lkml.kernel.org/r/1484087383-29478-1-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 1 + fs/dax.c | 2 -- fs/ext2/Kconfig | 1 - fs/ext4/Kconfig | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index c2a377cdda2b..83eab52fb3f6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -38,6 +38,7 @@ config FS_DAX bool "Direct Access (DAX) support" depends on MMU depends on !(ARM || MIPS || SPARC) + select FS_IOMAP help Direct Access (DAX) can be used on memory-backed block devices. If the block device supports DAX and the filesystem supports DAX, diff --git a/fs/dax.c b/fs/dax.c index ddcddfeaa03b..3af2da5e64ce 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL_GPL(__dax_zero_page_range); -#ifdef CONFIG_FS_IOMAP static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) { return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); @@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); #endif /* CONFIG_FS_DAX_PMD */ -#endif /* CONFIG_FS_IOMAP */ diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index 36bea5adcaba..c634874e12d9 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig @@ -1,6 +1,5 @@ config EXT2_FS tristate "Second extended fs support" - select FS_IOMAP if FS_DAX help Ext2 is a standard Linux file system for hard disks. diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 7b90691e98c4..e38039fd96ff 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,7 +37,6 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C - select FS_IOMAP if FS_DAX help This is the next generation of the ext3 filesystem. -- cgit From b94f51183b0617e7b9b4fb4137d4cf1cab7547c2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 24 Jan 2017 15:17:53 -0800 Subject: kernel/watchdog: prevent false hardlockup on overloaded system On an overloaded system, it is possible that a change in the watchdog threshold can be delayed long enough to trigger a false positive. This can easily be achieved by having a cpu spinning indefinitely on a task, while another cpu updates watchdog threshold. What happens is while trying to park the watchdog threads, the hrtimers on the other cpus trigger and reprogram themselves with the new slower watchdog threshold. Meanwhile, the nmi watchdog is still programmed with the old faster threshold. Because the one cpu is blocked, it prevents the thread parking on the other cpus from completing, which is needed to shutdown the nmi watchdog and reprogram it correctly. As a result, a false positive from the nmi watchdog is reported. Fix this by setting a park_in_progress flag to block all lockups until the parking is complete. Fix provided by Ulrich Obergfell. [akpm@linux-foundation.org: s/park_in_progress/watchdog_park_in_progress/] Link: http://lkml.kernel.org/r/1481041033-192236-1-git-send-email-dzickus@redhat.com Signed-off-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 1 + kernel/watchdog.c | 9 +++++++++ kernel/watchdog_hld.c | 3 +++ 3 files changed, 13 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aacca824a6ae..0a3fadc32693 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -110,6 +110,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern unsigned long *watchdog_cpumask_bits; +extern atomic_t watchdog_park_in_progress; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d4b0fa01cae3..63177be0159e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) +atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); + /* * The 'watchdog_running' variable is set to 1 when the watchdog threads * are registered/started and is set to 0 when the watchdog threads are @@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; + if (atomic_read(&watchdog_park_in_progress) != 0) + return HRTIMER_NORESTART; + /* kick the hardlockup detector */ watchdog_interrupt_count(); @@ -467,12 +472,16 @@ static int watchdog_park_threads(void) { int cpu, ret = 0; + atomic_set(&watchdog_park_in_progress, 1); + for_each_watchdog_cpu(cpu) { ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); if (ret) break; } + atomic_set(&watchdog_park_in_progress, 0); + return ret; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 84016c8aee6b..12b8dd640786 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; + if (atomic_read(&watchdog_park_in_progress) != 0) + return; + if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; -- cgit From de182cc8e882f74af2a112e09f148ce646937232 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Jan 2017 15:17:56 -0800 Subject: drivers/memstick/core/memstick.c: avoid -Wnonnull warning gcc-7 produces a harmless false-postive warning about a possible NULL pointer access: drivers/memstick/core/memstick.c: In function 'h_memstick_read_dev_id': drivers/memstick/core/memstick.c:309:3: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy(mrq->data, buf, mrq->data_len); This can't happen because the caller sets the command to 'MS_TPC_READ_REG', which causes the data direction to be 'READ' and the NULL pointer not accessed. As a simple workaround for the warning, we can pass a pointer to the data that we actually want to read into. This is not needed here, but also harmless, and lets the compiler know that the access is ok. Link: http://lkml.kernel.org/r/20170111144143.548867-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/memstick.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index a0547dbf9806..76382c858c35 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -330,7 +330,7 @@ static int h_memstick_read_dev_id(struct memstick_dev *card, struct ms_id_register id_reg; if (!(*mrq)) { - memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL, + memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg, sizeof(struct ms_id_register)); *mrq = &card->current_mrq; return 0; -- cgit From 15a77c6fe494f4b1757d30cd137fe66ab06a38c3 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 24 Jan 2017 15:17:59 -0800 Subject: userfaultfd: fix SIGBUS resulting from false rwsem wakeups With >=32 CPUs the userfaultfd selftest triggered a graceful but unexpected SIGBUS because VM_FAULT_RETRY was returned by handle_userfault() despite the UFFDIO_COPY wasn't completed. This seems caused by rwsem waking the thread blocked in handle_userfault() and we can't run up_read() before the wait_event sequence is complete. Keeping the wait_even sequence identical to the first one, would require running userfaultfd_must_wait() again to know if the loop should be repeated, and it would also require retaking the rwsem and revalidating the whole vma status. It seems simpler to wait the targeted wakeup so that if false wakeups materialize we still wait for our specific wakeup event, unless of course there are signals or the uffd was released. Debug code collecting the stack trace of the wakeup showed this: $ ./userfaultfd 100 99999 nr_pages: 25600, nr_pages_per_cpu: 800 bounces: 99998, mode: racing ver poll, userfaults: 32 35 90 232 30 138 69 82 34 30 139 40 40 31 20 19 43 13 15 28 27 38 21 43 56 22 1 17 31 8 4 2 bounces: 99997, mode: rnd ver poll, Bus error (core dumped) save_stack_trace+0x2b/0x50 try_to_wake_up+0x2a6/0x580 wake_up_q+0x32/0x70 rwsem_wake+0xe0/0x120 call_rwsem_wake+0x1b/0x30 up_write+0x3b/0x40 vm_mmap_pgoff+0x9c/0xc0 SyS_mmap_pgoff+0x1a9/0x240 SyS_mmap+0x22/0x30 entry_SYSCALL_64_fastpath+0x1f/0xbd 0xffffffffffffffff FAULT_FLAG_ALLOW_RETRY missing 70 CPU: 24 PID: 1054 Comm: userfaultfd Tainted: G W 4.8.0+ #30 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xb8/0x112 handle_userfault+0x572/0x650 handle_mm_fault+0x12cb/0x1520 __do_page_fault+0x175/0x500 trace_do_page_fault+0x61/0x270 do_async_page_fault+0x19/0x90 async_page_fault+0x25/0x30 This always happens when the main userfault selftest thread is running clone() while glibc runs either mprotect or mmap (both taking mmap_sem down_write()) to allocate the thread stack of the background threads, while locking/userfault threads already run at full throttle and are susceptible to false wakeups that may cause handle_userfault() to return before than expected (which results in graceful SIGBUS at the next attempt). This was reproduced only with >=32 CPUs because the loop to start the thread where clone() is too quick with fewer CPUs, while with 32 CPUs there's already significant activity on ~32 locking and userfault threads when the last background threads are started with clone(). This >=32 CPUs SMP race condition is likely reproducible only with the selftest because of the much heavier userfault load it generates if compared to real apps. We'll have to allow "one more" VM_FAULT_RETRY for the WP support and a patch floating around that provides it also hidden this problem but in reality only is successfully at hiding the problem. False wakeups could still happen again the second time handle_userfault() is invoked, even if it's a so rare race condition that getting false wakeups twice in a row is impossible to reproduce. This full fix is needed for correctness, the only alternative would be to allow VM_FAULT_RETRY to be returned infinitely. With this fix the WP support can stick to a strict "one more" VM_FAULT_RETRY logic (no need of returning it infinite times to avoid the SIGBUS). Link: http://lkml.kernel.org/r/20170111005535.13832-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Shubham Kumar Sharma Tested-by: Mike Kravetz Acked-by: Hillf Danton Cc: Michael Rapoport Cc: "Dr. David Alan Gilbert" Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d96e2f30084b..43953e03c356 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -63,6 +63,7 @@ struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_t wq; struct userfaultfd_ctx *ctx; + bool waken; }; struct userfaultfd_wake_range { @@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, if (len && (start > uwq->msg.arg.pagefault.address || start + len <= uwq->msg.arg.pagefault.address)) goto out; + WRITE_ONCE(uwq->waken, true); + /* + * The implicit smp_mb__before_spinlock in try_to_wake_up() + * renders uwq->waken visible to other CPUs before the task is + * waken. + */ ret = wake_up_state(wq->private, mode); if (ret) /* @@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) struct userfaultfd_wait_queue uwq; int ret; bool must_wait, return_to_userland; + long blocking_state; BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); @@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) uwq.wq.private = current; uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); uwq.ctx = ctx; + uwq.waken = false; return_to_userland = (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); + blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : + TASK_KILLABLE; spin_lock(&ctx->fault_pending_wqh.lock); /* @@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * following the spin_unlock to happen before the list_add in * __add_wait_queue. */ - set_current_state(return_to_userland ? TASK_INTERRUPTIBLE : - TASK_KILLABLE); + set_current_state(blocking_state); spin_unlock(&ctx->fault_pending_wqh.lock); must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) wake_up_poll(&ctx->fd_wqh, POLLIN); schedule(); ret |= VM_FAULT_MAJOR; + + /* + * False wakeups can orginate even from rwsem before + * up_read() however userfaults will wait either for a + * targeted wakeup on the specific uwq waitqueue from + * wake_userfault() or for signals or for uffd + * release. + */ + while (!READ_ONCE(uwq.waken)) { + /* + * This needs the full smp_store_mb() + * guarantee as the state write must be + * visible to other CPUs before reading + * uwq.waken from other CPUs. + */ + set_current_state(blocking_state); + if (READ_ONCE(uwq.waken) || + READ_ONCE(ctx->released) || + (return_to_userland ? signal_pending(current) : + fatal_signal_pending(current))) + break; + schedule(); + } } __set_current_state(TASK_RUNNING); -- cgit From aa2efd5ea4041754da4046c3d2e7edaac9526258 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 24 Jan 2017 15:18:02 -0800 Subject: mm/slub.c: trace free objects at KERN_INFO Currently when trace is enabled (e.g. slub_debug=T,kmalloc-128 ) the trace messages are mostly output at KERN_INFO. However the trace code also calls print_section() to hexdump the head of a free object. This is hard coded to use KERN_ERR, meaning the console is deluged with trace messages even if we've asked for quiet. Fix this the obvious way but adding a level parameter to print_section(), allowing calls from the trace code to use the same trace level as other trace messages. Link: http://lkml.kernel.org/r/20170113154850.518-1-daniel.thompson@linaro.org Signed-off-by: Daniel Thompson Acked-by: Christoph Lameter Acked-by: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 067598a00849..7aa6f433f4de 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; } -static void print_section(char *text, u8 *addr, unsigned int length) +static void print_section(char *level, char *text, u8 *addr, + unsigned int length) { metadata_access_enable(); - print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, length, 1); metadata_access_disable(); } @@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) p, p - addr, get_freepointer(s, p)); if (s->flags & SLAB_RED_ZONE) - print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); + print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, + s->red_left_pad); else if (p > addr + 16) - print_section("Bytes b4 ", p - 16, 16); + print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); - print_section("Object ", p, min_t(unsigned long, s->object_size, - PAGE_SIZE)); + print_section(KERN_ERR, "Object ", p, + min_t(unsigned long, s->object_size, PAGE_SIZE)); if (s->flags & SLAB_RED_ZONE) - print_section("Redzone ", p + s->object_size, + print_section(KERN_ERR, "Redzone ", p + s->object_size, s->inuse - s->object_size); if (s->offset) @@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, size_from_object(s) - off); + print_section(KERN_ERR, "Padding ", p + off, + size_from_object(s) - off); dump_stack(); } @@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) end--; slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); - print_section("Padding ", end - remainder, remainder); + print_section(KERN_ERR, "Padding ", end - remainder, remainder); restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; @@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, page->freelist); if (!alloc) - print_section("Object ", (void *)object, + print_section(KERN_INFO, "Object ", (void *)object, s->object_size); dump_stack(); -- cgit From 424f6c4818bbf1b8ccf58aa012ecc19c0bb9b446 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 24 Jan 2017 15:18:05 -0800 Subject: mm: alloc_contig: re-allow CMA to compact FS pages Commit 73e64c51afc5 ("mm, compaction: allow compaction for GFP_NOFS requests") changed compation to skip FS pages if not explicitly allowed to touch them, but missed to update the CMA compact_control. This leads to a very high isolation failure rate, crippling performance of CMA even on a lightly loaded system. Re-allow CMA to compact FS pages by setting the correct GFP flags, restoring CMA behavior and performance to the kernel 4.9 level. Fixes: 73e64c51afc5 (mm, compaction: allow compaction for GFP_NOFS requests) Link: http://lkml.kernel.org/r/20170113115155.24335-1-l.stach@pengutronix.de Signed-off-by: Lucas Stach Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d604d2596b7b..41d5e2e2d8ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7248,6 +7248,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .zone = page_zone(pfn_to_page(start)), .mode = MIGRATE_SYNC, .ignore_skip_hint = true, + .gfp_mask = GFP_KERNEL, }; INIT_LIST_HEAD(&cc.migratepages); -- cgit From 3ba4bceef23206349d4130ddf140819b365de7c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Jan 2017 15:18:07 -0800 Subject: proc: add a schedule point in proc_pid_readdir() We have seen proc_pid_readdir() invocations holding cpu for more than 50 ms. Add a cond_resched() to be gentle with other tasks. [akpm@linux-foundation.org: coding style fix] Link: http://lkml.kernel.org/r/1484238380.15816.42.camel@edumazet-glaptop3.roam.corp.google.com Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 8e7e61b28f31..87c9a9aacda3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[PROC_NUMBUF]; int len; + + cond_resched(); if (!has_pid_permissions(ns, iter.task, 2)) continue; -- cgit From 3674534b775354516e5c148ea48f51d4d1909a78 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 Jan 2017 15:18:10 -0800 Subject: mm, memcg: do not retry precharge charges When memory.move_charge_at_immigrate is enabled and precharges are depleted during move, mem_cgroup_move_charge_pte_range() will attempt to increase the size of the precharge. Prevent precharges from ever looping by setting __GFP_NORETRY. This was probably the intention of the GFP_KERNEL & ~__GFP_NORETRY, which is pointless as written. Fixes: 0029e19ebf84 ("mm: memcontrol: remove explicit OOM parameter in charge path") Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701130208510.69402@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a63a8f832664..b822e158b319 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count) return ret; } - /* Try charges one by one with reclaim */ + /* Try charges one by one with reclaim, but do not retry */ while (count--) { - ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); + ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1); if (ret) return ret; mc.precharge++; -- cgit From bbd88e1d53a84df9f57a2e37acc15518c3d304db Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 24 Jan 2017 15:18:13 -0800 Subject: Documentation/filesystems/proc.txt: add VmPin Commit bc3e53f682d9 ("mm: distinguish between mlocked and pinned pages") added VmPin in /proc//status. Report that in Documentation/filesystems/proc.txt Also move Umask after Name to keep correct order. Link: http://lkml.kernel.org/r/20170114201219.30387-1-fabf@skynet.be Signed-off-by: Fabian Frederick Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 72624a16b792..c94b4675d021 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise snapshot of a moment, you can see /proc//smaps file and scan page table. It's slow but very precise. -Table 1-2: Contents of the status files (as of 4.1) +Table 1-2: Contents of the status files (as of 4.8) .............................................................................. Field Content Name filename of the executable + Umask file mode creation mask State state (R is running, S is sleeping, D is sleeping in an uninterruptible wait, Z is zombie, T is traced or stopped) @@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1) TracerPid PID of process tracing this process (0 if not) Uid Real, effective, saved set, and file system UIDs Gid Real, effective, saved set, and file system GIDs - Umask file mode creation mask FDSize number of file descriptor slots currently allocated Groups supplementary group list NStgid descendant namespace thread group ID hierarchy @@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1) VmPeak peak virtual memory size VmSize total program size VmLck locked memory size + VmPin pinned memory size VmHWM peak resident set size ("high water mark") VmRSS size of memory portions. It contains the three following parts (VmRSS = RssAnon + RssFile + RssShmem) -- cgit From dd040b6f6d5630202e185399a2ff7ab356ed469c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 24 Jan 2017 15:18:16 -0800 Subject: radix-tree: fix private list warnings The newly introduced warning in radix_tree_free_nodes() was testing the wrong variable; it should have been 'old' instead of 'node'. Fixes: ea07b862ac8e ("mm: workingset: fix use-after-free in shadow node shrinker") Link: http://lkml.kernel.org/r/20170118163746.GA32495@cmpxchg.org Signed-off-by: Matthew Wilcox Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0b92d605fb69..84812a9fb16f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -769,7 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; - WARN_ON_ONCE(!list_empty(&node->private_list)); + WARN_ON_ONCE(!list_empty(&old->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; -- cgit From d51e9894d27492783fc6d1b489070b4ba66ce969 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:18 -0800 Subject: mm/mempolicy.c: do not put mempolicy before using its nodemask Since commit be97a41b291e ("mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma") alloc_pages_vma() can potentially free a mempolicy by mpol_cond_put() before accessing the embedded nodemask by __alloc_pages_nodemask(). The commit log says it's so "we can use a single exit path within the function" but that's clearly wrong. We can still do that when doing mpol_cond_put() after the allocation attempt. Make sure the mempolicy is not freed prematurely, otherwise __alloc_pages_nodemask() can end up using a bogus nodemask, which could lead e.g. to premature OOM. Fixes: be97a41b291e ("mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma") Link: http://lkml.kernel.org/r/20170118141124.8345-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 2e346645eb80..1e7873e40c9a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2017,8 +2017,8 @@ retry_cpuset: nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); - mpol_cond_put(pol); page = __alloc_pages_nodemask(gfp, order, zl, nmask); + mpol_cond_put(pol); out: if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; -- cgit From 545d58f677b21401f6de1ac12c25cc109f903ace Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 24 Jan 2017 15:18:21 -0800 Subject: frv: add atomic64_add_unless() The build of frv allmodconfig was failing with the error: lib/atomic64_test.c:209:9: error: implicit declaration of function 'atomic64_add_unless' All the atomic64 operations were defined in frv, but atomic64_add_unless() was not done. Implement atomic64_add_unless() as done in other arches. Link: http://lkml.kernel.org/r/1484781236-6698-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/atomic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 1c2a5e264fc7..994ed3d5ca08 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -161,6 +161,22 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) return c; } +static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) +{ + long long c, old; + + c = atomic64_read(v); + for (;;) { + if (unlikely(c == u)) + break; + old = atomic64_cmpxchg(v, c, c + i); + if (likely(old == c)) + break; + c = old; + } + return c != u; +} + #define ATOMIC_OP(op) \ static inline int atomic_fetch_##op(int i, atomic_t *v) \ { \ -- cgit From 2dc705a9930b4806250fbf5a76e55266e59389f2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2017 15:18:24 -0800 Subject: fbdev: color map copying bounds checking Copying color maps to userspace doesn't check the value of to->start, which will cause kernel heap buffer OOB read due to signedness wraps. CVE-2016-8405 Link: http://lkml.kernel.org/r/20170105224249.GA50925@beast Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kees Cook Reported-by: Peter Pi (@heisecode) of Trend Micro Cc: Min Chong Cc: Dan Carpenter Cc: Tomi Valkeinen Cc: Bartlomiej Zolnierkiewicz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbdev/core/fbcmap.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c index f89245b8ba8e..68a113594808 100644 --- a/drivers/video/fbdev/core/fbcmap.c +++ b/drivers/video/fbdev/core/fbcmap.c @@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); @@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); -- cgit From ff7a28a074ccbea999dadbb58c46212cf90984c6 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 24 Jan 2017 15:18:29 -0800 Subject: kernel/panic.c: add missing \n When a system panics, the "Rebooting in X seconds.." message is never printed because it lacks a new line. Fix it. Link: http://lkml.kernel.org/r/20170119114751.2724-1-jslaby@suse.cz Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 901c4fb46002..08aa88dde7de 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -249,7 +249,7 @@ void panic(const char *fmt, ...) * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked. */ - pr_emerg("Rebooting in %d seconds..", panic_timeout); + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { touch_nmi_watchdog(); -- cgit From ea57485af8f4221312a5a95d63c382b45e7840dc Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:32 -0800 Subject: mm, page_alloc: fix check for NULL preferred_zone Patch series "fix premature OOM regression in 4.7+ due to cpuset races". This is v2 of my attempt to fix the recent report based on LTP cpuset stress test [1]. The intention is to go to stable 4.9 LTSS with this, as triggering repeated OOMs is not nice. That's why the patches try to be not too intrusive. Unfortunately why investigating I found that modifying the testcase to use per-VMA policies instead of per-task policies will bring the OOM's back, but that seems to be much older and harder to fix problem. I have posted a RFC [2] but I believe that fixing the recent regressions has a higher priority. Longer-term we might try to think how to fix the cpuset mess in a better and less error prone way. I was for example very surprised to learn, that cpuset updates change not only task->mems_allowed, but also nodemask of mempolicies. Until now I expected the parameter to alloc_pages_nodemask() to be stable. I wonder why do we then treat cpusets specially in get_page_from_freelist() and distinguish HARDWALL etc, when there's unconditional intersection between mempolicy and cpuset. I would expect the nodemask adjustment for saving overhead in g_p_f(), but that clearly doesn't happen in the current form. So we have both crazy complexity and overhead, AFAICS. [1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com [2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz This patch (of 4): Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") we have a wrong check for NULL preferred_zone, which can theoretically happen due to concurrent cpuset modification. We check the zoneref pointer which is never NULL and we should check the zone pointer. Also document this in first_zones_zonelist() comment per Michal Hocko. Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Ganapatrao Kulkarni Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++++- mm/page_alloc.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 36d9896fbc1e..f4aac87adcc3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, * @zonelist - The zonelist to search for a suitable zone * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter + * @return - Zoneref pointer for the first suitable zone found (see below) * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. + * + * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is + * never NULL). This may happen either genuinely, or due to concurrent nodemask + * update due to cpuset modification. */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 41d5e2e2d8ce..85cf0f715eb0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3784,7 +3784,7 @@ retry_cpuset: */ ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref) { + if (!ac.preferred_zoneref->zone) { page = NULL; goto no_zone; } -- cgit From 16096c25bf0ca5d87e4fa6ec6108ba53feead212 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:35 -0800 Subject: mm, page_alloc: fix fast-path race with cpuset update or removal Ganapatrao Kulkarni reported that the LTP test cpuset01 in stress mode triggers OOM killer in few seconds, despite lots of free memory. The test attempts to repeatedly fault in memory in one process in a cpuset, while changing allowed nodes of the cpuset between 0 and 1 in another process. One possible cause is that in the fast path we find the preferred zoneref according to current mems_allowed, so that it points to the middle of the zonelist, skipping e.g. zones of node 1 completely. If the mems_allowed is updated to contain only node 1, we never reach it in the zonelist, and trigger OOM before checking the cpuset_mems_cookie. This patch fixes the particular case by redoing the preferred zoneref search if we switch back to the original nodemask. The condition is also slightly changed so that when the last non-root cpuset is removed, we don't miss it. Note that this is not a full fix, and more patches will follow. Link: http://lkml.kernel.org/r/20170120103843.24587-3-vbabka@suse.cz Fixes: 682a3385e773 ("mm, page_alloc: inline the fast path of the zonelist iterator") Signed-off-by: Vlastimil Babka Reported-by: Ganapatrao Kulkarni Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 85cf0f715eb0..6f28b7e926d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3804,9 +3804,17 @@ retry_cpuset: /* * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. + * Also recalculate the starting point for the zonelist iterator or + * we could end up iterating over non-eligible zones endlessly. */ - if (cpusets_enabled()) + if (unlikely(ac.nodemask != nodemask)) { ac.nodemask = nodemask; + ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, + ac.high_zoneidx, ac.nodemask); + if (!ac.preferred_zoneref->zone) + goto no_zone; + } + page = __alloc_pages_slowpath(alloc_mask, order, &ac); no_zone: -- cgit From 5ce9bfef1d27944c119a397a9d827bef795487ce Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:38 -0800 Subject: mm, page_alloc: move cpuset seqcount checking to slowpath This is a preparation for the following patch to make review simpler. While the primary motivation is a bug fix, this also simplifies the fast path, although the moved code is only enabled when cpusets are in use. Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Ganapatrao Kulkarni Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6f28b7e926d1..0df3c089d3af 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct page *page = NULL; unsigned int alloc_flags; unsigned long did_some_progress; - enum compact_priority compact_priority = DEF_COMPACT_PRIORITY; + enum compact_priority compact_priority; enum compact_result compact_result; - int compaction_retries = 0; - int no_progress_loops = 0; + int compaction_retries; + int no_progress_loops; unsigned long alloc_start = jiffies; unsigned int stall_timeout = 10 * HZ; + unsigned int cpuset_mems_cookie; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -3549,6 +3550,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; +retry_cpuset: + compaction_retries = 0; + no_progress_loops = 0; + compact_priority = DEF_COMPACT_PRIORITY; + cpuset_mems_cookie = read_mems_allowed_begin(); + /* * The fast path uses conservative alloc_flags to succeed only until * kswapd needs to be woken up, and to avoid the cost of setting up @@ -3720,6 +3727,15 @@ retry: } nopage: + /* + * When updating a task's mems_allowed, it is possible to race with + * parallel threads in such a way that an allocation can fail while + * the mask is being updated. If a page allocation is about to fail, + * check if the cpuset changed during allocation and if so, retry. + */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + warn_alloc(gfp_mask, "page allocation failure: order:%u", order); got_pg: @@ -3734,7 +3750,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, nodemask_t *nodemask) { struct page *page; - unsigned int cpuset_mems_cookie; unsigned int alloc_flags = ALLOC_WMARK_LOW; gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { @@ -3771,9 +3786,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; -retry_cpuset: - cpuset_mems_cookie = read_mems_allowed_begin(); - /* Dirty zone balancing only done in the fast path */ ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); @@ -3786,6 +3798,11 @@ retry_cpuset: ac.high_zoneidx, ac.nodemask); if (!ac.preferred_zoneref->zone) { page = NULL; + /* + * This might be due to race with cpuset_current_mems_allowed + * update, so make sure we retry with original nodemask in the + * slow path. + */ goto no_zone; } @@ -3794,6 +3811,7 @@ retry_cpuset: if (likely(page)) goto out; +no_zone: /* * Runtime PM, block IO and its error handling path can deadlock * because I/O on the device might not complete. @@ -3811,24 +3829,11 @@ retry_cpuset: ac.nodemask = nodemask; ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref->zone) - goto no_zone; + /* If we have NULL preferred zone, slowpath wll handle that */ } page = __alloc_pages_slowpath(alloc_mask, order, &ac); -no_zone: - /* - * When updating a task's mems_allowed, it is possible to race with - * parallel threads in such a way that an allocation can fail while - * the mask is being updated. If a page allocation is about to fail, - * check if the cpuset changed during allocation and if so, retry. - */ - if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) { - alloc_mask = gfp_mask; - goto retry_cpuset; - } - out: if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) { -- cgit From e47483bca2cc59a4593b37a270b16ee42b1d9f08 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:41 -0800 Subject: mm, page_alloc: fix premature OOM when racing with cpuset mems update Ganapatrao Kulkarni reported that the LTP test cpuset01 in stress mode triggers OOM killer in few seconds, despite lots of free memory. The test attempts to repeatedly fault in memory in one process in a cpuset, while changing allowed nodes of the cpuset between 0 and 1 in another process. The problem comes from insufficient protection against cpuset changes, which can cause get_page_from_freelist() to consider all zones as non-eligible due to nodemask and/or current->mems_allowed. This was masked in the past by sufficient retries, but since commit 682a3385e773 ("mm, page_alloc: inline the fast path of the zonelist iterator") we fix the preferred_zoneref once, and don't iterate over the whole zonelist in further attempts, thus the only eligible zones might be placed in the zonelist before our starting point and we always miss them. A previous patch fixed this problem for current->mems_allowed. However, cpuset changes also update the task's mempolicy nodemask. The fix has two parts. We have to repeat the preferred_zoneref search when we detect cpuset update by way of seqcount, and we have to check the seqcount before considering OOM. [akpm@linux-foundation.org: fix typo in comment] Link: http://lkml.kernel.org/r/20170120103843.24587-5-vbabka@suse.cz Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Signed-off-by: Vlastimil Babka Reported-by: Ganapatrao Kulkarni Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0df3c089d3af..f3e0c69a97b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3555,6 +3555,17 @@ retry_cpuset: no_progress_loops = 0; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); + /* + * We need to recalculate the starting point for the zonelist iterator + * because we might have used different nodemask in the fast path, or + * there was a cpuset modification and we are retrying - otherwise we + * could end up iterating over non-eligible zones endlessly. + */ + ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, ac->nodemask); + if (!ac->preferred_zoneref->zone) + goto nopage; + /* * The fast path uses conservative alloc_flags to succeed only until @@ -3715,6 +3726,13 @@ retry: &compaction_retries)) goto retry; + /* + * It's possible we raced with cpuset update so the OOM would be + * premature (see below the nopage: label for full explanation). + */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); if (page) @@ -3728,10 +3746,11 @@ retry: nopage: /* - * When updating a task's mems_allowed, it is possible to race with - * parallel threads in such a way that an allocation can fail while - * the mask is being updated. If a page allocation is about to fail, - * check if the cpuset changed during allocation and if so, retry. + * When updating a task's mems_allowed or mempolicy nodemask, it is + * possible to race with parallel threads in such a way that our + * allocation can fail while the mask is being updated. If we are about + * to fail, check if the cpuset changed during allocation and if so, + * retry. */ if (read_mems_allowed_retry(cpuset_mems_cookie)) goto retry_cpuset; @@ -3822,15 +3841,9 @@ no_zone: /* * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. - * Also recalculate the starting point for the zonelist iterator or - * we could end up iterating over non-eligible zones endlessly. */ - if (unlikely(ac.nodemask != nodemask)) { + if (unlikely(ac.nodemask != nodemask)) ac.nodemask = nodemask; - ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, - ac.high_zoneidx, ac.nodemask); - /* If we have NULL preferred zone, slowpath wll handle that */ - } page = __alloc_pages_slowpath(alloc_mask, order, &ac); -- cgit From 4180c4c170a5a33b9987b314d248a9d572d89ab0 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 24 Jan 2017 15:18:43 -0800 Subject: frv: add missing atomic64 operations Some more atomic64 operations were missing and as a result frv allmodconfig was failing. Add the missing operations. Link: http://lkml.kernel.org/r/1485193844-12850-1-git-send-email-sudip.mukherjee@codethink.co.uk Signed-off-by: Sudip Mukherjee Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/atomic.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 994ed3d5ca08..e93c9494503a 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -139,7 +139,7 @@ static inline void atomic64_dec(atomic64_t *v) #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_and_test(v) (atomic64_inc_return((v)) == 0) - +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #define atomic_cmpxchg(v, old, new) (cmpxchg(&(v)->counter, old, new)) #define atomic_xchg(v, new) (xchg(&(v)->counter, new)) @@ -177,6 +177,23 @@ static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) return c != u; } +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long c, old, dec; + + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + #define ATOMIC_OP(op) \ static inline int atomic_fetch_##op(int i, atomic_t *v) \ { \ -- cgit From f598f82e204ec0b17797caaf1b0311c52d43fb9a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Tue, 24 Jan 2017 15:18:46 -0800 Subject: romfs: use different way to generate fsid for BLOCK or MTD Commit 8a59f5d25265 ("fs/romfs: return f_fsid for statfs(2)") generates a 64bit id from sb->s_bdev->bd_dev. This is only correct when romfs is defined with CONFIG_ROMFS_ON_BLOCK. If romfs is only defined with CONFIG_ROMFS_ON_MTD, sb->s_bdev is NULL, referencing sb->s_bdev->bd_dev will triger an oops. Richard Weinberger points out that when CONFIG_ROMFS_BACKED_BY_BOTH=y, both CONFIG_ROMFS_ON_BLOCK and CONFIG_ROMFS_ON_MTD are defined. Therefore when calling huge_encode_dev() to generate a 64bit id, I use the follow order to choose parameter, - CONFIG_ROMFS_ON_BLOCK defined use sb->s_bdev->bd_dev - CONFIG_ROMFS_ON_BLOCK undefined and CONFIG_ROMFS_ON_MTD defined use sb->s_dev when, - both CONFIG_ROMFS_ON_BLOCK and CONFIG_ROMFS_ON_MTD undefined leave id as 0 When CONFIG_ROMFS_ON_MTD is defined and sb->s_mtd is not NULL, sb->s_dev is set to a device ID generated by MTD_BLOCK_MAJOR and mtd index, otherwise sb->s_dev is 0. This is a try-best effort to generate a uniq file system ID, if all the above conditions are not meet, f_fsid of this romfs instance will be 0. Generally only one romfs can be built on single MTD block device, this method is enough to identify multiple romfs instances in a computer. Link: http://lkml.kernel.org/r/1482928596-115155-1-git-send-email-colyli@suse.de Signed-off-by: Coly Li Reported-by: Nong Li Tested-by: Nong Li Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/romfs/super.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index d0f8a38dfafa..0186fe6d39f3 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -74,6 +74,7 @@ #include #include #include +#include #include "internal.h" static struct kmem_cache *romfs_inode_cachep; @@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode) static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + /* When calling huge_encode_dev(), + * use sb->s_bdev->bd_dev when, + * - CONFIG_ROMFS_ON_BLOCK defined + * use sb->s_dev when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD defined + * leave id as 0 when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD undefined + */ + if (sb->s_bdev) + id = huge_encode_dev(sb->s_bdev->bd_dev); + else if (sb->s_dev) + id = huge_encode_dev(sb->s_dev); buf->f_type = ROMFS_MAGIC; buf->f_namelen = ROMFS_MAXFN; @@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY | MS_NOATIME; sb->s_op = &romfs_super_ops; +#ifdef CONFIG_ROMFS_ON_MTD + /* Use same dev ID from the underlying mtdblock device */ + if (sb->s_mtd) + sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); +#endif /* read the image superblock and check it */ rsb = kmalloc(512, GFP_KERNEL); if (!rsb) -- cgit From 3705ccfdd1e8b539225ce20e3925a945cc788d67 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jan 2017 15:18:49 -0800 Subject: mn10300: fix build error of missing fpu_save() When CONFIG_FPU is not enabled on arch/mn10300, causes a build error with a call to fpu_save(): kernel/built-in.o: In function `.L410': core.c:(.sched.text+0x28a): undefined reference to `fpu_save' Fix this by including in so that an empty static inline fpu_save() is defined. Link: http://lkml.kernel.org/r/dc421c4f-4842-4429-1b99-92865c2f24b6@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Reviewed-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/include/asm/switch_to.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mn10300/include/asm/switch_to.h b/arch/mn10300/include/asm/switch_to.h index 393d311735c8..67e333aa7629 100644 --- a/arch/mn10300/include/asm/switch_to.h +++ b/arch/mn10300/include/asm/switch_to.h @@ -16,7 +16,7 @@ struct task_struct; struct thread_struct; -#if !defined(CONFIG_LAZY_SAVE_FPU) +#if defined(CONFIG_FPU) && !defined(CONFIG_LAZY_SAVE_FPU) struct fpu_state_struct; extern asmlinkage void fpu_save(struct fpu_state_struct *); #define switch_fpu(prev, next) \ -- cgit From 3277953de2f31dd03c6375e9a9f680ac37fc9d27 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Tue, 24 Jan 2017 15:18:52 -0800 Subject: mm: do not export ioremap_page_range symbol for external module Recently, I've found cases in which ioremap_page_range was used incorrectly, in external modules, leading to crashes. This can be partly attributed to the fact that ioremap_page_range is lower-level, with fewer protections, as compared to the other functions that an external module would typically call. Those include: ioremap_cache ioremap_nocache ioremap_prot ioremap_uc ioremap_wc ioremap_wt ...each of which wraps __ioremap_caller, which in turn provides a safer way to achieve the mapping. Therefore, stop EXPORT-ing ioremap_page_range. Link: http://lkml.kernel.org/r/1485173220-29010-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Reviewed-by: John Hubbard Suggested-by: John Hubbard Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ioremap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/ioremap.c b/lib/ioremap.c index 86c8911b0e3a..a3e14ce92a56 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr, return err; } -EXPORT_SYMBOL_GPL(ioremap_page_range); -- cgit From 534c9dc982aca01b630297ad5637f6e95e94c1e2 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Tue, 24 Jan 2017 15:18:55 -0800 Subject: MAINTAINERS: add Dan Streetman to zswap maintainers Add myself as zswap maintainer. Link: http://lkml.kernel.org/r/20170124212200.19052-1-ddstreet@ieee.org Signed-off-by: Dan Streetman Acked-by: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2fd11b439ab3..c79b519fb422 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13679,6 +13679,7 @@ F: Documentation/vm/zsmalloc.txt ZSWAP COMPRESSED SWAP CACHING M: Seth Jennings +M: Dan Streetman L: linux-mm@kvack.org S: Maintained F: mm/zswap.c -- cgit From aab45453ff5c77200c6da4ac909f7a4392aed17e Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Tue, 24 Jan 2017 15:18:57 -0800 Subject: MAINTAINERS: add Dan Streetman to zbud maintainers Add myself as zbud maintainer. Link: http://lkml.kernel.org/r/20170124221705.26523-1-ddstreet@ieee.org Signed-off-by: Dan Streetman Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c79b519fb422..bbf74a9d6b47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13624,6 +13624,7 @@ F: drivers/net/hamradio/z8530.h ZBUD COMPRESSED PAGE ALLOCATOR M: Seth Jennings +M: Dan Streetman L: linux-mm@kvack.org S: Maintained F: mm/zbud.c -- cgit From 3feb479cea37fc623cf4e705631b2e679cbfbd7a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 23 Jan 2017 13:13:58 -0200 Subject: Revert "thermal: thermal_hwmon: Convert to hwmon_device_register_with_info()" This reverts commit 7611fb68062f ("thermal: thermal_hwmon: Convert to hwmon_device_register_with_info()"). Pavel Machek reported breakage in the Nokia N900 due to this commit. We can revisit a proper fix for the warning later. Reported-by: Pavel Machek Signed-off-by: Fabio Estevam Acked-by: Guenter Roeck Acked-by: Pavel Machek Signed-off-by: Zhang Rui --- drivers/thermal/thermal_hwmon.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index c4a508a124dc..541af5946203 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -58,6 +58,14 @@ static LIST_HEAD(thermal_hwmon_list); static DEFINE_MUTEX(thermal_hwmon_list_lock); +static ssize_t +name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", hwmon->type); +} +static DEVICE_ATTR_RO(name); + static ssize_t temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -157,12 +165,15 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) INIT_LIST_HEAD(&hwmon->tz_list); strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); - hwmon->device = hwmon_device_register_with_info(NULL, hwmon->type, - hwmon, NULL, NULL); + hwmon->device = hwmon_device_register(NULL); if (IS_ERR(hwmon->device)) { result = PTR_ERR(hwmon->device); goto free_mem; } + dev_set_drvdata(hwmon->device, hwmon); + result = device_create_file(hwmon->device, &dev_attr_name); + if (result) + goto free_mem; register_sys_interface: temp = kzalloc(sizeof(*temp), GFP_KERNEL); @@ -211,8 +222,10 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) free_temp_mem: kfree(temp); unregister_name: - if (new_hwmon_device) + if (new_hwmon_device) { + device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); + } free_mem: if (new_hwmon_device) kfree(hwmon); @@ -254,6 +267,7 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) list_del(&hwmon->node); mutex_unlock(&thermal_hwmon_list_lock); + device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); kfree(hwmon); } -- cgit From bdbfd5196d24a6d0845b549eba6ce8e6fa8bb3d0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 Jan 2017 12:53:45 -0700 Subject: drm/i915/gvt/kvmgt: mdev ABI is available_instances, not available_instance Per the ABI specification[1], each mdev_supported_types entry should have an available_instances, with an "s", not available_instance. [1] Documentation/ABI/testing/sysfs-bus-vfio-mdev Signed-off-by: Alex Williamson Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 0c9234a87a20..3f656e3a6e5a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -230,8 +230,8 @@ static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, return NULL; } -static ssize_t available_instance_show(struct kobject *kobj, struct device *dev, - char *buf) +static ssize_t available_instances_show(struct kobject *kobj, + struct device *dev, char *buf) { struct intel_vgpu_type *type; unsigned int num = 0; @@ -269,12 +269,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, type->fence); } -static MDEV_TYPE_ATTR_RO(available_instance); +static MDEV_TYPE_ATTR_RO(available_instances); static MDEV_TYPE_ATTR_RO(device_api); static MDEV_TYPE_ATTR_RO(description); static struct attribute *type_attrs[] = { - &mdev_type_attr_available_instance.attr, + &mdev_type_attr_available_instances.attr, &mdev_type_attr_device_api.attr, &mdev_type_attr_description.attr, NULL, -- cgit From 7283accfaef66e6a64f7d3ec0672596dd8e5b144 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 Jan 2017 13:15:43 -0700 Subject: drm/i915/gvt: Fix kmem_cache_create() name According to kmem_cache_sanity_check(), spaces are not allowed in the name of a cache and results in a kernel oops with CONFIG_DEBUG_VM. Convert to underscores. Signed-off-by: Alex Williamson Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index fb852c51d00e..34083731669d 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -798,7 +798,7 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&vgpu->workload_q_head[i]); } - vgpu->workloads = kmem_cache_create("gvt-g vgpu workload", + vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", sizeof(struct intel_vgpu_workload), 0, SLAB_HWCACHE_ALIGN, NULL); -- cgit From ba7addcd805e5c83e201b118a2693b921a980b44 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 25 Jan 2017 10:30:02 +0800 Subject: MAINTAINERS: update new mail list for intel gvt driver We've moved to lists.freedesktop.org from lists.01.org. Update info in MAINTAINERS. Signed-off-by: Zhenyu Wang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5f0420a0da5b..5bd03d5cb28b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4153,7 +4153,7 @@ F: Documentation/gpu/i915.rst INTEL GVT-g DRIVERS (Intel GPU Virtualization) M: Zhenyu Wang M: Zhi Wang -L: igvt-g-dev@lists.01.org +L: intel-gvt-dev@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org W: https://01.org/igvt-g T: git https://github.com/01org/gvt-linux.git -- cgit From b5fa0f7f88edcde37df1807fdf9ff10ec787a60e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jan 2017 16:36:57 +1100 Subject: powerpc: Fix build failure with clang due to BUILD_BUG_ON() Anton says: In commit 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") and commit c12e6f24d413 ("powerpc: Add option to use jump label for mmu_has_feature()") we added: BUILD_BUG_ON(!__builtin_constant_p(feature)) to cpu_has_feature() and mmu_has_feature() in order to catch usage issues (such as cpu_has_feature(cpu_has_feature(X), which has happened once in the past). Unfortunately LLVM isn't smart enough to resolve this, and it errors out. I work around it in my clang/LLVM builds of the kernel, but I have just discovered that it causes a lot of issues for the bcc (eBPF) trace tool (which uses LLVM). For now just #ifdef it away for clang builds. Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") Fixes: c12e6f24d413 ("powerpc: Add option to use jump label for mmu_has_feature()") Cc: stable@vger.kernel.org # v4.8+ Reported-by: Anton Blanchard Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpu_has_feature.h | 2 ++ arch/powerpc/include/asm/mmu.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index b312b152461b..6e834caa3720 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -23,7 +23,9 @@ static __always_inline bool cpu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index a34c764ca8dd..233a7e8cc8e3 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -160,7 +160,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { -- cgit From ae4a3e028bb8b59e7cfeb0cc9ef03d885182ce8b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 19 Jan 2017 08:49:07 -0800 Subject: dmaengine: cppi41: Fix runtime PM timeouts with USB mass storage Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") added runtime PM support for cppi41, but had corner case issues. Some of the issues were fixed with commit 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected"). That fix however caused a new regression where we can get error -115 messages with USB on BeagleBone when connecting a USB mass storage device to a hub. This is because when connecting a USB mass storage device to a hub, the initial DMA transfers can take over 200ms to complete and cppi41 autosuspend delay times out. To fix the issue, we want to implement refcounting for chan_busy array that contains the active dma transfers. Increasing the autosuspend delay won't help as that the delay could be potentially seconds, and it's best to let the USB subsystem to deal with the timeouts on errors. The earlier attempt for runtime PM was buggy as the pm_runtime_get/put() calls could get unpaired easily as they did not follow the state of the chan_busy array as described in commit 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected". Let's fix the issue by adding pm_runtime_get() to where a new transfer is added to the chan_busy array, and calls to pm_runtime_put() where chan_busy array entry is cleared. This prevents any autosuspend timeouts from happening while dma transfers are active. Fixes: 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected") Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Andy Shevchenko Cc: Bin Liu Cc: Grygorii Strashko Cc: Kevin Hilman Cc: Patrick Titiano Cc: Sergei Shtylyov Signed-off-by: Tony Lindgren Tested-by: Bin Liu Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index d5ba43a87a68..7de4fdf86a6a 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -257,6 +257,10 @@ static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) BUG_ON(desc_num >= ALLOC_DECS_NUM); c = cdd->chan_busy[desc_num]; cdd->chan_busy[desc_num] = NULL; + + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return c; } @@ -447,6 +451,15 @@ static void push_desc_queue(struct cppi41_channel *c) */ __iowmb(); + /* + * DMA transfers can take at least 200ms to complete with USB mass + * storage connected. To prevent autosuspend timeouts, we must use + * pm_runtime_get/put() when chan_busy[] is modified. This will get + * cleared in desc_to_chan() or cppi41_stop_chan() depending on the + * outcome of the transfer. + */ + pm_runtime_get(cdd->ddev.dev); + desc_phys = lower_32_bits(c->desc_phys); desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); WARN_ON(cdd->chan_busy[desc_num]); @@ -705,6 +718,9 @@ static int cppi41_stop_chan(struct dma_chan *chan) WARN_ON(!cdd->chan_busy[desc_num]); cdd->chan_busy[desc_num] = NULL; + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return 0; } -- cgit From 362f4562466c3b9490e733e06999025638310d4a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 19 Jan 2017 08:49:08 -0800 Subject: dmaengine: cppi41: Fix oops in cppi41_runtime_resume Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") together with recent MUSB changes allowed USB and DMA on BeagleBone to idle when no cable is connected. But looks like few corner case issues still remain. Looks like just by re-plugging USB cable about ten or so times on BeagleBone when configured in USB peripheral mode we can get warnings and eventually trigger an oops in cppi41 DMA: WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:1154 cppi41_runtime_suspend+ x28/0x38 [cppi41] ... WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:452 push_desc_queue+0x94/0x9c [cppi41] ... Unable to handle kernel NULL pointer dereference at virtual address 00000104 pgd = c0004000 [00000104] *pgd=00000000 Internal error: Oops: 805 [#1] SMP ARM ... [] (cppi41_runtime_resume [cppi41]) from [] (__rpm_callback+0xc0/0x214) [] (__rpm_callback) from [] (rpm_callback+0x20/0x80) [] (rpm_callback) from [] (rpm_resume+0x504/0x78c) [] (rpm_resume) from [] (pm_runtime_work+0x60/0xa8) [] (pm_runtime_work) from [] (process_one_work+0x2b4/0x808) This is because of a race with runtime PM and cppi41_dma_issue_pending() as reported by Alexandre Bailon in earlier set of patches. Based on mailing list discussions we however came to the conclusion that a different fix from Alexandre's fix is needed in order to guarantee that DMA is really active when we try to use it. To fix the issue, we need to add a driver specific flag as we otherwise can have -EINPROGRESS state set by runtime PM and can't rely on pm_runtime_active() to tell us when we can use the DMA. And we need to make sure the DMA transfers get triggered in the queued order. So let's always queue the transfers, then flush the queue from both cppi41_dma_issue_pending() and cppi41_runtime_resume() as suggested by Grygorii Strashko in an earlier example patch. For reference, this is also documented in Documentation/power/runtime_pm.txt in the example at the end of the file as pointed out by Grygorii Strashko . Based on earlier patches from Alexandre Bailon and Grygorii Strashko modified based on testing and what was discussed on the mailing lists. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Andy Shevchenko Cc: Bin Liu Cc: Grygorii Strashko Cc: Kevin Hilman Cc: Patrick Titiano Cc: Sergei Shtylyov Reported-by: Alexandre Bailon Signed-off-by: Tony Lindgren Tested-by: Bin Liu Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 7de4fdf86a6a..55c1782e3623 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -153,6 +153,8 @@ struct cppi41_dd { /* context for suspend/resume */ unsigned int dma_tdfdq; + + bool is_suspended; }; #define FIST_COMPLETION_QUEUE 93 @@ -470,20 +472,26 @@ static void push_desc_queue(struct cppi41_channel *c) cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); } -static void pending_desc(struct cppi41_channel *c) +/* + * Caller must hold cdd->lock to prevent push_desc_queue() + * getting called out of order. We have both cppi41_dma_issue_pending() + * and cppi41_runtime_resume() call this function. + */ +static void cppi41_run_queue(struct cppi41_dd *cdd) { - struct cppi41_dd *cdd = c->cdd; - unsigned long flags; + struct cppi41_channel *c, *_c; - spin_lock_irqsave(&cdd->lock, flags); - list_add_tail(&c->node, &cdd->pending); - spin_unlock_irqrestore(&cdd->lock, flags); + list_for_each_entry_safe(c, _c, &cdd->pending, node) { + push_desc_queue(c); + list_del(&c->node); + } } static void cppi41_dma_issue_pending(struct dma_chan *chan) { struct cppi41_channel *c = to_cpp41_chan(chan); struct cppi41_dd *cdd = c->cdd; + unsigned long flags; int error; error = pm_runtime_get(cdd->ddev.dev); @@ -495,10 +503,11 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) return; } - if (likely(pm_runtime_active(cdd->ddev.dev))) - push_desc_queue(c); - else - pending_desc(c); + spin_lock_irqsave(&cdd->lock, flags); + list_add_tail(&c->node, &cdd->pending); + if (!cdd->is_suspended) + cppi41_run_queue(cdd); + spin_unlock_irqrestore(&cdd->lock, flags); pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); @@ -1166,8 +1175,12 @@ static int __maybe_unused cppi41_resume(struct device *dev) static int __maybe_unused cppi41_runtime_suspend(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); + unsigned long flags; + spin_lock_irqsave(&cdd->lock, flags); + cdd->is_suspended = true; WARN_ON(!list_empty(&cdd->pending)); + spin_unlock_irqrestore(&cdd->lock, flags); return 0; } @@ -1175,14 +1188,11 @@ static int __maybe_unused cppi41_runtime_suspend(struct device *dev) static int __maybe_unused cppi41_runtime_resume(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); - struct cppi41_channel *c, *_c; unsigned long flags; spin_lock_irqsave(&cdd->lock, flags); - list_for_each_entry_safe(c, _c, &cdd->pending, node) { - push_desc_queue(c); - list_del(&c->node); - } + cdd->is_suspended = false; + cppi41_run_queue(cdd); spin_unlock_irqrestore(&cdd->lock, flags); return 0; -- cgit From 6610d0edf6dc7ee97e46ab3a538a565c79d26199 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 20 Jan 2017 12:07:53 -0800 Subject: dmaengine: cppi41: Clean up pointless warnings With patches "dmaengine: cppi41: Fix runtime PM timeouts with USB mass storage", and "dmaengine: cppi41: Fix oops in cppi41_runtime_resume", the pm_runtime_get/put() in cppi41_irq() is no longer needed. We now guarantee that cppi41 is enabled when dma is in use. We can still get pointless error -115 when musb is configured as a usb peripheral. That's because we should now check for the state of is_suspended instead. Let's just remove the now useless code and replace it with a WARN(). Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 55c1782e3623..200828c60db9 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -323,12 +323,12 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; - int error; - error = pm_runtime_get(cdd->ddev.dev); - if (error < 0) - dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", - __func__, error); + /* + * This should never trigger, see the comments in + * push_desc_queue() + */ + WARN_ON(cdd->is_suspended); q_num = __fls(val); val &= ~(1 << q_num); @@ -349,9 +349,6 @@ static irqreturn_t cppi41_irq(int irq, void *data) c->residue = pd_trans_len(c->desc->pd6) - len; dma_cookie_complete(&c->txd); dmaengine_desc_get_callback_invoke(&c->txd, NULL); - - pm_runtime_mark_last_busy(cdd->ddev.dev); - pm_runtime_put_autosuspend(cdd->ddev.dev); } } return IRQ_HANDLED; -- cgit From 407788b51db6f6aab499d02420082f436abf3238 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 24 Jan 2017 09:18:57 -0600 Subject: usb: musb: Fix host mode error -71 regression Commit 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") started implementing musb generic runtime PM support by introducing devctl register session bit based state control. This caused a regression where if a USB mass storage device is connected to a USB hub, we can get: usb 1-1: reset high-speed USB device number 2 using musb-hdrc usb 1-1: device descriptor read/64, error -71 usb 1-1.1: new high-speed USB device number 4 using musb-hdrc This is because before the USB storage device is connected, musb is in OTG_STATE_A_SUSPEND. And we currently only set need_finish_resume in musb_stage0_irq() and the related code calling finish_resume_work in musb_resume() and musb_runtime_resume() never gets called. To fix the issue, we can call schedule_delayed_work() directly in musb_stage0_irq() to have finish_resume_work run. And we should no longer never get interrupts when when suspended. We have changed musb to no longer need pm_runtime_irqsafe(). The need_finish_resume flag was added in commit 9298b4aad37e ("usb: musb: fix device hotplug behind hub") and no longer applies as far as I can tell. So let's just remove the earlier code that no longer is needed. Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Reported-by: Bin Liu Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 15 ++------------- drivers/usb/musb/musb_core.h | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index fca288bbc800..cd40467be3c5 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -594,11 +594,11 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, | MUSB_PORT_STAT_RESUME; musb->rh_timer = jiffies + msecs_to_jiffies(USB_RESUME_TIMEOUT); - musb->need_finish_resume = 1; - musb->xceiv->otg->state = OTG_STATE_A_HOST; musb->is_active = 1; musb_host_resume_root_hub(musb); + schedule_delayed_work(&musb->finish_resume_work, + msecs_to_jiffies(USB_RESUME_TIMEOUT)); break; case OTG_STATE_B_WAIT_ACON: musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; @@ -2710,11 +2710,6 @@ static int musb_resume(struct device *dev) mask = MUSB_DEVCTL_BDEVICE | MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV; if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } /* * The USB HUB code expects the device to be in RPM_ACTIVE once it came @@ -2766,12 +2761,6 @@ static int musb_runtime_resume(struct device *dev) musb_restore_context(musb); - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } - spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); if (error) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index ade902ea1221..ce5a18c98c6d 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -410,7 +410,6 @@ struct musb { /* is_suspended means USB B_PERIPHERAL suspend */ unsigned is_suspended:1; - unsigned need_finish_resume :1; /* may_wakeup means remote wakeup is enabled */ unsigned may_wakeup:1; -- cgit From 3ba7b7795b7e8889af1377904c55c7fae9e0c775 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 24 Jan 2017 09:18:58 -0600 Subject: usb: musb: Fix external abort on non-linefetch for musb_irq_work() While testing musb host mode cable plugging on a BeagleBone, I came across this error: Unhandled fault: external abort on non-linefetch (0x1008) at 0xd1dcfc60 ... [] (musb_default_readb [musb_hdrc]) from [] (musb_irq_work+0x1c/0x180 [musb_hdrc]) [] (musb_irq_work [musb_hdrc]) from [] (process_one_work+0x2b4/0x808) [] (process_one_work) from [] (worker_thread+0x3c/0x550) [] (worker_thread) from [] (kthread+0x104/0x148) [] (kthread) from [] (ret_from_fork+0x14/0x24) Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index cd40467be3c5..772f15821242 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1925,6 +1925,14 @@ static void musb_pm_runtime_check_session(struct musb *musb) static void musb_irq_work(struct work_struct *data) { struct musb *musb = container_of(data, struct musb, irq_work.work); + int error; + + error = pm_runtime_get_sync(musb->controller); + if (error < 0) { + dev_err(musb->controller, "Could not enable: %i\n", error); + + return; + } musb_pm_runtime_check_session(musb); @@ -1932,6 +1940,9 @@ static void musb_irq_work(struct work_struct *data) musb->xceiv_old_state = musb->xceiv->otg->state; sysfs_notify(&musb->controller->kobj, NULL, "mode"); } + + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); } static void musb_recover_from_babble(struct musb *musb) -- cgit From d9b2997e4a0a874e452df7cdd7de5a54502bd0aa Mon Sep 17 00:00:00 2001 From: Lukáš Lalinský Date: Fri, 20 Jan 2017 19:46:34 +0100 Subject: USB: Add quirk for WORLDE easykey.25 MIDI keyboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a quirk for WORLDE easykey.25 MIDI keyboard (idVendor=0218, idProduct=0401). The device reports that it has config string descriptor at index 3, but when the system selects the configuration and tries to get the description, it returns a -EPROTO error, the communication restarts and this keeps repeating over and over again. Not requesting the string descriptor makes the device work correctly. Relevant info from Wireshark: [...] CONFIGURATION DESCRIPTOR bLength: 9 bDescriptorType: 0x02 (CONFIGURATION) wTotalLength: 101 bNumInterfaces: 2 bConfigurationValue: 1 iConfiguration: 3 Configuration bmAttributes: 0xc0 SELF-POWERED NO REMOTE-WAKEUP 1... .... = Must be 1: Must be 1 for USB 1.1 and higher .1.. .... = Self-Powered: This device is SELF-POWERED ..0. .... = Remote Wakeup: This device does NOT support remote wakeup bMaxPower: 50 (100mA) [...] 45 0.369104 host 2.38.0 USB 64 GET DESCRIPTOR Request STRING [...] URB setup bmRequestType: 0x80 1... .... = Direction: Device-to-host .00. .... = Type: Standard (0x00) ...0 0000 = Recipient: Device (0x00) bRequest: GET DESCRIPTOR (6) Descriptor Index: 0x03 bDescriptorType: 0x03 Language Id: English (United States) (0x0409) wLength: 255 46 0.369255 2.38.0 host USB 64 GET DESCRIPTOR Response STRING[Malformed Packet] [...] Frame 46: 64 bytes on wire (512 bits), 64 bytes captured (512 bits) on interface 0 USB URB [Source: 2.38.0] [Destination: host] URB id: 0xffff88021f62d480 URB type: URB_COMPLETE ('C') URB transfer type: URB_CONTROL (0x02) Endpoint: 0x80, Direction: IN Device: 38 URB bus id: 2 Device setup request: not relevant ('-') Data: present (0) URB sec: 1484896277 URB usec: 455031 URB status: Protocol error (-EPROTO) (-71) URB length [bytes]: 0 Data length [bytes]: 0 [Request in: 45] [Time from request: 0.000151000 seconds] Unused Setup Header Interval: 0 Start frame: 0 Copy of Transfer Flags: 0x00000200 Number of ISO descriptors: 0 [Malformed Packet: USB] [Expert Info (Error/Malformed): Malformed Packet (Exception occurred)] [Malformed Packet (Exception occurred)] [Severity level: Error] [Group: Malformed] Signed-off-by: Lukáš Lalinský Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d2e50a27140c..24f9f98968a5 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* CBM - Flash disk */ { USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME }, + /* WORLDE easy key (easykey.25) MIDI controller */ + { USB_DEVICE(0x0218, 0x0401), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* HP 5300/5370C scanner */ { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- cgit From 91539eb1fda2d530d3b268eef542c5414e54bf1a Mon Sep 17 00:00:00 2001 From: Iago Abal Date: Wed, 11 Jan 2017 14:00:21 +0100 Subject: dmaengine: pl330: fix double lock The static bug finder EBA (http://www.iagoabal.eu/eba/) reported the following double-lock bug: Double lock: 1. spin_lock_irqsave(pch->lock, flags) at pl330_free_chan_resources:2236; 2. call to function `pl330_release_channel' immediately after; 3. call to function `dma_pl330_rqcb' in line 1753; 4. spin_lock_irqsave(pch->lock, flags) at dma_pl330_rqcb:1505. I have fixed it as suggested by Marek Szyprowski. First, I have replaced `pch->lock' with `pl330->lock' in functions `pl330_alloc_chan_resources' and `pl330_free_chan_resources'. This avoids the double-lock by acquiring a different lock than `dma_pl330_rqcb'. NOTE that, as a result, `pl330_free_chan_resources' executes `list_splice_tail_init' on `pch->work_list' under lock `pl330->lock', whereas in the rest of the code `pch->work_list' is protected by `pch->lock'. I don't know if this may cause race conditions. Similarly `pch->cyclic' is written by `pl330_alloc_chan_resources' under `pl330->lock' but read by `pl330_tx_submit' under `pch->lock'. Second, I have removed locking from `pl330_request_channel' and `pl330_release_channel' functions. Function `pl330_request_channel' is only called from `pl330_alloc_chan_resources', so the lock is already held. Function `pl330_release_channel' is called from `pl330_free_chan_resources', which already holds the lock, and from `pl330_del'. Function `pl330_del' is called in an error path of `pl330_probe' and at the end of `pl330_remove', but I assume that there cannot be concurrent accesses to the protected data at those points. Signed-off-by: Iago Abal Reviewed-by: Marek Szyprowski Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 740bbb942594..7539f73df9e0 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1699,7 +1699,6 @@ static bool _chan_ns(const struct pl330_dmac *pl330, int i) static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) { struct pl330_thread *thrd = NULL; - unsigned long flags; int chans, i; if (pl330->state == DYING) @@ -1707,8 +1706,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) chans = pl330->pcfg.num_chan; - spin_lock_irqsave(&pl330->lock, flags); - for (i = 0; i < chans; i++) { thrd = &pl330->channels[i]; if ((thrd->free) && (!_manager_ns(thrd) || @@ -1726,8 +1723,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) thrd = NULL; } - spin_unlock_irqrestore(&pl330->lock, flags); - return thrd; } @@ -1745,7 +1740,6 @@ static inline void _free_event(struct pl330_thread *thrd, int ev) static void pl330_release_channel(struct pl330_thread *thrd) { struct pl330_dmac *pl330; - unsigned long flags; if (!thrd || thrd->free) return; @@ -1757,10 +1751,8 @@ static void pl330_release_channel(struct pl330_thread *thrd) pl330 = thrd->dmac; - spin_lock_irqsave(&pl330->lock, flags); _free_event(thrd, thrd->ev); thrd->free = true; - spin_unlock_irqrestore(&pl330->lock, flags); } /* Initialize the structure for PL330 configuration, that can be used @@ -2122,20 +2114,20 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan) struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); dma_cookie_init(chan); pch->cyclic = false; pch->thread = pl330_request_channel(pl330); if (!pch->thread) { - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return -ENOMEM; } tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); return 1; } @@ -2238,12 +2230,13 @@ static int pl330_pause(struct dma_chan *chan) static void pl330_free_chan_resources(struct dma_chan *chan) { struct dma_pl330_chan *pch = to_pchan(chan); + struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; tasklet_kill(&pch->task); pm_runtime_get_sync(pch->dmac->ddma.dev); - spin_lock_irqsave(&pch->lock, flags); + spin_lock_irqsave(&pl330->lock, flags); pl330_release_channel(pch->thread); pch->thread = NULL; @@ -2251,7 +2244,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan) if (pch->cyclic) list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); - spin_unlock_irqrestore(&pch->lock, flags); + spin_unlock_irqrestore(&pl330->lock, flags); pm_runtime_mark_last_busy(pch->dmac->ddma.dev); pm_runtime_put_autosuspend(pch->dmac->ddma.dev); } -- cgit From b17c1bba9cec1727451b906d9a0c209774624873 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Mon, 23 Jan 2017 16:32:57 +0000 Subject: staging: greybus: timesync: validate platform state callback When tearingdown timesync, and not in arche platform, the state platform callback is not initialized. That will trigger the following NULL dereferencing. CallTrace: ? gb_timesync_platform_unlock_bus+0x11/0x20 [greybus] gb_timesync_teardown+0x85/0xc0 [greybus] gb_timesync_svc_remove+0xab/0x190 [greybus] gb_svc_del+0x29/0x110 [greybus] gb_hd_del+0x14/0x20 [greybus] ap_disconnect+0x24/0x60 [gb_es2] usb_unbind_interface+0x7a/0x2c0 __device_release_driver+0x96/0x150 device_release_driver+0x1e/0x30 bus_remove_device+0xe7/0x130 device_del+0x116/0x230 usb_disable_device+0x97/0x1f0 usb_disconnect+0x80/0x260 hub_event+0x5ca/0x10e0 process_one_work+0x126/0x3b0 worker_thread+0x55/0x4c0 ? process_one_work+0x3b0/0x3b0 kthread+0xc4/0xe0 ? kthread_park+0xb0/0xb0 ret_from_fork+0x22/0x30 So, fix that by adding checks before use the callback. Fixes: 970dc85bd95d ("greybus: timesync: Add timesync core driver") Cc: # 4.9.x Signed-off-by: Rui Miguel Silva Reviewed-by: Viresh Kumar Reviewed-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/timesync_platform.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/staging/greybus/timesync_platform.c b/drivers/staging/greybus/timesync_platform.c index 113f3d6c4b3a..27f75b17679b 100644 --- a/drivers/staging/greybus/timesync_platform.c +++ b/drivers/staging/greybus/timesync_platform.c @@ -45,12 +45,18 @@ u32 gb_timesync_platform_get_clock_rate(void) int gb_timesync_platform_lock_bus(struct gb_timesync_svc *pdata) { + if (!arche_platform_change_state_cb) + return 0; + return arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_TIME_SYNC, pdata); } void gb_timesync_platform_unlock_bus(void) { + if (!arche_platform_change_state_cb) + return; + arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_ACTIVE, NULL); } -- cgit From b78671591a10218ab18bbea120fd05df7a002e88 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 15 Jan 2017 12:58:25 +0000 Subject: drm/i915: Avoid drm_atomic_state_put(NULL) in intel_display_resume intel_display_resume() may be called without an atomic state to restore, i.e. dev_priv->modeset_reset_restore state is NULL. One such case is following a lid open/close event and the forced modeset in intel_lid_notify(). Reported-by: Stefan Seyfried Tested-by: Stefan Seyfried Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170115125825.18597-1-chris@chris-wilson.co.uk Reviewed-by: Ander Conselvan de Oliveira (cherry picked from commit 3c5e37f169cb67cbd03c6116fbc93e0805815d29) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8d702cf1a616..1a01d4189c3f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -17027,7 +17027,8 @@ void intel_display_resume(struct drm_device *dev) if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); - drm_atomic_state_put(state); + if (state) + drm_atomic_state_put(state); } void intel_modeset_gem_init(struct drm_device *dev) -- cgit From 27892bbdc9233f33bf0f44e08aab8f12e0dec142 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Wed, 18 Jan 2017 13:38:43 -0800 Subject: drm/i915: prevent crash with .disable_display parameter The .disable_display parameter was causing a fatal crash when fbdev was dereferenced during driver init. V1: protection in i915_drv.c V2: Moved protection to intel_fbdev.c Fixes: 43cee314345a ("drm/i915/fbdev: Limit the global async-domain synchronization") Testcase: igt/drv_module_reload/basic-no-display Cc: Chris Wilson Signed-off-by: Clint Taylor Link: http://patchwork.freedesktop.org/patch/msgid/1484775523-29428-1-git-send-email-clinton.a.taylor@intel.com Reviewed-by: Chris Wilson Cc: Lukas Wunner Cc: Daniel Vetter Cc: Jani Nikula Cc: # v4.8+ Signed-off-by: Chris Wilson (cherry picked from commit 5b8cd0755f8a06a851c436a013e7be0823fb155a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index beb08982dc0b..8cf2d80f2254 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -742,6 +742,9 @@ void intel_fbdev_initial_config_async(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + if (!ifbdev) + return; + ifbdev->cookie = async_schedule(intel_fbdev_initial_config, ifbdev); } -- cgit From a38a7bd1766b42ea0ed14b99be23a653922ed5c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Jan 2017 11:37:49 +0000 Subject: drm/i915: Release temporary load-detect state upon switching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After we call drm_atomic_commit() on the load-detect state, we can free our local reference. Upon restore, we only apply and free the previous state. Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Daniel Vetter Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170119113749.2517-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä (cherry picked from commit 7abbd11f344aa7abe29befb218774a1ea26018ac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1a01d4189c3f..f313d62a7e74 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11246,6 +11246,7 @@ found: } old->restore_state = restore_state; + drm_atomic_state_put(state); /* let the connector get through one full cycle before testing */ intel_wait_for_vblank(dev_priv, intel_crtc->pipe); -- cgit From c34f078675f505c4437919bb1897b1351f16a050 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:42 +0200 Subject: drm/i915: Don't leak edid in intel_crt_detect_ddc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the path where intel_crt_detect_ddc() detects a CRT, if would return true without freeing the edid. Fixes: a2bd1f541f19 ("drm/i915: check whether we actually received an edid in detect_ddc") Cc: Chris Wilson Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v3.6+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-1-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit c96b63a6a7ac4bd670ec2e663793a9a31418b790) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_crt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 86ecec5601d4..588470eb8d39 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status -- cgit From 21d6e0bde50713922a6520ef84e5fd245b05d468 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:43 +0200 Subject: drm/i915: Don't init hpd polling for vlv and chv from runtime_suspend() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An error in the condition for avoiding the call to intel_hpd_poll_init() for valleyview and cherryview from intel_runtime_suspend() caused it to be called unconditionally. Fix it. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-2-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 04313b00b79405f86d815100f85c47a2ee5b8ca0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 445fec9c2841..b2c4a0b8a627 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2378,7 +2378,7 @@ static int intel_runtime_suspend(struct device *kdev) assert_forcewakes_inactive(dev_priv); - if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_poll_init(dev_priv); DRM_DEBUG_KMS("Device suspended\n"); -- cgit From 3781bd6e7d64d5f5bea9fdee11ab9460a700c0e4 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:44 +0200 Subject: drm/i915: Fix calculation of rotated x and y offsets for planar formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameters tile_size, tile_width and tile_height were passed in the wrong order to _intel_adjust_tile_offset() when calculating the rotated offsets. This doesn't fix any user visible bug, since for packed formats new and old offset are the same and the rotated offsets are within a tile before they are fed to _intel_adjust_tile_offset(). In that case, the offsets are unchanged. That is not true for planar formats, but those are currently not supported. Fixes: 66a2d927cb0e ("drm/i915: Make intel_adjust_tile_offset() work for linear buffers") Cc: Ville Syrjälä Cc: Sivakumar Thulasimani Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-3-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 46a1bd289507dfcc428fb9daf65421ed6be6af8b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f313d62a7e74..7a9040fc6e5a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2585,8 +2585,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, * We only keep the x/y offsets, so push all of the * gtt offset into the x/y offsets. */ - _intel_adjust_tile_offset(&x, &y, tile_size, - tile_width, tile_height, pitch_tiles, + _intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, gtt_offset_rotated * tile_size, 0); gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; -- cgit From 6d1d427a4e24c403b4adf928d61994bdaa0ca03a Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:45 +0200 Subject: drm/i915: Check for NULL atomic state in intel_crtc_disable_noatomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In intel_crtc_disable_noatomic(), bail on a failure to allocate an atomic state to avoid a NULL pointer dereference. Fixes: 4a80655827af ("drm/i915: Pass atomic state to crtc enable/disable functions") Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-4-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 31bb2ef97ea9db343348f9b5ccaa9bb6f48fc655) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7a9040fc6e5a..f6dd073afdc9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6850,6 +6850,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) } state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); + return; + } + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; /* Everything's already locked, -EDEADLK can't happen. */ -- cgit From 6f0f02dc56f18760b46dc1bf5b3f7386869d4162 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 21:29:39 +0000 Subject: drm/i915: Move atomic state free from out of fence release Fences are required to support being released from under an atomic context. The drm_atomic_state struct may take a mutex when being released and so we cannot drop a reference to the drm_atomic_state from the fence release path directly, and so we need to defer that unreference to a worker. [ 326.576697] WARNING: CPU: 2 PID: 366 at kernel/sched/core.c:7737 __might_sleep+0x5d/0x80 [ 326.576816] do not call blocking ops when !TASK_RUNNING; state=1 set at [] intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.576818] Modules linked in: rfcomm fuse snd_hda_codec_hdmi bnep snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer input_leds led_class snd punit_atom_debug btusb btrtl btbcm btintel intel_rapl bluetooth i915 drm_kms_helper syscopyarea sysfillrect iwlwifi sysimgblt soundcore fb_sys_fops mei_txe cfg80211 drm pwm_lpss_platform pwm_lpss pinctrl_cherryview fjes acpi_pad parport_pc ppdev parport autofs4 [ 326.576899] CPU: 2 PID: 366 Comm: i915/signal:0 Tainted: G U 4.10.0-rc3-patser+ #5030 [ 326.576902] Hardware name: /NUC5PPYB, BIOS PYBSWCEL.86A.0031.2015.0601.1712 06/01/2015 [ 326.576905] Call Trace: [ 326.576920] dump_stack+0x4d/0x6d [ 326.576926] __warn+0xc0/0xe0 [ 326.576931] warn_slowpath_fmt+0x5a/0x80 [ 326.577004] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577075] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577079] __might_sleep+0x5d/0x80 [ 326.577087] mutex_lock+0x1b/0x40 [ 326.577133] drm_property_free_blob+0x1e/0x80 [drm] [ 326.577167] ? drm_property_destroy+0xe0/0xe0 [drm] [ 326.577200] drm_mode_object_unreference+0x5c/0x70 [drm] [ 326.577233] drm_property_unreference_blob+0xe/0x10 [drm] [ 326.577260] __drm_atomic_helper_crtc_destroy_state+0x14/0x40 [drm_kms_helper] [ 326.577278] drm_atomic_helper_crtc_destroy_state+0x10/0x20 [drm_kms_helper] [ 326.577352] intel_crtc_destroy_state+0x9/0x10 [i915] [ 326.577388] drm_atomic_state_default_clear+0xea/0x1d0 [drm] [ 326.577462] intel_atomic_state_clear+0xd/0x20 [i915] [ 326.577497] drm_atomic_state_clear+0x1a/0x30 [drm] [ 326.577532] __drm_atomic_state_free+0x13/0x60 [drm] [ 326.577607] intel_atomic_commit_ready+0x6f/0x78 [i915] [ 326.577670] i915_sw_fence_release+0x3a/0x50 [i915] [ 326.577733] dma_i915_sw_fence_wake+0x39/0x80 [i915] [ 326.577741] dma_fence_signal+0xda/0x120 [ 326.577812] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577884] intel_breadcrumbs_signaler+0xb1/0x270 [i915] [ 326.577889] kthread+0x127/0x130 [ 326.577961] ? intel_engine_remove_wait+0x1a0/0x1a0 [i915] [ 326.577964] ? kthread_stop+0x120/0x120 [ 326.577970] ret_from_fork+0x22/0x30 Fixes: c004a90b7263 ("drm/i915: Restore nonblocking awaits for modesetting") Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170123212939.30345-1-chris@chris-wilson.co.uk Cc: # v4.10-rc1+ Reviewed-by: Joonas Lahtinen (cherry picked from commit eb955eee27d9dc176871540c43c9070ee4701642) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 28 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 243224aeabf8..69bc3b0c4390 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1977,6 +1977,11 @@ struct drm_i915_private { struct i915_frontbuffer_tracking fb_tracking; + struct intel_atomic_helper { + struct llist_head free_list; + struct work_struct free_work; + } atomic_helper; + u16 orig_clock; bool mchbar_need_disable; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6dd073afdc9..77f7b1d849a4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14523,8 +14523,14 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, break; case FENCE_FREE: - drm_atomic_state_put(&state->base); - break; + { + struct intel_atomic_helper *helper = + &to_i915(state->base.dev)->atomic_helper; + + if (llist_add(&state->freed, &helper->free_list)) + schedule_work(&helper->free_work); + break; + } } return NOTIFY_DONE; @@ -16403,6 +16409,18 @@ fail: drm_modeset_acquire_fini(&ctx); } +static void intel_atomic_helper_free_state(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16422,6 +16440,9 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; + INIT_WORK(&dev_priv->atomic_helper.free_work, + intel_atomic_helper_free_state); + intel_init_quirks(dev); intel_init_pm(dev_priv); @@ -17106,6 +17127,9 @@ void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + flush_work(&dev_priv->atomic_helper.free_work); + WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + intel_disable_gt_powersave(dev_priv); /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cd132c216a67..cd72ae171eeb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -370,6 +370,8 @@ struct intel_atomic_state { struct skl_wm_values wm_results; struct i915_sw_fence commit_ready; + + struct llist_node freed; }; struct intel_plane_state { -- cgit From 2f5db26c2ecb248bdc319feb2990453cb02fc950 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 20 Jan 2017 13:51:23 -0800 Subject: drm/i915: reinstate call to trace_i915_vma_bind The call went away in: commit 3b16525cc4c1a43e9053cfdc414356eea24bdfad Author: Chris Wilson Date: Thu Aug 4 16:32:25 2016 +0100 drm/i915: Split insertion/binding of an object into the VM It is useful to have this trace as it pairs nicely with the vma_unbind one to track vma activity. Added inside the i915_vma_bind function (was outside before) to keep a similar placement as trace_i915_vma_unbind. v2: print bind_flags instead of flags (Chris) Fixes: 3b16525cc4c1 ("drm/i915: Split insertion/binding of an object into the VM") Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1484949083-11430-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 6146e6da5c961735dacf9b6c0c8b5f1382193ee2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index a792dcb902b5..e924a9516079 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -185,6 +185,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; } + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) return ret; -- cgit From 83e526f2a2fa4b2e82b6bd3ddbb26b70acfa8947 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Wed, 18 Jan 2017 00:57:44 +0000 Subject: usb: gadget: f_fs: Assorted buffer overflow checks. OS descriptor head, when flagged as provided, is accessed without checking if it fits in provided buffer. Verify length before access. Also, there are other places where buffer length it checked after accessing offsets which are potentially past the end. Check buffer length before as well to fail cleanly. Signed-off-by: Vincent Pelletier Acked-by: Felipe Balbi Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 5490fc51638e..fd80c1b9c823 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2269,6 +2269,8 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || h->interface >= ffs->interfaces_count) return -EINVAL; length = le32_to_cpu(d->dwSize); + if (len < length) + return -EINVAL; type = le32_to_cpu(d->dwPropertyDataType); if (type < USB_EXT_PROP_UNICODE || type > USB_EXT_PROP_UNICODE_MULTI) { @@ -2277,6 +2279,11 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, return -EINVAL; } pnl = le16_to_cpu(d->wPropertyNameLength); + if (length < 14 + pnl) { + pr_vdebug("invalid os descriptor length: %d pnl:%d (descriptor %d)\n", + length, pnl, type); + return -EINVAL; + } pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl)); if (length != 14 + pnl + pdl) { pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n", @@ -2363,6 +2370,9 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, } } if (flags & (1 << i)) { + if (len < 4) { + goto error; + } os_descs_count = get_unaligned_le32(data); data += 4; len -= 4; @@ -2435,7 +2445,8 @@ static int __ffs_data_got_strings(struct ffs_data *ffs, ENTER(); - if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || + if (unlikely(len < 16 || + get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || get_unaligned_le32(data + 4) != len)) goto error; str_count = get_unaligned_le32(data + 8); -- cgit From fd25ea29093e275195d0ae8b2573021a1c98959f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Jan 2017 13:24:05 +0100 Subject: Revert "ACPI / video: Add force_native quirk for HP Pavilion dv6" Revert commit 6276e53fa8c0 (ACPI / video: Add force_native quirk for HP Pavilion dv6). In the commit message for the quirk this revert removes I wrote: "Note that there are quite a few HP Pavilion dv6 variants, some woth ATI and some with NVIDIA hybrid gfx, both seem to need this quirk to have working backlight control. There are also some versions with only Intel integrated gfx, these may not need this quirk, but it should not hurt there." Unfortunately that seems wrong, I've already received 2 reports of this commit causing regressions on some dv6 variants (at least one of which actually has a nvidia GPU). So it seems that HP has made a mess here by using the same model-name both in marketing and in the DMI data for many different variants. Some of which need acpi_backlight=native for functional backlight control (as the quirk this commit reverts was doing), where as others are broken by it. So lets get back to the old sitation so as to avoid regressing on models which used to work without any kernel cmdline arguments before. Fixes: 6276e53fa8c0 (ACPI / video: Add force_native quirk for HP Pavilion dv6) Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 02ded25c82e4..7f48156cbc0c 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -305,17 +305,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), }, }, - { - /* https://bugzilla.redhat.com/show_bug.cgi?id=1204476 */ - /* https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 */ - .callback = video_detect_force_native, - .ident = "HP Pavilion dv6", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv6 Notebook PC"), - }, - }, - { }, }; -- cgit From 4dfa2b84118fd6c95202ae87e62adf5000ccd4d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jan 2017 07:49:34 -0800 Subject: xfs: only update mount/resv fields on success in __xfs_ag_resv_init Try to reserve the blocks first and only then update the fields in or hanging off the mount structure. This way we can call __xfs_ag_resv_init again after a previous failure. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ag_resv.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index d346d42c54d1..94234bff40dc 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -200,22 +200,27 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; + xfs_extlen_t reserved; - resv = xfs_perag_resv(pag, type); if (used > ask) ask = used; - resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = ask - used; - mp->m_ag_max_usable -= ask; + reserved = ask - used; - trace_xfs_ag_resv_init(pag, type, ask); - - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); - if (error) + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); + return error; + } - return error; + mp->m_ag_max_usable -= ask; + + resv = xfs_perag_resv(pag, type); + resv->ar_asked = ask; + resv->ar_reserved = resv->ar_orig_reserved = reserved; + + trace_xfs_ag_resv_init(pag, type, ask); + return 0; } /* Create a per-AG block reservation. */ -- cgit From 76d771b4cbe33c581bd6ca2710c120be51172440 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jan 2017 07:49:35 -0800 Subject: xfs: use per-AG reservations for the finobt Currently we try to rely on the global reserved block pool for block allocations for the free inode btree, but I have customer reports (fairly complex workload, need to find an easier reproducer) where that is not enough as the AG where we free an inode that requires a new finobt block is entirely full. This causes us to cancel a dirty transaction and thus a file system shutdown. I think the right way to guard against this is to treat the finot the same way as the refcount btree and have a per-AG reservations for the possible worst case size of it, and the patch below implements that. Note that this could increase mount times with large finobt trees. In an ideal world we would have added a field for the number of finobt fields to the AGI, similar to what we did for the refcount blocks. We should do add it next time we rev the AGI or AGF format by adding new fields. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ag_resv.c | 47 ++++++++++++++++++--- fs/xfs/libxfs/xfs_ialloc_btree.c | 90 ++++++++++++++++++++++++++++++++++++++-- fs/xfs/libxfs/xfs_ialloc_btree.h | 3 ++ fs/xfs/xfs_inode.c | 23 +++++----- fs/xfs/xfs_mount.h | 1 + 5 files changed, 144 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 94234bff40dc..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -39,6 +39,7 @@ #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" +#include "xfs_ialloc_btree.h" /* * Per-AG Block Reservations @@ -210,6 +211,9 @@ __xfs_ag_resv_init( if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); + xfs_warn(mp, +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", + pag->pag_agno); return error; } @@ -228,6 +232,8 @@ int xfs_ag_resv_init( struct xfs_perag *pag) { + struct xfs_mount *mp = pag->pag_mount; + xfs_agnumber_t agno = pag->pag_agno; xfs_extlen_t ask; xfs_extlen_t used; int error = 0; @@ -236,23 +242,45 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(pag->pag_mount, - pag->pag_agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, - ask, used); + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) { + /* + * Because we didn't have per-AG reservations when the + * finobt feature was added we might not be able to + * reserve all needed blocks. Warn and fall back to the + * old and potentially buggy code in that case, but + * ensure we do have the reservation for the refcountbt. + */ + ask = used = 0; + + mp->m_inotbt_nores = true; + + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, + &used); + if (error) + goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) + goto out; + } } /* Create the AGFL metadata reservation */ if (pag->pag_agfl_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, - &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; @@ -261,9 +289,16 @@ xfs_ag_resv_init( goto out; } +#ifdef DEBUG + /* need to read in the AGF for the ASSERT below to work */ + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); + if (error) + return error; + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount); +#endif out: return error; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0fd086d03d41..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -82,11 +82,12 @@ xfs_finobt_set_root( } STATIC int -xfs_inobt_alloc_block( +__xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int *stat) + int *stat, + enum xfs_ag_resv_type resv) { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( args.maxlen = 1; args.prod = 1; args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.resv = resv; error = xfs_alloc_vextent(&args); if (error) { @@ -122,6 +124,27 @@ xfs_inobt_alloc_block( return 0; } +STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, + XFS_AG_RESV_METADATA); +} + STATIC int xfs_inobt_free_block( struct xfs_btree_cur *cur, @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, - .alloc_block = xfs_inobt_alloc_block, + .alloc_block = xfs_finobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, @@ -480,3 +503,64 @@ xfs_inobt_rec_check_count( return 0; } #endif /* DEBUG */ + +static xfs_extlen_t +xfs_inobt_max_size( + struct xfs_mount *mp) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_inobt_mxr[0] == 0) + return 0; + + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / + XFS_INODES_PER_CHUNK); +} + +static int +xfs_inobt_count_blocks( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_btnum_t btnum, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_btree_cur *cur; + int error; + + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); + error = xfs_btree_count_blocks(cur, tree_blocks); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); + + return error; +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +int +xfs_finobt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + xfs_extlen_t tree_len = 0; + int error; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); + if (error) + return error; + + *ask += xfs_inobt_max_size(mp); + *used += tree_len; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_extlen_t *ask, xfs_extlen_t *used); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9557795eb74..de32f0fe47c8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1792,22 +1792,23 @@ xfs_inactive_ifree( int error; /* - * The ifree transaction might need to allocate blocks for record - * insertion to the finobt. We don't want to fail here at ENOSPC, so - * allow ifree to dip into the reserved block pool if necessary. - * - * Freeing large sets of inodes generally means freeing inode chunks, - * directory and file data blocks, so this should be relatively safe. - * Only under severe circumstances should it be possible to free enough - * inodes to exhaust the reserve block pool via finobt expansion while - * at the same time not creating free space in the filesystem. + * We try to use a per-AG reservation for any block needed by the finobt + * tree, but as the finobt feature predates the per-AG reservation + * support a degraded file system might not have enough space for the + * reservation at mount time. In that case try to dip into the reserved + * pool and pray. * * Send a warning if the reservation does happen to fail, as the inode * now remains allocated and sits on the unlinked list until the fs is * repaired. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); + if (unlikely(mp->m_inotbt_nores)) { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, + &tp); + } else { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); + } if (error) { if (error == -ENOSPC) { xfs_warn_ratelimited(mp, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 84f785218907..7f351f706b7a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -140,6 +140,7 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ __uint64_t m_flags; /* global mount flags */ + bool m_inotbt_nores; /* no per-AG finobt resv. */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ int m_ialloc_min_blks;/* min blocks in sparse inode -- cgit From 5a93790d4e2df73e30c965ec6e49be82fc3ccfce Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 25 Jan 2017 07:53:43 -0800 Subject: xfs: remove racy hasattr check from attr ops xfs_attr_[get|remove]() have unlocked attribute fork checks to optimize away a lock cycle in cases where the fork does not exist or is otherwise empty. This check is not safe, however, because an attribute fork short form to extent format conversion includes a transient state that causes the xfs_inode_hasattr() check to fail. Specifically, xfs_attr_shortform_to_leaf() creates an empty extent format attribute fork and then adds the existing shortform attributes to it. This means that lookup of an existing xattr can spuriously return -ENOATTR when racing against a setxattr that causes the associated format conversion. This was originally reproduced by an untar on a particularly configured glusterfs volume, but can also be reproduced on demand with properly crafted xattr requests. The format conversion occurs under the exclusive ilock. xfs_attr_get() and xfs_attr_remove() already have the proper locking and checks further down in the functions to handle this situation correctly. Drop the unlocked checks to avoid the spurious failure and rely on the existing logic. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_attr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; -- cgit From 493611ebd62673f39e2f52c2561182c558a21cb6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jan 2017 08:59:43 -0800 Subject: xfs: extsize hints are not unlikely in xfs_bmap_btalloc With COW files they are the hotpath, just like for files with the extent size hint attribute. We really shouldn't micro-manage anything but failure cases with unlikely. Additionally Arnd Bergmann recently reported that one of these two unlikely annotations causes link failures together with an upcoming kernel instrumentation patch, so let's get rid of it ASAP. Signed-off-by: Christoph Hellwig Reported-by: Arnd Bergmann Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ab82dd4a4980..bfc00de5c6f1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3629,7 +3629,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3701,7 +3701,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); -- cgit From 6f29a130613191d3c6335169febe002cba00edf5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Jan 2017 14:01:53 +0800 Subject: sctp: sctp_addr_id2transport should verify the addr before looking up assoc sctp_addr_id2transport is a function for sockopt to look up assoc by address. As the address is from userspace, it can be a v4-mapped v6 address. But in sctp protocol stack, it always handles a v4-mapped v6 address as a v4 address. So it's necessary to convert it to a v4 address before looking up assoc by address. This patch is to fix it by calling sctp_verify_addr in which it can do this conversion before calling sctp_endpoint_lookup_assoc, just like what sctp_sendmsg and __sctp_connect do for the address from users. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 318c6786d653..37eeab7899fc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -235,8 +235,12 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, sctp_assoc_t id) { struct sctp_association *addr_asoc = NULL, *id_asoc = NULL; - struct sctp_transport *transport; + struct sctp_af *af = sctp_get_af_specific(addr->ss_family); union sctp_addr *laddr = (union sctp_addr *)addr; + struct sctp_transport *transport; + + if (sctp_verify_addr(sk, laddr, af->sockaddr_len)) + return NULL; addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, laddr, -- cgit From 5207f3996338e1db71363fe381c81aaf1e54e4e3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 24 Jan 2017 14:05:16 +0800 Subject: sctp: sctp gso should set feature with NETIF_F_SG when calling skb_segment Now sctp gso puts segments into skb's frag_list, then processes these segments in skb_segment. But skb_segment handles them only when gs is enabled, as it's in the same branch with skb's frags. Although almost all the NICs support sg other than some old ones, but since commit 1e16aa3ddf86 ("net: gso: use feature flag argument in all protocol gso handlers"), features &= skb->dev->hw_enc_features, and xfrm_output_gso call skb_segment with features = 0, which means sctp gso would call skb_segment with sg = 0, and skb_segment would not work as expected. This patch is to fix it by setting features param with NETIF_F_SG when calling skb_segment so that it can go the right branch to process the skb's frag_list. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 7e869d0cca69..4f5a2b580aa5 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, goto out; } - segs = skb_segment(skb, features | NETIF_F_HW_CSUM); + segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG); if (IS_ERR(segs)) goto out; -- cgit From 56d806222ace4c3aeae516cd7a855340fb2839d8 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 24 Jan 2017 21:49:41 -0500 Subject: tcp: correct memory barrier usage in tcp_check_space() sock_reset_flag() maps to __clear_bit() not the atomic version clear_bit(). Thus, we need smp_mb(), smp_mb__after_atomic() is not sufficient. Fixes: 3c7151275c0c ("tcp: add memory barriers to write space paths") Cc: Eric Dumazet Cc: Oleg Nesterov Signed-off-by: Jason Baron Acked-by: Eric Dumazet Reported-by: Oleg Nesterov Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6c790754ae3e..41dcbd568cbe 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5078,7 +5078,7 @@ static void tcp_check_space(struct sock *sk) if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ - smp_mb__after_atomic(); + smp_mb(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { tcp_new_space(sk); -- cgit From a551ee94ea723b4af9b827c7460f108bc13425ee Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 25 Jan 2017 02:55:07 -0500 Subject: bnxt_en: Fix bnxt_reset() in the slow path task. In bnxt_sp_task(), we set a bit BNXT_STATE_IN_SP_TASK so that bnxt_close() will synchronize and wait for bnxt_sp_task() to finish. Some functions in bnxt_sp_task() require us to clear BNXT_STATE_IN_SP_TASK and then acquire rtnl_lock() to prevent race conditions. There are some bugs related to this logic. This patch refactors the code to have common bnxt_rtnl_lock_sp() and bnxt_rtnl_unlock_sp() to handle the RTNL and the clearing/setting of the bit. Multiple functions will need the same logic. We also need to move bnxt_reset() to the end of bnxt_sp_task(). Functions that clear BNXT_STATE_IN_SP_TASK must be the last functions to be called in bnxt_sp_task(). The common scheme will handle the condition properly. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 38 ++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 53e686fdf2ce..30d7d643bdbe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6200,23 +6200,32 @@ bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } -/* Only called from bnxt_sp_task() */ -static void bnxt_reset(struct bnxt *bp, bool silent) +static void bnxt_rtnl_lock_sp(struct bnxt *bp) { - /* bnxt_reset_task() calls bnxt_close_nic() which waits - * for BNXT_STATE_IN_SP_TASK to clear. - * If there is a parallel dev_close(), bnxt_close() may be holding + /* We are called from bnxt_sp_task which has BNXT_STATE_IN_SP_TASK + * set. If the device is being closed, bnxt_close() may be holding * rtnl() and waiting for BNXT_STATE_IN_SP_TASK to clear. So we * must clear BNXT_STATE_IN_SP_TASK before holding rtnl(). */ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); rtnl_lock(); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_reset_task(bp, silent); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); rtnl_unlock(); } +/* Only called from bnxt_sp_task() */ +static void bnxt_reset(struct bnxt *bp, bool silent) +{ + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + bnxt_reset_task(bp, silent); + bnxt_rtnl_unlock_sp(bp); +} + static void bnxt_cfg_ntp_filters(struct bnxt *); static void bnxt_sp_task(struct work_struct *work) @@ -6266,18 +6275,21 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_hwrm_tunnel_dst_port_free( bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); } - if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) - bnxt_reset(bp, false); - - if (test_and_clear_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event)) - bnxt_reset(bp, true); - if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) bnxt_get_port_module_status(bp); if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) bnxt_hwrm_port_qstats(bp); + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They + * must be the last functions to be called before exiting. + */ + if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) + bnxt_reset(bp, false); + + if (test_and_clear_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event)) + bnxt_reset(bp, true); + smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); } -- cgit From 0eaa24b971ae251ae9d3be23f77662a655532063 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 25 Jan 2017 02:55:08 -0500 Subject: bnxt_en: Fix RTNL lock usage on bnxt_update_link(). bnxt_update_link() is called from multiple code paths. Most callers, such as open, ethtool, already hold RTNL. Only the caller bnxt_sp_task() does not. So it is a bug to take RTNL inside bnxt_update_link(). Fix it by removing the RTNL inside bnxt_update_link(). The function now expects the caller to always hold RTNL. In bnxt_sp_task(), call bnxt_rtnl_lock_sp() before calling bnxt_update_link(). We also need to move the call to the end of bnxt_sp_task() since it will be clearing the BNXT_STATE_IN_SP_TASK bit. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 37 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 30d7d643bdbe..69b0fce4f5b3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5314,17 +5314,12 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state) if ((link_info->support_auto_speeds | diff) != link_info->support_auto_speeds) { /* An advertised speed is no longer supported, so we need to - * update the advertisement settings. See bnxt_reset() for - * comments about the rtnl_lock() sequence below. + * update the advertisement settings. Caller holds RTNL + * so we can modify link settings. */ - clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_lock(); link_info->advertising = link_info->support_auto_speeds; - if (test_bit(BNXT_STATE_OPEN, &bp->state) && - (link_info->autoneg & BNXT_AUTONEG_SPEED)) + if (link_info->autoneg & BNXT_AUTONEG_SPEED) bnxt_hwrm_set_link_setting(bp, true, false); - set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_unlock(); } return 0; } @@ -6231,7 +6226,6 @@ static void bnxt_cfg_ntp_filters(struct bnxt *); static void bnxt_sp_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, sp_task); - int rc; set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); smp_mb__after_atomic(); @@ -6245,16 +6239,6 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event)) bnxt_cfg_ntp_filters(bp); - if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { - if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, - &bp->sp_event)) - bnxt_hwrm_phy_qcaps(bp); - - rc = bnxt_update_link(bp, true); - if (rc) - netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", - rc); - } if (test_and_clear_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event)) bnxt_hwrm_exec_fwd_req(bp); if (test_and_clear_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event)) { @@ -6284,6 +6268,21 @@ static void bnxt_sp_task(struct work_struct *work) /* These functions below will clear BNXT_STATE_IN_SP_TASK. They * must be the last functions to be called before exiting. */ + if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { + int rc = 0; + + if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, + &bp->sp_event)) + bnxt_hwrm_phy_qcaps(bp); + + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + rc = bnxt_update_link(bp, true); + bnxt_rtnl_unlock_sp(bp); + if (rc) + netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", + rc); + } if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false); -- cgit From 90c694bb71819fb5bd3501ac397307d7e41ddeca Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 25 Jan 2017 02:55:09 -0500 Subject: bnxt_en: Fix RTNL lock usage on bnxt_get_port_module_status(). bnxt_get_port_module_status() calls bnxt_update_link() which expects RTNL to be held. In bnxt_sp_task() that does not hold RTNL, we need to call it with a prior call to bnxt_rtnl_lock_sp() and the call needs to be moved to the end of bnxt_sp_task(). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 69b0fce4f5b3..4fcc6a84a087 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6259,9 +6259,6 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_hwrm_tunnel_dst_port_free( bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE); } - if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) - bnxt_get_port_module_status(bp); - if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) bnxt_hwrm_port_qstats(bp); @@ -6283,6 +6280,12 @@ static void bnxt_sp_task(struct work_struct *work) netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", rc); } + if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) { + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) + bnxt_get_port_module_status(bp); + bnxt_rtnl_unlock_sp(bp); + } if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false); -- cgit From 61976fff20f92aceecc3670f6168bfc57a79e047 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 25 Jan 2017 09:20:54 +0100 Subject: Documentation: devicetree: change the mediatek ethernet compatible string When the binding was defined, I was not aware that mt2701 was an earlier version of the SoC. For sake of consistency, the ethernet driver should use mt2701 inside the compat string as this is the earliest SoC with the ethernet core. The ethernet driver is currently of no real use until we finish and upstream the DSA driver. There are no users of this binding yet. It should be safe to fix this now before it is too late and we need to provide backward compatibility for the mt7623-eth compat string. Reported-by: Sean Wang Signed-off-by: John Crispin Reviewed-by: Matthias Brugger Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index c010fafc66a8..c7194e87d5f4 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -7,7 +7,7 @@ have dual GMAC each represented by a child node.. * Ethernet controller node Required properties: -- compatible: Should be "mediatek,mt7623-eth" +- compatible: Should be "mediatek,mt2701-eth" - reg: Address and length of the register set for the device - interrupts: Should contain the three frame engines interrupts in numeric order. These are fe_int0, fe_int1 and fe_int2. -- cgit From 8b901f6bbcf12a20e43105d161bedde093431e61 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 25 Jan 2017 09:20:55 +0100 Subject: net-next: ethernet: mediatek: change the compatible string When the binding was defined, I was not aware that mt2701 was an earlier version of the SoC. For sake of consistency, the ethernet driver should use mt2701 inside the compat string as this is the earliest SoC with the ethernet core. The ethernet driver is currently of no real use until we finish and upstream the DSA driver. There are no users of this binding yet. It should be safe to fix this now before it is too late and we need to provide backward compatibility for the mt7623-eth compat string. Reported-by: Sean Wang Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3dd87889e67e..1c29c86f8709 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2517,7 +2517,7 @@ static int mtk_remove(struct platform_device *pdev) } const struct of_device_id of_mtk_match[] = { - { .compatible = "mediatek,mt7623-eth" }, + { .compatible = "mediatek,mt2701-eth" }, {}, }; MODULE_DEVICE_TABLE(of, of_mtk_match); -- cgit From 8a87fca8dd5879eb05a0903cb7ea4fd2a3876ae0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Jan 2017 11:39:48 +0100 Subject: net: phy: leds: Clear phy_num_led_triggers on failure to avoid crash phy_attach_direct() ignores errors returned by phy_led_triggers_register(). I think that's OK, as LED triggers can be considered a non-critical feature. However, this causes problems later: - phy_led_trigger_change_speed() will access the array phy_device.phy_led_triggers, which has been freed in the error path of phy_led_triggers_register(), which may lead to a crash. - phy_led_triggers_unregister() will access the same array, leading to crashes during s2ram or poweroff, like: Unable to handle kernel NULL pointer dereference at virtual address 00000000 ... [] (__list_del_entry_valid) from [] (led_trigger_unregister+0x34/0xcc) [] (led_trigger_unregister) from [] (phy_led_triggers_unregister+0x28/0x34) [] (phy_led_triggers_unregister) from [] (phy_detach+0x30/0x74) [] (phy_detach) from [] (sh_eth_close+0x64/0x9c) [] (sh_eth_close) from [] (dpm_run_callback+0x48/0xc8) or: list_del corruption. prev->next should be dede6540, but was 2e323931 ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:52! ... [] (__list_del_entry_valid) from [] (led_trigger_unregister+0x34/0xcc) [] (led_trigger_unregister) from [] (phy_led_triggers_unregister+0x28/0x34) [] (phy_led_triggers_unregister) from [] (phy_detach+0x30/0x74) [] (phy_detach) from [] (sh_eth_close+0x6c/0xa4) [] (sh_eth_close) from [] (__dev_close_many+0xac/0xd0) To fix this, clear phy_device.phy_num_led_triggers in the error path of phy_led_triggers_register() fails. Note that the "No phy led trigger registered for speed" message will still be printed on link speed changes, which is a good cue that something went wrong with the LED triggers. Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change") Signed-off-by: Geert Uytterhoeven Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_led_triggers.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index fa62bdf2f526..3f619e7371e9 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -102,8 +102,10 @@ int phy_led_triggers_register(struct phy_device *phy) sizeof(struct phy_led_trigger) * phy->phy_num_led_triggers, GFP_KERNEL); - if (!phy->phy_led_triggers) - return -ENOMEM; + if (!phy->phy_led_triggers) { + err = -ENOMEM; + goto out_clear; + } for (i = 0; i < phy->phy_num_led_triggers; i++) { err = phy_led_trigger_register(phy, &phy->phy_led_triggers[i], @@ -120,6 +122,8 @@ out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); +out_clear: + phy->phy_num_led_triggers = 0; return err; } EXPORT_SYMBOL_GPL(phy_led_triggers_register); -- cgit From d6f8cfa3dea294eabf8f302e90176dd6381fb66e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Jan 2017 11:39:49 +0100 Subject: net: phy: leds: Break dependency of phy.h on phy_led_triggers.h includes , which is not really needed. Drop the include from , and add it to all users that didn't include it explicitly. Suggested-by: Andrew Lunn Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 1 + drivers/net/phy/phy_led_triggers.c | 1 + include/linux/phy.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e687a9cb4a37..7cc1b7dcfe05 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 3f619e7371e9..94ca42e630bb 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -12,6 +12,7 @@ */ #include #include +#include #include static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy, diff --git a/include/linux/phy.h b/include/linux/phy.h index f7d95f644eed..7fc1105605bf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,7 +25,6 @@ #include #include #include -#include #include -- cgit From 3c880eb0205222bb062970085ebedc73ec8dfd14 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Jan 2017 11:39:50 +0100 Subject: net: phy: leds: Fix truncated LED trigger names Commit 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id") increased the size of MII bus IDs, but forgot to update the private definition in . This may cause: 1. Truncation of LED trigger names, 2. Duplicate LED trigger names, 3. Failures registering LED triggers, 4. Crashes due to bad error handling in the LED trigger failure path. To fix this, and prevent the definitions going out of sync again in the future, let the PHY LED trigger code use the existing MII_BUS_ID_SIZE definition. Example: - Before I had triggers "ee700000.etherne:01:100Mbps" and "ee700000.etherne:01:10Mbps", - After the increase of MII_BUS_ID_SIZE, both became "ee700000.ethernet-ffffffff:01:" => FAIL, - Now, the triggers are "ee700000.ethernet-ffffffff:01:100Mbps" and "ee700000.ethernet-ffffffff:01:10Mbps", which are unique again. Fixes: 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id") Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy_led_triggers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h index a2daea0a37d2..b37b05bfd1a6 100644 --- a/include/linux/phy_led_triggers.h +++ b/include/linux/phy_led_triggers.h @@ -18,11 +18,11 @@ struct phy_device; #ifdef CONFIG_LED_TRIGGER_PHY #include +#include #define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 -#define PHY_MII_BUS_ID_SIZE (20 - 3) -#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \ FIELD_SIZEOF(struct mdio_device, addr)+\ PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) -- cgit From f154be241d22298d2b63c9b613f619fa1086ea75 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 25 Jan 2017 09:10:41 -0800 Subject: net: dsa: Bring back device detaching in dsa_slave_suspend() Commit 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid lockdep splat") removed the netif_device_detach() call done in dsa_slave_suspend() which is necessary, and paired with a corresponding netif_device_attach(), bring it back. Fixes: 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid lockdep splat") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ba1b6b9630d2..7d4596110851 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1201,6 +1201,8 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + netif_device_detach(slave_dev); + if (p->phy) { phy_stop(p->phy); p->old_pause = -1; -- cgit From 54a07c7bb0da0343734c78212bbe9f3735394962 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 26 Jan 2017 06:44:03 +1000 Subject: Revert "drm/probe-helpers: Drop locking from poll_enable" This reverts commit 3846fd9b86001bea171943cc3bb9222cb6da6b42. There were some precursor commits missing for this around connector locking, we should probably merge Lyude's nouveau avoid the problem patch. --- drivers/gpu/drm/drm_probe_helper.c | 51 ++++++++++++++++++++++-------------- drivers/gpu/drm/i915/intel_hotplug.c | 4 +-- include/drm/drm_crtc_helper.h | 1 + 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index b452a7ccd84b..cf8f0128c161 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -115,27 +115,24 @@ static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) #define DRM_OUTPUT_POLL_PERIOD (10*HZ) /** - * drm_kms_helper_poll_enable - re-enable output polling. + * drm_kms_helper_poll_enable_locked - re-enable output polling. * @dev: drm_device * - * This function re-enables the output polling work, after it has been - * temporarily disabled using drm_kms_helper_poll_disable(), for example over - * suspend/resume. + * This function re-enables the output polling work without + * locking the mode_config mutex. * - * Drivers can call this helper from their device resume implementation. It is - * an error to call this when the output polling support has not yet been set - * up. - * - * Note that calls to enable and disable polling must be strictly ordered, which - * is automatically the case when they're only call from suspend/resume - * callbacks. + * This is like drm_kms_helper_poll_enable() however it is to be + * called from a context where the mode_config mutex is locked + * already. */ -void drm_kms_helper_poll_enable(struct drm_device *dev) +void drm_kms_helper_poll_enable_locked(struct drm_device *dev) { bool poll = false; struct drm_connector *connector; unsigned long delay = DRM_OUTPUT_POLL_PERIOD; + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); + if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) return; @@ -163,7 +160,7 @@ void drm_kms_helper_poll_enable(struct drm_device *dev) if (poll) schedule_delayed_work(&dev->mode_config.output_poll_work, delay); } -EXPORT_SYMBOL(drm_kms_helper_poll_enable); +EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked); static enum drm_connector_status drm_connector_detect(struct drm_connector *connector, bool force) @@ -290,7 +287,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, /* Re-enable polling in case the global poll config changed. */ if (drm_kms_helper_poll != dev->mode_config.poll_running) - drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); dev->mode_config.poll_running = drm_kms_helper_poll; @@ -482,12 +479,8 @@ out: * This function disables the output polling work. * * Drivers can call this helper from their device suspend implementation. It is - * not an error to call this even when output polling isn't enabled or already - * disabled. Polling is re-enabled by calling drm_kms_helper_poll_enable(). - * - * Note that calls to enable and disable polling must be strictly ordered, which - * is automatically the case when they're only call from suspend/resume - * callbacks. + * not an error to call this even when output polling isn't enabled or arlready + * disabled. */ void drm_kms_helper_poll_disable(struct drm_device *dev) { @@ -497,6 +490,24 @@ void drm_kms_helper_poll_disable(struct drm_device *dev) } EXPORT_SYMBOL(drm_kms_helper_poll_disable); +/** + * drm_kms_helper_poll_enable - re-enable output polling. + * @dev: drm_device + * + * This function re-enables the output polling work. + * + * Drivers can call this helper from their device resume implementation. It is + * an error to call this when the output polling support has not yet been set + * up. + */ +void drm_kms_helper_poll_enable(struct drm_device *dev) +{ + mutex_lock(&dev->mode_config.mutex); + drm_kms_helper_poll_enable_locked(dev); + mutex_unlock(&dev->mode_config.mutex); +} +EXPORT_SYMBOL(drm_kms_helper_poll_enable); + /** * drm_kms_helper_poll_init - initialize and enable output polling * @dev: drm_device diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..3d546c019de0 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -180,7 +180,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) /* Enable polling and queue hotplug re-enabling. */ if (hpd_disabled) { - drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); mod_delayed_work(system_wq, &dev_priv->hotplug.reenable_work, msecs_to_jiffies(HPD_STORM_REENABLE_DELAY)); } @@ -511,7 +511,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) } if (enabled) - drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); mutex_unlock(&dev->mode_config.mutex); diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index d026f5017c33..982c299e435a 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -73,5 +73,6 @@ extern void drm_kms_helper_hotplug_event(struct drm_device *dev); extern void drm_kms_helper_poll_disable(struct drm_device *dev); extern void drm_kms_helper_poll_enable(struct drm_device *dev); +extern void drm_kms_helper_poll_enable_locked(struct drm_device *dev); #endif -- cgit From 1372cef1c697d8aac0cc923f8aa2c37d790ec9ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 25 Jan 2017 19:30:09 +0000 Subject: regulator: fixed: Revert support for ACPI interface This reverts commit 13bed58ce874 (regulator: fixed: add support for ACPI interface). While there does appear to be a practical need to manage regulators on ACPI systems, using ad-hoc properties to describe regulators to the kernel presents a number of problems (especially should ACPI gain first class support for such things), and there are ongoing discussions as to how to manage this. Until there is a rough consensus, revert commit 13bed58ce8748d43, which hasn't been in a released kernel yet as discussed in [1] and the surrounding thread. [1] http://lkml.kernel.org/r/20170125184949.x2wkoo7kbaaajkjk@sirena.org.uk Signed-off-by: Mark Rutland Cc: Liam Girdwood Cc: Lorenzo Pieralisi Cc: Lu Baolu Cc: Mark Brown Cc: Rafael J. Wysocki Cc: linux-kernel@vger.kernel.org Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index a43b0e8a438d..988a7472c2ab 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -30,9 +30,6 @@ #include #include #include -#include -#include -#include struct fixed_voltage_data { struct regulator_desc desc; @@ -97,44 +94,6 @@ of_get_fixed_voltage_config(struct device *dev, return config; } -/** - * acpi_get_fixed_voltage_config - extract fixed_voltage_config structure info - * @dev: device requesting for fixed_voltage_config - * @desc: regulator description - * - * Populates fixed_voltage_config structure by extracting data through ACPI - * interface, returns a pointer to the populated structure of NULL if memory - * alloc fails. - */ -static struct fixed_voltage_config * -acpi_get_fixed_voltage_config(struct device *dev, - const struct regulator_desc *desc) -{ - struct fixed_voltage_config *config; - const char *supply_name; - struct gpio_desc *gpiod; - int ret; - - config = devm_kzalloc(dev, sizeof(*config), GFP_KERNEL); - if (!config) - return ERR_PTR(-ENOMEM); - - ret = device_property_read_string(dev, "supply-name", &supply_name); - if (!ret) - config->supply_name = supply_name; - - gpiod = gpiod_get(dev, "gpio", GPIOD_ASIS); - if (IS_ERR(gpiod)) - return ERR_PTR(-ENODEV); - - config->gpio = desc_to_gpio(gpiod); - config->enable_high = device_property_read_bool(dev, - "enable-active-high"); - gpiod_put(gpiod); - - return config; -} - static struct regulator_ops fixed_voltage_ops = { }; @@ -155,11 +114,6 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) &drvdata->desc); if (IS_ERR(config)) return PTR_ERR(config); - } else if (ACPI_HANDLE(&pdev->dev)) { - config = acpi_get_fixed_voltage_config(&pdev->dev, - &drvdata->desc); - if (IS_ERR(config)) - return PTR_ERR(config); } else { config = dev_get_platdata(&pdev->dev); } -- cgit From 0e1929dedea36781e25902118c93edd8d8f09af1 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Mon, 16 Jan 2017 15:49:38 +0100 Subject: i2c: i2c-cadence: Initialize configuration before probing devices The cadence I2C driver calls cdns_i2c_writereg(..) to setup a workaround in the controller, but did so after calling i2c_add_adapter() which starts probing devices on the bus. Change the order so that the configuration is completely finished before using the adapter. Signed-off-by: Mike Looijmans Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 686971263bef..45d6771fac8c 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -962,10 +962,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) goto err_clk_dis; } - ret = i2c_add_adapter(&id->adap); - if (ret < 0) - goto err_clk_dis; - /* * Cadence I2C controller has a bug wherein it generates * invalid read transaction after HW timeout in master receiver mode. @@ -975,6 +971,10 @@ static int cdns_i2c_probe(struct platform_device *pdev) */ cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET); + ret = i2c_add_adapter(&id->adap); + if (ret < 0) + goto err_clk_dis; + dev_info(&pdev->dev, "%u kHz mmio %08lx irq %d\n", id->i2c_clk / 1000, (unsigned long)r_mem->start, id->irq); -- cgit From b9b487e494712c8e5905b724e12f5ef17e9ae6f9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 Jan 2017 12:00:29 -0500 Subject: Revert "drm/radeon: always apply pci shutdown callbacks" This seems to break reboot on some evergreen systems. bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99524 https://bugzilla.kernel.org/show_bug.cgi?id=192271 This reverts commit a481daa88fd4d6b54f25348972bba10b5f6a84d0. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 00ea0002b539..e0c143b865f3 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -366,11 +366,10 @@ static void radeon_pci_shutdown(struct pci_dev *pdev) { /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown. - * unfortunately we can't detect certain - * hypervisors so just do this all the time. + * torn down properly on reboot/shutdown */ - radeon_pci_remove(pdev); + if (radeon_device_is_virtual()) + radeon_pci_remove(pdev); } static int radeon_pmops_suspend(struct device *dev) -- cgit From e13fe92bb58cf9b8f709ec18267ffc9e6ffeb016 Mon Sep 17 00:00:00 2001 From: Gao Pan Date: Tue, 17 Jan 2017 18:20:55 +0800 Subject: i2c: imx-lpi2c: add VLLS mode support When system enters VLLS mode, module power is turned off. As a result, all registers are reset to HW default value. After exiting VLLS mode, registers are still in default mode. As a result, the pinctrl settings are incorrect, which will affect the module function. The patch recovers the pinctrl setting when exit VLLS mode. Signed-off-by: Gao Pan Reviewed-by: Vladimir Zapolskiy [wsa: added missing include] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index c62b7cd475f8..3310f2e0dbd3 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -636,12 +637,31 @@ static int lpi2c_imx_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int lpi2c_imx_suspend(struct device *dev) +{ + pinctrl_pm_select_sleep_state(dev); + + return 0; +} + +static int lpi2c_imx_resume(struct device *dev) +{ + pinctrl_pm_select_default_state(dev); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(imx_lpi2c_pm, lpi2c_imx_suspend, lpi2c_imx_resume); + static struct platform_driver lpi2c_imx_driver = { .probe = lpi2c_imx_probe, .remove = lpi2c_imx_remove, .driver = { .name = DRIVER_NAME, .of_match_table = lpi2c_imx_of_match, + .pm = &imx_lpi2c_pm, }, }; -- cgit From 26afec39306926654e9cd320f19bbf3685bb0997 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 26 Jan 2017 09:38:31 +0800 Subject: r8152: avoid start_xmit to call napi_schedule during autosuspend Adjust the setting of the flag of SELECTIVE_SUSPEND to prevent start_xmit() from calling napi_schedule() directly during runtime suspend. After calling napi_disable() or clearing the flag of WORK_ENABLE, scheduling the napi is useless. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e1466b4d2b6c..23bef8e6e0bd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3585,10 +3585,15 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp) struct net_device *netdev = tp->netdev; int ret = 0; + set_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { u32 rcr = 0; if (delay_autosuspend(tp)) { + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); ret = -EBUSY; goto out1; } @@ -3605,6 +3610,8 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp) if (!(ocp_data & RXFIFO_EMPTY)) { rxdy_gated_en(tp, false); ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); ret = -EBUSY; goto out1; } @@ -3624,8 +3631,6 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp) } } - set_bit(SELECTIVE_SUSPEND, &tp->flags); - out1: return ret; } @@ -3681,12 +3686,13 @@ static int rtl8152_resume(struct usb_interface *intf) if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { tp->rtl_ops.autosuspend_en(tp, false); - clear_bit(SELECTIVE_SUSPEND, &tp->flags); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(tp->netdev)) rtl_start_rx(tp); napi_enable(&tp->napi); + clear_bit(SELECTIVE_SUSPEND, &tp->flags); + smp_mb__after_atomic(); } else { tp->rtl_ops.up(tp); netif_carrier_off(tp->netdev); -- cgit From de9bf29dd6e4a8a874cb92f8901aed50a9d0b1d3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 26 Jan 2017 09:38:32 +0800 Subject: r8152: avoid start_xmit to schedule napi when napi is disabled Stop the tx when the napi is disabled to prevent napi_schedule() is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 23bef8e6e0bd..ec882be4cfaf 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3155,10 +3155,13 @@ static void set_carrier(struct r8152 *tp) if (!netif_carrier_ok(netdev)) { tp->rtl_ops.enable(tp); set_bit(RTL8152_SET_RX_MODE, &tp->flags); + netif_stop_queue(netdev); napi_disable(&tp->napi); netif_carrier_on(netdev); rtl_start_rx(tp); napi_enable(&tp->napi); + netif_wake_queue(netdev); + netif_info(tp, link, netdev, "carrier on\n"); } } else { if (netif_carrier_ok(netdev)) { @@ -3166,6 +3169,7 @@ static void set_carrier(struct r8152 *tp) napi_disable(&tp->napi); tp->rtl_ops.disable(tp); napi_enable(&tp->napi); + netif_info(tp, link, netdev, "carrier off\n"); } } } @@ -3515,12 +3519,12 @@ static int rtl8152_pre_reset(struct usb_interface *intf) if (!netif_running(netdev)) return 0; + netif_stop_queue(netdev); napi_disable(&tp->napi); clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); if (netif_carrier_ok(netdev)) { - netif_stop_queue(netdev); mutex_lock(&tp->control); tp->rtl_ops.disable(tp); mutex_unlock(&tp->control); @@ -3548,10 +3552,10 @@ static int rtl8152_post_reset(struct usb_interface *intf) rtl_start_rx(tp); rtl8152_set_rx_mode(netdev); mutex_unlock(&tp->control); - netif_wake_queue(netdev); } napi_enable(&tp->napi); + netif_wake_queue(netdev); usb_submit_urb(tp->intr_urb, GFP_KERNEL); return 0; -- cgit From 248b213ad908b88db15941202ef7cb7eb137c1a0 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 26 Jan 2017 09:38:33 +0800 Subject: r8152: re-schedule napi for tx Re-schedule napi after napi_complete() for tx, if it is necessay. In r8152_poll(), if the tx is completed after tx_bottom() and before napi_complete(), the scheduling of napi would be lost. Then, no one handles the next tx until the next napi_schedule() is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ec882be4cfaf..4785d2b9d80f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1936,6 +1936,9 @@ static int r8152_poll(struct napi_struct *napi, int budget) napi_complete(napi); if (!list_empty(&tp->rx_done)) napi_schedule(napi); + else if (!skb_queue_empty(&tp->tx_queue) && + !list_empty(&tp->tx_free)) + napi_schedule(napi); } return work_done; -- cgit From 7489bdadb7d17d3c81e39b85688500f700beb790 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 26 Jan 2017 09:38:34 +0800 Subject: r8152: check rx after napi is enabled Schedule the napi after napi_enable() for rx, if it is necessary. If the rx is completed when napi is disabled, the sheduling of napi would be lost. Then, no one handles the rx packet until next napi is scheduled. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4785d2b9d80f..ad42295356dd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "7" +#define NET_VERSION "8" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -3561,6 +3561,9 @@ static int rtl8152_post_reset(struct usb_interface *intf) netif_wake_queue(netdev); usb_submit_urb(tp->intr_urb, GFP_KERNEL); + if (!list_empty(&tp->rx_done)) + napi_schedule(&tp->napi); + return 0; } @@ -3700,6 +3703,8 @@ static int rtl8152_resume(struct usb_interface *intf) napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); + if (!list_empty(&tp->rx_done)) + napi_schedule(&tp->napi); } else { tp->rtl_ops.up(tp); netif_carrier_off(tp->netdev); -- cgit From b68df015609eac67f045c155cb3195e5a1061d66 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 25 Jan 2017 18:22:48 -0800 Subject: virtio_net: use dev_kfree_skb for small buffer XDP receive In the small buffer case during driver unload we currently use put_page instead of dev_kfree_skb. Resolve this by adding a check for virtnet mode when checking XDP queue type. Also name the function so that the code reads correctly to match the additional check. Fixes: bb91accf2733 ("virtio-net: XDP support for small buffers") Signed-off-by: John Fastabend Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3d1519ea0669..4c6e9b43f1d3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1898,8 +1898,12 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } -static bool is_xdp_queue(struct virtnet_info *vi, int q) +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) { + /* For small receive mode always use kfree_skb variants */ + if (!vi->mergeable_rx_bufs) + return false; + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) return false; else if (q < vi->curr_queue_pairs) @@ -1916,7 +1920,7 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_queue(vi, i)) + if (!is_xdp_raw_buffer_queue(vi, i)) dev_kfree_skb(buf); else put_page(virt_to_head_page(buf)); -- cgit From 529ec6ac26656378435eb0396a780f017d51e105 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2017 14:56:36 -0800 Subject: virtio_net: reject XDP programs using header adjustment commit 17bedab27231 ("bpf: xdp: Allow head adjustment in XDP prog") added a new XDP helper to prepend and remove data from a frame. Make virtio_net reject programs making use of this helper until proper support is added. Signed-off-by: Jakub Kicinski Acked-by: John Fastabend Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4c6e9b43f1d3..765c2d6358da 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1715,6 +1715,11 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) u16 xdp_qp = 0, curr_qp; int i, err; + if (prog && prog->xdp_adjust_head) { + netdev_warn(dev, "Does not support bpf_xdp_adjust_head()\n"); + return -EOPNOTSUPP; + } + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || -- cgit From 2aa6ba7b5ad3189cc27f14540aa2f57f0ed8df4b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 25 Jan 2017 20:24:57 -0800 Subject: xfs: clear _XBF_PAGES from buffers when readahead page If we try to allocate memory pages to back an xfs_buf that we're trying to read, it's possible that we'll be so short on memory that the page allocation fails. For a blocking read we'll just wait, but for readahead we simply dump all the pages we've collected so far. Unfortunately, after dumping the pages we neglect to clear the _XBF_PAGES state, which means that the subsequent call to xfs_buf_free thinks that b_pages still points to pages we own. It then double-frees the b_pages pages. This results in screaming about negative page refcounts from the memory manager, which xfs oughtn't be triggering. To reproduce this case, mount a filesystem where the size of the inodes far outweighs the availalble memory (a ~500M inode filesystem on a VM with 300MB memory did the trick here) and run bulkstat in parallel with other memory eating processes to put a huge load on the system. The "check summary" phase of xfs_scrub also works for this purpose. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..ac3b4db519df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -422,6 +422,7 @@ retry: out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } -- cgit From c81e55e057b6458aac6d96a6429ef021b7f6f62c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 24 Dec 2016 09:46:43 -0800 Subject: nvmet_fc: correct logic in disconnect queue LS handling Correct logic in disconnect queue LS handling. Rework so that queue searching and error reporting is above the section to send back a ls rjt Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 173e842f19c9..ba57f9852bde 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1314,7 +1314,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; struct fcnvme_ls_disconnect_acc *acc = (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; - struct nvmet_fc_tgt_queue *queue; + struct nvmet_fc_tgt_queue *queue = NULL; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; bool del_assoc = false; @@ -1348,7 +1348,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; - if (!assoc) + if (assoc) { + if (rqst->discon_cmd.scope == + FCNVME_DISCONN_CONNECTION) { + queue = nvmet_fc_find_target_queue(tgtport, + be64_to_cpu( + rqst->discon_cmd.id)); + if (!queue) { + nvmet_fc_tgt_a_put(assoc); + ret = VERR_NO_CONN; + } + } + } else ret = VERR_NO_ASSOC; } @@ -1373,21 +1384,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_LS_DISCONNECT); - if (rqst->discon_cmd.scope == FCNVME_DISCONN_CONNECTION) { - queue = nvmet_fc_find_target_queue(tgtport, - be64_to_cpu(rqst->discon_cmd.id)); - if (queue) { - int qid = queue->qid; + /* are we to delete a Connection ID (queue) */ + if (queue) { + int qid = queue->qid; - nvmet_fc_delete_target_queue(queue); + nvmet_fc_delete_target_queue(queue); - /* release the get taken by find_target_queue */ - nvmet_fc_tgt_q_put(queue); + /* release the get taken by find_target_queue */ + nvmet_fc_tgt_q_put(queue); - /* tear association down if io queue terminated */ - if (!qid) - del_assoc = true; - } + /* tear association down if io queue terminated */ + if (!qid) + del_assoc = true; } /* release get taken in nvmet_fc_find_target_assoc */ -- cgit From 344770b07b7ae70639ebf110010eb6156a6e55e9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 27 Nov 2016 22:29:17 +0200 Subject: nvmet: delete controllers deletion upon subsystem release No reason for them to be kept around if we are deleting the subsystem, so instead of passively wait for the host to disconnect, actively delete the controllers. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 1 + drivers/nvme/target/core.c | 10 ++++++++++ drivers/nvme/target/nvmet.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 6f5074153dcd..be8c800078e2 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -631,6 +631,7 @@ static void nvmet_subsys_release(struct config_item *item) { struct nvmet_subsys *subsys = to_subsys(item); + nvmet_subsys_del_ctrls(subsys); nvmet_subsys_put(subsys); } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b1d66ed655c9..4a367549eb93 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -935,6 +935,16 @@ static void nvmet_subsys_free(struct kref *ref) kfree(subsys); } +void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) +{ + struct nvmet_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) + ctrl->ops->delete_ctrl(ctrl); + mutex_unlock(&subsys->lock); +} + void nvmet_subsys_put(struct nvmet_subsys *subsys) { kref_put(&subsys->ref, nvmet_subsys_free); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 23d5eb1c944f..cc7ad06b43a7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -282,6 +282,7 @@ void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); void nvmet_subsys_put(struct nvmet_subsys *subsys); +void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys); struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid); void nvmet_put_namespace(struct nvmet_ns *ns); -- cgit From 06406d81a2d7cfb8abcc4fa6cdfeb8e5897007c5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 1 Jan 2017 13:41:56 +0200 Subject: nvmet: cancel fatal error and flush async work before free controller Make sure they are not running and we can free the controller safely. Signed-off-by: Roy Shterman Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4a367549eb93..a327a43f8166 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -816,6 +816,9 @@ static void nvmet_ctrl_free(struct kref *ref) list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); + flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->fatal_err_work); + ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); nvmet_subsys_put(subsys); -- cgit From 23a8ed4a624324dc696c328f09bd502c4a3816f0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 1 Jan 2017 13:18:26 +0200 Subject: nvmet: Call fatal_error from keep-alive timout expiration We only need to call delete_ctrl once, so given that both keep-alive timeout and any other fatal error can trigger it, just make sure we only call delete_ctrl once. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a327a43f8166..fc5ba2f9e15f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -200,7 +200,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", ctrl->cntlid, ctrl->kato); - ctrl->ops->delete_ctrl(ctrl); + nvmet_ctrl_fatal_error(ctrl); } static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) -- cgit From 748ff8408f8e208f279ba221e5c12612fbb4dddb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 19 Jan 2017 09:55:08 -0600 Subject: nvmet-rdma: Fix missing dma sync to nvme data structures This patch performs dma sync operations on nvme_command and nvme_completion. nvme_command is synced (a) on receiving of the recv queue completion for cpu access. (b) before posting recv wqe back to rdma adapter for device access. nvme_completion is synced (a) on receiving of the recv queue completion of associated nvme_command for cpu access. (b) before posting send wqe to rdma adapter for device access. This patch is generated for git://git.infradead.org/nvme-fabrics.git Branch: nvmf-4.10 Signed-off-by: Parav Pandit Reviewed-by: Max Gurtovoy --- drivers/nvme/target/rdma.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 8c3760a78ac0..60990220bd83 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -438,6 +438,10 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, { struct ib_recv_wr *bad_wr; + ib_dma_sync_single_for_device(ndev->device, + cmd->sge[0].addr, cmd->sge[0].length, + DMA_FROM_DEVICE); + if (ndev->srq) return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); @@ -538,6 +542,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) first_wr = &rsp->send_wr; nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); + + ib_dma_sync_single_for_device(rsp->queue->dev->device, + rsp->send_sge.addr, rsp->send_sge.length, + DMA_TO_DEVICE); + if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) { pr_err("sending cmd response failed\n"); nvmet_rdma_release_rsp(rsp); @@ -698,6 +707,14 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, cmd->n_rdma = 0; cmd->req.port = queue->port; + + ib_dma_sync_single_for_cpu(queue->dev->device, + cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, + DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(queue->dev->device, + cmd->send_sge.addr, cmd->send_sge.length, + DMA_TO_DEVICE); + if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, &queue->nvme_sq, &nvmet_rdma_ops)) return; -- cgit From 19e420bb4076ace670addc55300e3b8c4a02dfc6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 Jan 2017 16:55:57 +0100 Subject: nvme-fc: use blk_rq_nr_phys_segments Without this deallocate won't work properly due to the mismatch of the bio/request size and the actual payload size. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fcc9dcfdf675..e65041c640cb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1663,13 +1663,13 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, return 0; freq->sg_table.sgl = freq->first_sgl; - ret = sg_alloc_table_chained(&freq->sg_table, rq->nr_phys_segments, - freq->sg_table.sgl); + ret = sg_alloc_table_chained(&freq->sg_table, + blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); if (ret) return -ENOMEM; op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); - WARN_ON(op->nents > rq->nr_phys_segments); + WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, dir); -- cgit From 08965c2eba135bdfb6e86cf25308e01421c7e0ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 25 Jan 2017 13:43:56 -0800 Subject: Revert "sd: remove __data_len hack for WRITE SAME" This patch reverts commit f80de881d8df and avoids that sending a WRITE SAME command to the iSCSI initiator triggers the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000014 TARGET_CORE[iSCSI]: Expected Transfer Length: 260096 does not match SCSI CDB Length: 512 for SAM Opcode: 0x41 IP: iscsi_tcp_segment_done+0x20b/0x310 [libiscsi_tcp] Oops: 0000 [#1] SMP Modules linked in: target_core_user uio target_core_iblock target_core_file iscsi_target_mod target_core_mod netconsole configfs crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 crypto_simd cryptd glue_helper virtio_console virtio_rng virtio_balloon serio_raw i2c_piix4 acpi_cpufreq button iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ext4 jbd2 mbcache virtio_blk virtio_net psmouse floppy drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_pci CPU: 2 PID: 5 Comm: kworker/u8:0 Not tainted 4.10.0-rc5-debug+ #3 Workqueue: iscsi_q_0 iscsi_xmitworker [libiscsi] RIP: 0010:iscsi_tcp_segment_done+0x20b/0x310 [libiscsi_tcp] Call Trace: iscsi_sw_tcp_xmit_segment+0x84/0x120 [iscsi_tcp] iscsi_sw_tcp_pdu_xmit+0x51/0x180 [iscsi_tcp] iscsi_tcp_task_xmit+0xb3/0x290 [libiscsi_tcp] iscsi_xmit_task+0x4e/0xc0 [libiscsi] iscsi_xmitworker+0x243/0x330 [libiscsi] process_one_work+0x1d8/0x4b0 worker_thread+0x49/0x4a0 kthread+0x102/0x140 Fixes: f80de881d8df ("sd: remove __data_len hack for WRITE SAME") Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Jens Axboe Cc: Lee Duncan Cc: Chris Leech Acked-by: Christoph Hellwig Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0b09638fa39b..1f5d92a25a49 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -836,6 +836,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) struct bio *bio = rq->bio; sector_t sector = blk_rq_pos(rq); unsigned int nr_sectors = blk_rq_sectors(rq); + unsigned int nr_bytes = blk_rq_bytes(rq); int ret; if (sdkp->device->no_write_same) @@ -868,7 +869,21 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) cmd->transfersize = sdp->sector_size; cmd->allowed = SD_MAX_RETRIES; - return scsi_init_io(cmd); + + /* + * For WRITE SAME the data transferred via the DATA OUT buffer is + * different from the amount of data actually written to the target. + * + * We set up __data_len to the amount of data transferred via the + * DATA OUT buffer so that blk_rq_map_sg sets up the proper S/G list + * to transfer a single sector of data first, but then reset it to + * the amount of data to be written right after so that the I/O path + * knows how much to actually write. + */ + rq->__data_len = sdp->sector_size; + ret = scsi_init_io(cmd); + rq->__data_len = nr_bytes; + return ret; } static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) -- cgit From ff9f8a7cf935468a94d9927c68b00daae701667e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Jan 2017 18:20:55 -0800 Subject: sysctl: fix proc_doulongvec_ms_jiffies_minmax() We perform the conversion between kernel jiffies and ms only when exporting kernel value to user space. We need to do the opposite operation when value is written by user. Only matters when HZ != 1000 Signed-off-by: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8dbaec0e4f7f..1aea594a54db 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2475,6 +2475,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int break; if (neg) continue; + val = convmul * val / convdiv; if ((min && val < *min) || (max && val > *max)) continue; *i = val; -- cgit From c364b6d0b6cda1cd5d9ab689489adda3e82529aa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 26 Jan 2017 09:50:30 -0800 Subject: xfs: fix bmv_count confusion w/ shared extents In a bmapx call, bmv_count is the total size of the array, including the zeroth element that userspace uses to supply the search key. The output array starts at offset 1 so that we can set up the user for the next invocation. Since we now can split an extent into multiple bmap records due to shared/unshared status, we have to be careful that we don't overflow the output array. In the original patch f86f403794b ("xfs: teach get_bmapx about shared extents and the CoW fork") I used cur_ext (the output index) to check for overflows, albeit with an off-by-one error. Since nexleft no longer describes the number of unfilled slots in the output, we can rip all that out and use cur_ext for the overflow check directly. Failure to do this causes heap corruption in bmapx callers such as xfs_io and xfs_scrub. xfs/328 can reproduce this problem. Reviewed-by: Eric Sandeen Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index b9abce524c33..c1417919ab0a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -528,7 +528,6 @@ xfs_getbmap( xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ - int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ @@ -686,10 +685,8 @@ xfs_getbmap( goto out_free_map; } - nexleft = nex; - do { - nmap = (nexleft > subnex) ? subnex : nexleft; + nmap = (nex> subnex) ? subnex : nex; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); @@ -697,8 +694,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count; i++) { + for (i = 0; i < nmap && bmv->bmv_length && + cur_ext < bmv->bmv_count - 1; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -760,16 +757,27 @@ xfs_getbmap( continue; } + /* + * In order to report shared extents accurately, + * we report each distinct shared/unshared part + * of a single bmbt record using multiple bmap + * extents. To make that happen, we iterate the + * same map array item multiple times, each + * time trimming out the subextent that we just + * reported. + * + * Because of this, we must check the out array + * index (cur_ext) directly against bmv_count-1 + * to avoid overflows. + */ if (inject_map.br_startblock != NULLFSBLOCK) { map[i] = inject_map; i--; - } else - nexleft--; + } bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count); + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); out_free_map: kmem_free(map); -- cgit From 406dab8450ec76eca88a1af2fc15d18a2b36ca49 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Jan 2017 15:14:52 -0500 Subject: nfs: Fix "Don't increment lock sequence ID after NFS4ERR_MOVED" Lock sequence IDs are bumped in decode_lock by calling nfs_increment_seqid(). nfs_increment_sequid() does not use the seqid_mutating_err() function fixed in commit 059aa7348241 ("Don't increment lock sequence ID after NFS4ERR_MOVED"). Fixes: 059aa7348241 ("Don't increment lock sequence ID after ...") Signed-off-by: Chuck Lever Tested-by: Xuan Qi Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 90e6193ce6be..daeb94e3acd4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1091,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BADXDR: case -NFS4ERR_RESOURCE: case -NFS4ERR_NOFILEHANDLE: + case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; }; -- cgit From ee6625a948d2e47267ec8fd97307fdd67d0f8a5b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Jan 2017 15:50:41 -0500 Subject: pNFS: Fix a reference leak in _pnfs_return_layout IF NFS_LAYOUT_RETURN_REQUESTED is not set, then we currently exit without freeing the list of invalidated layout segments, leading to a reference leak. Reported-by: Olga Kornievskaia Fixes: 24408f5282 ("pNFS: Fix bugs in _pnfs_return_layout") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 59554f3adf29..dd042498ce7c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1200,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); if (send) status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); out_put_layout_hdr: + pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); -- cgit From 282e4637bc1c0b338708bcebd09d31c69abec070 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 26 Jan 2017 09:06:22 -0800 Subject: HID: wacom: Fix poor prox handling in 'wacom_pl_irq' Commit 025bcc1 performed cleanup work on the 'wacom_pl_irq' function, making it follow the standards used in the rest of the codebase. The change unintiontionally allowed the function to send input events from reports that are not marked as being in prox. This can cause problems as the report values for X, Y, etc. are not guaranteed to be correct. In particular, occasionally the tablet will send a report with these values set to zero. If such a report is received it can caus an unexpected jump in the XY position. This patch surrounds more of the processing code with a proximity check, preventing these zeroed reports from overwriting the current state. To be safe, only the tool type and ABS_MISC events should be reported when the pen is marked as being out of prox. Fixes: 025bcc1540 ("HID: wacom: Simplify 'wacom_pl_irq'") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 0884dc9554fd..672145b0d8f5 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -166,19 +166,21 @@ static int wacom_pl_irq(struct wacom_wac *wacom) wacom->id[0] = STYLUS_DEVICE_ID; } - pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); - if (features->pressure_max > 255) - pressure = (pressure << 1) | ((data[4] >> 6) & 1); - pressure += (features->pressure_max + 1) / 2; - - input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); - input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); - input_report_abs(input, ABS_PRESSURE, pressure); - - input_report_key(input, BTN_TOUCH, data[4] & 0x08); - input_report_key(input, BTN_STYLUS, data[4] & 0x10); - /* Only allow the stylus2 button to be reported for the pen tool. */ - input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + if (prox) { + pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); + if (features->pressure_max > 255) + pressure = (pressure << 1) | ((data[4] >> 6) & 1); + pressure += (features->pressure_max + 1) / 2; + + input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); + input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); + input_report_abs(input, ABS_PRESSURE, pressure); + + input_report_key(input, BTN_TOUCH, data[4] & 0x08); + input_report_key(input, BTN_STYLUS, data[4] & 0x10); + /* Only allow the stylus2 button to be reported for the pen tool. */ + input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + } if (!prox) wacom->id[0] = 0; -- cgit From ed9ab4287f96e66340e0390e2c583f2f9110cba0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 26 Jan 2017 17:34:40 +0000 Subject: HID: usbhid: Quirk a AMI virtual mouse and keyboard with ALWAYS_POLL Quirking the following AMI USB device with ALWAYS_POLL fixes an AMI virtual keyboard and mouse from not responding and timing out when it is attached to a ppc64el Power 8 system and when we have some rapid open/closes on the mouse device. usb 1-3: new high-speed USB device number 2 using xhci_hcd usb 1-3: New USB device found, idVendor=046b, idProduct=ff01 usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-3: Product: Virtual Hub usb 1-3: Manufacturer: American Megatrends Inc. usb 1-3: SerialNumber: serial usb 1-3.3: new high-speed USB device number 3 using xhci_hcd usb 1-3.3: New USB device found, idVendor=046b, idProduct=ff31 usb 1-3.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-3.3: Product: Virtual HardDisk Device usb 1-3.3: Manufacturer: American Megatrends Inc. usb 1-3.4: new low-speed USB device number 4 using xhci_hcd usb 1-3.4: New USB device found, idVendor=046b, idProduct=ff10 usb 1-3.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-3.4: Product: Virtual Keyboard and Mouse usb 1-3.4: Manufacturer: American Megatrends Inc. With the quirk I have not been able to trigger the issue with half an hour of saturation soak testing. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f46f2c5117fa..350accfee8e8 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -76,6 +76,9 @@ #define USB_VENDOR_ID_ALPS_JP 0x044E #define HID_DEVICE_ID_ALPS_U1_DUAL 0x120B +#define USB_VENDOR_ID_AMI 0x046b +#define USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE 0xff10 + #define USB_VENDOR_ID_ANTON 0x1130 #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e9d6cc7cdfc5..30a2977e2645 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, -- cgit From 877a021e08ccb6434718c0cc781fdf943c884cc0 Mon Sep 17 00:00:00 2001 From: Ardinartsev Nikita Date: Thu, 26 Jan 2017 16:54:42 +0300 Subject: HID: hid-lg: Fix immediate disconnection of Logitech Rumblepad 2 With NOGET quirk Logitech F510 is now fully workable in dinput mode including rumble effects (according to fftest). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117091 [jkosina@suse.cz: fix patch format] Signed-off-by: Ardinartsev Nikita Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-lg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index c5c5fbe9d605..52026dc94d5c 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -872,7 +872,7 @@ static const struct hid_device_id lg_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2), - .driver_data = LG_FF2 }, + .driver_data = LG_NOGET | LG_FF2 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940), .driver_data = LG_FF3 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR), -- cgit From 07cd12945551b63ecb1a349d50a6d69d1d6feb4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Jan 2017 16:47:28 -0500 Subject: cgroup: don't online subsystems before cgroup_name/path() are operational While refactoring cgroup creation, a5bca2152036 ("cgroup: factor out cgroup_create() out of cgroup_mkdir()") incorrectly onlined subsystems before the new cgroup is associated with it kernfs_node. This is fine for cgroup proper but cgroup_name/path() depend on the associated kernfs_node and if a subsystem makes the new cgroup_subsys_state visible, which they're allowed to after onlining, it can lead to NULL dereference. The current code performs cgroup creation and subsystem onlining in cgroup_create() and cgroup_mkdir() makes the cgroup and subsystems visible afterwards. There's no reason to online the subsystems early and we can simply drop cgroup_apply_control_enable() call from cgroup_create() so that the subsystems are onlined and made visible at the same time. Signed-off-by: Tejun Heo Reported-by: Konstantin Khlebnikov Fixes: a5bca2152036 ("cgroup: factor out cgroup_create() out of cgroup_mkdir()") Cc: stable@vger.kernel.org # v4.6+ --- kernel/cgroup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2ee9ec3051b2..688dd02af985 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5221,6 +5221,11 @@ err_free_css: return ERR_PTR(err); } +/* + * The returned cgroup is fully initialized including its control mask, but + * it isn't associated with its kernfs_node and doesn't have the control + * mask applied. + */ static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; @@ -5288,11 +5293,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_propagate_control(cgrp); - /* @cgrp doesn't have dir yet so the following will only create csses */ - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto out_destroy; - return cgrp; out_cancel_ref: @@ -5300,9 +5300,6 @@ out_cancel_ref: out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); -out_destroy: - cgroup_destroy_locked(cgrp); - return ERR_PTR(ret); } static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, -- cgit From 586655d278ba08af7b198b93217746f9a506ee8a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 25 Jan 2017 00:44:16 +0100 Subject: rtc: jz4740: make the driver buildable as a module again By using kernel_halt() instead of machine_halt(), we can make the driver build as a module. However, jz4740 platforms not loading this module will not be able to power off. Suggested-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Revert "rtc: jz4740: make the driver builtin only" This reverts commit b9168c539c0b2de756aaffd380384dbde8adbe07. --- drivers/rtc/Kconfig | 5 ++++- drivers/rtc/rtc-jz4740.c | 12 ++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index c93c5a8fba32..5dc673dc9487 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1551,12 +1551,15 @@ config RTC_DRV_MPC5121 will be called rtc-mpc5121. config RTC_DRV_JZ4740 - bool "Ingenic JZ4740 SoC" + tristate "Ingenic JZ4740 SoC" depends on MACH_INGENIC || COMPILE_TEST help If you say yes here you get support for the Ingenic JZ47xx SoCs RTC controllers. + This driver can also be buillt as a module. If so, the module + will be called rtc-jz4740. + config RTC_DRV_LPC24XX tristate "NXP RTC for LPC178x/18xx/408x/43xx" depends on ARCH_LPC18XX || COMPILE_TEST diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index 72918c1ba092..64989afffa3d 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -294,7 +295,7 @@ static void jz4740_rtc_power_off(void) JZ_REG_RTC_RESET_COUNTER, reset_counter_ticks); jz4740_rtc_poweroff(dev_for_power_off); - machine_halt(); + kernel_halt(); } static const struct of_device_id jz4740_rtc_of_match[] = { @@ -302,6 +303,7 @@ static const struct of_device_id jz4740_rtc_of_match[] = { { .compatible = "ingenic,jz4780-rtc", .data = (void *)ID_JZ4780 }, {}, }; +MODULE_DEVICE_TABLE(of, jz4740_rtc_of_match); static int jz4740_rtc_probe(struct platform_device *pdev) { @@ -429,6 +431,7 @@ static const struct platform_device_id jz4740_rtc_ids[] = { { "jz4780-rtc", ID_JZ4780 }, {} }; +MODULE_DEVICE_TABLE(platform, jz4740_rtc_ids); static struct platform_driver jz4740_rtc_driver = { .probe = jz4740_rtc_probe, @@ -440,4 +443,9 @@ static struct platform_driver jz4740_rtc_driver = { .id_table = jz4740_rtc_ids, }; -builtin_platform_driver(jz4740_rtc_driver); +module_platform_driver(jz4740_rtc_driver); + +MODULE_AUTHOR("Lars-Peter Clausen "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RTC driver for the JZ4740 SoC\n"); +MODULE_ALIAS("platform:jz4740-rtc"); -- cgit From a47b70ea86bdeb3091341f5ae3ef580f1a1ad822 Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Thu, 26 Jan 2017 14:29:27 +0100 Subject: ravb: unmap descriptors when freeing rings "swiotlb buffer is full" errors occur after repeated initialisation of a device - f.e. suspend/resume or ip link set up/down. This is because memory mapped using dma_map_single() in ravb_ring_format() and ravb_start_xmit() is not released. Resolve this problem by unmapping descriptors when freeing rings. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Kazuya Mizuguchi [simon: reworked] Signed-off-by: Simon Horman Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 112 ++++++++++++++++++------------- 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 89ac1e3f6175..301f48755093 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -179,6 +179,49 @@ static struct mdiobb_ops bb_ops = { .get_mdio_data = ravb_get_mdio_data, }; +/* Free TX skb function for AVB-IP */ +static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only) +{ + struct ravb_private *priv = netdev_priv(ndev); + struct net_device_stats *stats = &priv->stats[q]; + struct ravb_tx_desc *desc; + int free_num = 0; + int entry; + u32 size; + + for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) { + bool txed; + + entry = priv->dirty_tx[q] % (priv->num_tx_ring[q] * + NUM_TX_DESC); + desc = &priv->tx_ring[q][entry]; + txed = desc->die_dt == DT_FEMPTY; + if (free_txed_only && !txed) + break; + /* Descriptor type must be checked before all other reads */ + dma_rmb(); + size = le16_to_cpu(desc->ds_tagl) & TX_DS; + /* Free the original skb. */ + if (priv->tx_skb[q][entry / NUM_TX_DESC]) { + dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), + size, DMA_TO_DEVICE); + /* Last packet descriptor? */ + if (entry % NUM_TX_DESC == NUM_TX_DESC - 1) { + entry /= NUM_TX_DESC; + dev_kfree_skb_any(priv->tx_skb[q][entry]); + priv->tx_skb[q][entry] = NULL; + if (txed) + stats->tx_packets++; + } + free_num++; + } + if (txed) + stats->tx_bytes += size; + desc->die_dt = DT_EEMPTY; + } + return free_num; +} + /* Free skb's and DMA buffers for Ethernet AVB */ static void ravb_ring_free(struct net_device *ndev, int q) { @@ -194,19 +237,21 @@ static void ravb_ring_free(struct net_device *ndev, int q) kfree(priv->rx_skb[q]); priv->rx_skb[q] = NULL; - /* Free TX skb ringbuffer */ - if (priv->tx_skb[q]) { - for (i = 0; i < priv->num_tx_ring[q]; i++) - dev_kfree_skb(priv->tx_skb[q][i]); - } - kfree(priv->tx_skb[q]); - priv->tx_skb[q] = NULL; - /* Free aligned TX buffers */ kfree(priv->tx_align[q]); priv->tx_align[q] = NULL; if (priv->rx_ring[q]) { + for (i = 0; i < priv->num_rx_ring[q]; i++) { + struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; + + if (!dma_mapping_error(ndev->dev.parent, + le32_to_cpu(desc->dptr))) + dma_unmap_single(ndev->dev.parent, + le32_to_cpu(desc->dptr), + PKT_BUF_SZ, + DMA_FROM_DEVICE); + } ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1); dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q], @@ -215,12 +260,20 @@ static void ravb_ring_free(struct net_device *ndev, int q) } if (priv->tx_ring[q]) { + ravb_tx_free(ndev, q, false); + ring_size = sizeof(struct ravb_tx_desc) * (priv->num_tx_ring[q] * NUM_TX_DESC + 1); dma_free_coherent(ndev->dev.parent, ring_size, priv->tx_ring[q], priv->tx_desc_dma[q]); priv->tx_ring[q] = NULL; } + + /* Free TX skb ringbuffer. + * SKBs are freed by ravb_tx_free() call above. + */ + kfree(priv->tx_skb[q]); + priv->tx_skb[q] = NULL; } /* Format skb and descriptor buffer for Ethernet AVB */ @@ -431,44 +484,6 @@ static int ravb_dmac_init(struct net_device *ndev) return 0; } -/* Free TX skb function for AVB-IP */ -static int ravb_tx_free(struct net_device *ndev, int q) -{ - struct ravb_private *priv = netdev_priv(ndev); - struct net_device_stats *stats = &priv->stats[q]; - struct ravb_tx_desc *desc; - int free_num = 0; - int entry; - u32 size; - - for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) { - entry = priv->dirty_tx[q] % (priv->num_tx_ring[q] * - NUM_TX_DESC); - desc = &priv->tx_ring[q][entry]; - if (desc->die_dt != DT_FEMPTY) - break; - /* Descriptor type must be checked before all other reads */ - dma_rmb(); - size = le16_to_cpu(desc->ds_tagl) & TX_DS; - /* Free the original skb. */ - if (priv->tx_skb[q][entry / NUM_TX_DESC]) { - dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - size, DMA_TO_DEVICE); - /* Last packet descriptor? */ - if (entry % NUM_TX_DESC == NUM_TX_DESC - 1) { - entry /= NUM_TX_DESC; - dev_kfree_skb_any(priv->tx_skb[q][entry]); - priv->tx_skb[q][entry] = NULL; - stats->tx_packets++; - } - free_num++; - } - stats->tx_bytes += size; - desc->die_dt = DT_EEMPTY; - } - return free_num; -} - static void ravb_get_tx_tstamp(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); @@ -902,7 +917,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ ravb_write(ndev, ~mask, TIS); - ravb_tx_free(ndev, q); + ravb_tx_free(ndev, q, true); netif_wake_subqueue(ndev, q); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); @@ -1567,7 +1582,8 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) priv->cur_tx[q] += NUM_TX_DESC; if (priv->cur_tx[q] - priv->dirty_tx[q] > - (priv->num_tx_ring[q] - 1) * NUM_TX_DESC && !ravb_tx_free(ndev, q)) + (priv->num_tx_ring[q] - 1) * NUM_TX_DESC && + !ravb_tx_free(ndev, q, true)) netif_stop_subqueue(ndev, q); exit: -- cgit From 67ade058ef2c65a3e56878af9c293ec76722a2e5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:38 -0800 Subject: Btrfs: remove old tree_root case in btrfs_read_locked_inode() As Jeff explained in c2951f32d36c ("btrfs: remove old tree_root dirent processing in btrfs_real_readdir()"), supporting this old format is no longer necessary since the Btrfs magic number has been updated since we changed to the current format. There are other places where we still handle this old format, but since this is part of a fix that is going to stable, I'm only removing this one for now. Cc: # 4.9.x Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 177e7284909c..e2b961e20ec1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3835,10 +3835,7 @@ cache_acl: break; case S_IFDIR: inode->i_fop = &btrfs_dir_file_operations; - if (root == fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; -- cgit From 1fdf41941b8010691679638f8d0c8d08cfee7726 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:39 -0800 Subject: Btrfs: disable xattr operations on subvolume directories When you snapshot a subvolume containing a subvolume, you get a placeholder directory where the subvolume would be. These directory inodes have ->i_ops set to btrfs_dir_ro_inode_operations. Previously, these i_ops didn't include the xattr operation callbacks. The conversion to xattr_handlers missed this case, leading to bogus attempts to set xattrs on these inodes. This manifested itself as failures when running delayed inodes. To fix this, clear IOP_XATTR in ->i_opflags on these inodes. Fixes: 6c6ef9f26e59 ("xattr: Stop calling {get,set,remove}xattr inode operations") Cc: Andreas Gruenbacher Reported-by: Chris Murphy Tested-by: Chris Murphy Cc: # 4.9.x Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e2b961e20ec1..16694325a449 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5718,6 +5718,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; + inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = current_time(inode); -- cgit From 57b59ed2e5b91e958843609c7884794e29e6c4cb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:40 -0800 Subject: Btrfs: remove ->{get, set}_acl() from btrfs_dir_ro_inode_operations Subvolume directory inodes can't have ACLs. Cc: # 4.9.x Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 16694325a449..8a8e719778bd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10592,8 +10592,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, - .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; -- cgit From 6c971c09f38704513c426ba6515f22fb3d6c87d5 Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Thu, 26 Jan 2017 09:45:40 +0800 Subject: drm/ast: Fixed system hanged if disable P2A The original ast driver will access some BMC configuration through P2A bridge that can be disabled since AST2300 and after. It will cause system hanged if P2A bridge is disabled. Here is the update to fix it. Signed-off-by: Y.C. Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_drv.h | 1 + drivers/gpu/drm/ast/ast_main.c | 157 ++++++++++++++++++++++------------------- drivers/gpu/drm/ast/ast_post.c | 18 +++-- 3 files changed, 97 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 908011d2c8f5..7abda94fc2cf 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -113,6 +113,7 @@ struct ast_private { struct ttm_bo_kmap_obj cache_kmap; int next_cursor; bool support_wide_screen; + bool DisableP2A; enum ast_tx_chip tx_chip_type; u8 dp501_maxclk; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index f75c6421db62..533e762d036d 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -124,6 +124,12 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) } else *need_post = false; + /* Check P2A Access */ + ast->DisableP2A = true; + data = ast_read32(ast, 0xf004); + if (data != 0xFFFFFFFF) + ast->DisableP2A = false; + /* Check if we support wide screen */ switch (ast->chip) { case AST1180: @@ -140,15 +146,17 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->support_wide_screen = true; else { ast->support_wide_screen = false; - /* Read SCU7c (silicon revision register) */ - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - data = ast_read32(ast, 0x1207c); - data &= 0x300; - if (ast->chip == AST2300 && data == 0x0) /* ast1300 */ - ast->support_wide_screen = true; - if (ast->chip == AST2400 && data == 0x100) /* ast1400 */ - ast->support_wide_screen = true; + if (ast->DisableP2A == false) { + /* Read SCU7c (silicon revision register) */ + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + data = ast_read32(ast, 0x1207c); + data &= 0x300; + if (ast->chip == AST2300 && data == 0x0) /* ast1300 */ + ast->support_wide_screen = true; + if (ast->chip == AST2400 && data == 0x100) /* ast1400 */ + ast->support_wide_screen = true; + } } break; } @@ -216,80 +224,81 @@ static int ast_get_dram_info(struct drm_device *dev) uint32_t data, data2; uint32_t denum, num, div, ref_pll; - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - - - ast_write32(ast, 0x10000, 0xfc600309); - - do { - if (pci_channel_offline(dev->pdev)) - return -EIO; - } while (ast_read32(ast, 0x10000) != 0x01); - data = ast_read32(ast, 0x10004); - - if (data & 0x40) + if (ast->DisableP2A) + { ast->dram_bus_width = 16; + ast->dram_type = AST_DRAM_1Gx16; + ast->mclk = 396; + } else - ast->dram_bus_width = 32; + { + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + data = ast_read32(ast, 0x10004); + + if (data & 0x40) + ast->dram_bus_width = 16; + else + ast->dram_bus_width = 32; + + if (ast->chip == AST2300 || ast->chip == AST2400) { + switch (data & 0x03) { + case 0: + ast->dram_type = AST_DRAM_512Mx16; + break; + default: + case 1: + ast->dram_type = AST_DRAM_1Gx16; + break; + case 2: + ast->dram_type = AST_DRAM_2Gx16; + break; + case 3: + ast->dram_type = AST_DRAM_4Gx16; + break; + } + } else { + switch (data & 0x0c) { + case 0: + case 4: + ast->dram_type = AST_DRAM_512Mx16; + break; + case 8: + if (data & 0x40) + ast->dram_type = AST_DRAM_1Gx16; + else + ast->dram_type = AST_DRAM_512Mx32; + break; + case 0xc: + ast->dram_type = AST_DRAM_1Gx32; + break; + } + } - if (ast->chip == AST2300 || ast->chip == AST2400) { - switch (data & 0x03) { - case 0: - ast->dram_type = AST_DRAM_512Mx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_1Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_2Gx16; - break; + data = ast_read32(ast, 0x10120); + data2 = ast_read32(ast, 0x10170); + if (data2 & 0x2000) + ref_pll = 14318; + else + ref_pll = 12000; + + denum = data & 0x1f; + num = (data & 0x3fe0) >> 5; + data = (data & 0xc000) >> 14; + switch (data) { case 3: - ast->dram_type = AST_DRAM_4Gx16; - break; - } - } else { - switch (data & 0x0c) { - case 0: - case 4: - ast->dram_type = AST_DRAM_512Mx16; + div = 0x4; break; - case 8: - if (data & 0x40) - ast->dram_type = AST_DRAM_1Gx16; - else - ast->dram_type = AST_DRAM_512Mx32; + case 2: + case 1: + div = 0x2; break; - case 0xc: - ast->dram_type = AST_DRAM_1Gx32; + default: + div = 0x1; break; } + ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); } - - data = ast_read32(ast, 0x10120); - data2 = ast_read32(ast, 0x10170); - if (data2 & 0x2000) - ref_pll = 14318; - else - ref_pll = 12000; - - denum = data & 0x1f; - num = (data & 0x3fe0) >> 5; - data = (data & 0xc000) >> 14; - switch (data) { - case 3: - div = 0x4; - break; - case 2: - case 1: - div = 0x2; - break; - default: - div = 0x1; - break; - } - ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 810c51d92b99..5331ee1df086 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -379,12 +379,20 @@ void ast_post_gpu(struct drm_device *dev) ast_open_key(ast); ast_set_def_ext_reg(dev); - if (ast->chip == AST2300 || ast->chip == AST2400) - ast_init_dram_2300(dev); - else - ast_init_dram_reg(dev); + if (ast->DisableP2A == false) + { + if (ast->chip == AST2300 || ast->chip == AST2400) + ast_init_dram_2300(dev); + else + ast_init_dram_reg(dev); - ast_init_3rdtx(dev); + ast_init_3rdtx(dev); + } + else + { + if (ast->tx_chip_type != AST_TX_NONE) + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80); /* Enable DVO */ + } } /* AST 2300 DRAM settings */ -- cgit From cae9ff036eea577856d5b12860b4c79c5e71db4a Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 11 Jan 2017 21:25:23 -0500 Subject: drm/nouveau: Don't enabling polling twice on runtime resume As it turns out, on cards that actually have CRTCs on them we're already calling drm_kms_helper_poll_enable(drm_dev) from nouveau_display_resume() before we call it in nouveau_pmops_runtime_resume(). This leads us to accidentally trying to enable polling twice, which results in a potential deadlock between the RPM locks and drm_dev->mode_config.mutex if we end up trying to enable polling the second time while output_poll_execute is running and holding the mode_config lock. As such, make sure we only enable polling in nouveau_pmops_runtime_resume() if we need to. This fixes hangs observed on the ThinkPad W541 Signed-off-by: Lyude Cc: Hans de Goede Cc: Kilian Singer Cc: Lukas Wunner Cc: David Airlie Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_display.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_drm.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index cef08da1da4e..6a157763dfc3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -411,7 +411,8 @@ nouveau_display_init(struct drm_device *dev) return ret; /* enable polling for external displays */ - drm_kms_helper_poll_enable(dev); + if (!dev->mode_config.poll_enabled) + drm_kms_helper_poll_enable(dev); /* enable hotplug interrupts */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 59348fc41c77..bc85a45f91cd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -773,7 +773,10 @@ nouveau_pmops_runtime_resume(struct device *dev) pci_set_master(pdev); ret = nouveau_do_resume(drm_dev, true); - drm_kms_helper_poll_enable(drm_dev); + + if (!drm_dev->mode_config.poll_enabled) + drm_kms_helper_poll_enable(drm_dev); + /* do magic */ nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25)); vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON); -- cgit From 15266ae38fe09dae07bd8812cb7a7717b1e1d992 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 11 Jan 2017 21:25:24 -0500 Subject: drm/nouveau: Handle fbcon suspend/resume in seperate worker Resuming from RPM can happen while already holding dev->mode_config.mutex. This means we can't actually handle fbcon in any RPM resume workers, since restoring fbcon requires grabbing dev->mode_config.mutex again. So move the fbcon suspend/resume code into it's own worker, and rely on that instead to avoid deadlocking. This fixes more deadlocks for runtime suspending the GPU on the ThinkPad W541. Reproduction recipe: - Get a machine with both optimus and a nvidia card with connectors attached to it - Wait for the nvidia GPU to suspend - Attempt to manually reprobe any of the connectors on the nvidia GPU using sysfs - *deadlock* [airlied: use READ_ONCE to address Hans's comment] Signed-off-by: Lyude Cc: Hans de Goede Cc: Kilian Singer Cc: Lukas Wunner Cc: David Airlie Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 ++ drivers/gpu/drm/nouveau/nouveau_fbcon.c | 43 ++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8d5ed5bfdacb..42c1fa53d431 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -165,6 +165,8 @@ struct nouveau_drm { struct backlight_device *backlight; struct list_head bl_connectors; struct work_struct hpd_work; + struct work_struct fbcon_work; + int fbcon_new_state; #ifdef CONFIG_ACPI struct notifier_block acpi_nb; #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 2f2a3dcd4ad7..fa2d0a978ccc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -470,19 +470,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = { .fb_probe = nouveau_fbcon_create, }; +static void +nouveau_fbcon_set_suspend_work(struct work_struct *work) +{ + struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work); + int state = READ_ONCE(drm->fbcon_new_state); + + if (state == FBINFO_STATE_RUNNING) + pm_runtime_get_sync(drm->dev->dev); + + console_lock(); + if (state == FBINFO_STATE_RUNNING) + nouveau_fbcon_accel_restore(drm->dev); + drm_fb_helper_set_suspend(&drm->fbcon->helper, state); + if (state != FBINFO_STATE_RUNNING) + nouveau_fbcon_accel_save_disable(drm->dev); + console_unlock(); + + if (state == FBINFO_STATE_RUNNING) { + pm_runtime_mark_last_busy(drm->dev->dev); + pm_runtime_put_sync(drm->dev->dev); + } +} + void nouveau_fbcon_set_suspend(struct drm_device *dev, int state) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { - console_lock(); - if (state == FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_restore(dev); - drm_fb_helper_set_suspend(&drm->fbcon->helper, state); - if (state != FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_save_disable(dev); - console_unlock(); - } + + if (!drm->fbcon) + return; + + drm->fbcon_new_state = state; + /* Since runtime resume can happen as a result of a sysfs operation, + * it's possible we already have the console locked. So handle fbcon + * init/deinit from a seperate work thread + */ + schedule_work(&drm->fbcon_work); } int @@ -502,6 +526,7 @@ nouveau_fbcon_init(struct drm_device *dev) return -ENOMEM; drm->fbcon = fbcon; + INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work); drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs); -- cgit From 242ef5d483594a2bed6b8a2685849c83e7810d17 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 18 Jan 2017 14:14:01 -0800 Subject: drm/vmwgfx: Fix depth input into drm_mode_legacy_fb_format Currently the pitch is passed in as depth. This causes drm_mode_legacy_fb_format() to return the wrong pixel format. The wrong pixel format will be rejected by vmw_kms_new_framebuffer(), thus leaving par->set_fb to NULL. This eventually causes a crash in vmw_fb_setcolreg() when the code tries to dereference par->set_fb. Signed-off-by: Sinclair Yeh Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 723fd763da8e..7a96798b9c0a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -481,8 +481,7 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info) mode_cmd.height = var->yres; mode_cmd.pitches[0] = ((var->bits_per_pixel + 7) / 8) * mode_cmd.width; mode_cmd.pixel_format = - drm_mode_legacy_fb_format(var->bits_per_pixel, - ((var->bits_per_pixel + 7) / 8) * mode_cmd.width); + drm_mode_legacy_fb_format(var->bits_per_pixel, depth); cur_fb = par->set_fb; if (cur_fb && cur_fb->width == mode_cmd.width && -- cgit From 191e885a2e130e639bb0c8ee350d7047294f2ce6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 25 Jan 2017 10:31:52 -0800 Subject: firmware: fix NULL pointer dereference in __fw_load_abort() Since commit 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return value") fw_load_abort() could be called twice and lead us to a kernel crash. This happens only when the firmware fallback mechanism (regular or custom) is used. The fallback mechanism exposes a sysfs interface for userspace to upload a file and notify the kernel when the file is loaded and ready, or to cancel an upload by echo'ing -1 into on the loading file: echo -n "-1" > /sys/$DEVPATH/loading This will call fw_load_abort(). Some distributions actually have a udev rule in place to *always* immediately cancel all firmware fallback mechanism requests (Debian), they have: $ cat /lib/udev/rules.d/50-firmware.rules # stub for immediately telling the kernel that userspace firmware loading # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1 Distributions with this udev rule would run into this crash only if the fallback mechanism is used. Since most distributions disable by default using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK), this would typicaly mean only 2 drivers which *require* the fallback mechanism could typically incur a crash: drivers/firmware/dell_rbu.c and the drivers/leds/leds-lp55xx-common.c driver. Distributions enabling CONFIG_FW_LOADER_USER_HELPER_FALLBACK by default are obviously more exposed to this crash. The crash happens because after commit 5b029624948d ("firmware: do not use fw_lock for fw_state protection") and subsequent fix commit 5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return value") a race can happen between this cancelation and the firmware fw_state_wait_timeout() being woken up after a state change with which fw_load_abort() as that calls swake_up(). Upon error fw_state_wait_timeout() will also again call fw_load_abort() and trigger a null reference. At first glance we could just fix this with a !buf check on fw_load_abort() before accessing buf->fw_st, however there is a logical issue in having a state machine used for the fallback mechanism and preventing access from it once we abort as its inside the buf (buf->fw_st). The firmware_class.c code is setting the buf to NULL to annotate an abort has occurred. Replace this mechanism by simply using the state check instead. All the other code in place already uses similar checks for aborting as well so no further changes are needed. An oops can be reproduced with the new fw_fallback.sh fallback mechanism cancellation test. Either cancelling the fallback mechanism or the custom fallback mechanism triggers a crash. mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware (git::20170111-fw-fixes)$ sudo ./fw_fallback.sh ./fw_fallback.sh: timeout works ./fw_fallback.sh: firmware comparison works ./fw_fallback.sh: fallback mechanism works [ this then sits here when it is trying the cancellation test ] Kernel log: test_firmware: loading 'nope-test-firmware.bin' misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2 misc test_firmware: Falling back to user helper BUG: unable to handle kernel NULL pointer dereference at 0000000000000038 IP: _request_firmware+0xa27/0xad0 PGD 0 Oops: 0000 [#1] SMP Modules linked in: test_firmware(E) ... etc ... CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G W E 4.10.0-rc3-next-20170111+ #30 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000 RIP: 0010:_request_firmware+0xa27/0xad0 RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246 RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000 RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000 RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000 R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480 R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8 FS: 00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0 Call Trace: request_firmware+0x37/0x50 trigger_request_store+0x79/0xd0 [test_firmware] dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x110/0x1a0 __vfs_write+0x37/0x160 ? _cond_resched+0x1a/0x50 vfs_write+0xb5/0x1a0 SyS_write+0x55/0xc0 ? trace_do_page_fault+0x37/0xd0 entry_SYSCALL_64_fastpath+0x1e/0xad RIP: 0033:0x7f8603f49620 RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620 RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001 RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336 R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80 R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950 Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b 45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00 RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10 CR2: 0000000000000038 ---[ end trace 6d94ac339c133e6f ]--- Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value") Reported-and-Tested-by: Jakub Kicinski Reported-and-Tested-by: Patrick Bruenn Reported-by: Chris Wilson CC: [3.10+] Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 4497d263209f..ac350c518e0c 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -558,9 +558,6 @@ static void fw_load_abort(struct firmware_priv *fw_priv) struct firmware_buf *buf = fw_priv->buf; __fw_load_abort(buf); - - /* avoid user action after loading abort */ - fw_priv->buf = NULL; } static LIST_HEAD(pending_fw_head); @@ -713,7 +710,7 @@ static ssize_t firmware_loading_store(struct device *dev, mutex_lock(&fw_lock); fw_buf = fw_priv->buf; - if (!fw_buf) + if (fw_state_is_aborted(&fw_buf->fw_st)) goto out; switch (loading) { -- cgit From 9b02c54bc951fca884ba5719f42a27e8240965bf Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Mon, 19 Dec 2016 12:10:27 -0800 Subject: cpufreq: brcmstb-avs-cpufreq: extend sysfs entry brcm_avs_pmap We extend the brcm_avs_pmap sysfs entry (which issues the GET_PMAP command to AVS) to include all fields from struct pmap. This means adding mode (AVS, DVS, DVFS) and state (the P-state) to the output. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 4fda623e55bb..2c6e3253ba64 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -954,9 +954,9 @@ static ssize_t show_brcm_avs_pmap(struct cpufreq_policy *policy, char *buf) brcm_avs_parse_p1(pmap.p1, &mdiv_p0, &pdiv, &ndiv); brcm_avs_parse_p2(pmap.p2, &mdiv_p1, &mdiv_p2, &mdiv_p3, &mdiv_p4); - return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u\n", + return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u %u %u\n", pmap.p1, pmap.p2, ndiv, pdiv, mdiv_p0, mdiv_p1, mdiv_p2, - mdiv_p3, mdiv_p4); + mdiv_p3, mdiv_p4, pmap.mode, pmap.state); } static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf) -- cgit From 3c223c19aea85d3dda1416c187915f4a30b04b1f Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Mon, 19 Dec 2016 12:10:28 -0800 Subject: cpufreq: brcmstb-avs-cpufreq: properly retrieve P-state upon suspend The AVS GET_PMAP command does return a P-state along with the P-map information. However, that P-state is the initial P-state when the P-map was first downloaded to AVS. It is *not* the current P-state. Therefore, we explicitly retrieve the P-state using the GET_PSTATE command. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 2c6e3253ba64..c94360671f41 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -784,8 +784,19 @@ static int brcm_avs_target_index(struct cpufreq_policy *policy, static int brcm_avs_suspend(struct cpufreq_policy *policy) { struct private_data *priv = policy->driver_data; + int ret; + + ret = brcm_avs_get_pmap(priv, &priv->pmap); + if (ret) + return ret; - return brcm_avs_get_pmap(priv, &priv->pmap); + /* + * We can't use the P-state returned by brcm_avs_get_pmap(), since + * that's the initial P-state from when the P-map was downloaded to the + * AVS co-processor, not necessarily the P-state we are running at now. + * So, we get the current P-state explicitly. + */ + return brcm_avs_get_pstate(priv, &priv->pmap.state); } static int brcm_avs_resume(struct cpufreq_policy *policy) -- cgit From 606f42265d384b9149bfb953c5dfc6d4710fef4c Mon Sep 17 00:00:00 2001 From: Prashanth Prakash Date: Thu, 26 Jan 2017 11:08:32 -0700 Subject: arm64: skip register_cpufreq_notifier on ACPI-based systems On ACPI based systems where the topology is setup using the API store_cpu_topology, at the moment we do not have necessary code to parse cpu capacity and handle cpufreq notifier, thus resulting in a kernel panic. Stack: init_cpu_capacity_callback+0xb4/0x1c8 notifier_call_chain+0x5c/0xa0 __blocking_notifier_call_chain+0x58/0xa0 blocking_notifier_call_chain+0x3c/0x50 cpufreq_set_policy+0xe4/0x328 cpufreq_init_policy+0x80/0x100 cpufreq_online+0x418/0x710 cpufreq_add_dev+0x118/0x180 subsys_interface_register+0xa4/0xf8 cpufreq_register_driver+0x1c0/0x298 cppc_cpufreq_init+0xdc/0x1000 [cppc_cpufreq] do_one_initcall+0x5c/0x168 do_init_module+0x64/0x1e4 load_module+0x130c/0x14d0 SyS_finit_module+0x108/0x120 el0_svc_naked+0x24/0x28 Fixes: 7202bde8b7ae ("arm64: parse cpu capacity-dmips-mhz from DT") Acked-by: Will Deacon Signed-off-by: Prashanth Prakash Signed-off-by: Catalin Marinas --- arch/arm64/kernel/topology.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 23e9e13bd2aa..655e65f38f31 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -11,6 +11,7 @@ * for more details. */ +#include #include #include #include @@ -209,7 +210,12 @@ static struct notifier_block init_cpu_capacity_notifier = { static int __init register_cpufreq_notifier(void) { - if (cap_parsing_failed) + /* + * on ACPI-based systems we need to use the default cpu capacity + * until we have the necessary code to parse the cpu capacity, so + * skip registering cpufreq notifier. + */ + if (!acpi_disabled || cap_parsing_failed) return -EINVAL; if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { -- cgit From 8413299cb3933dade6186bbee8363f190032107e Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 27 Jan 2017 15:45:11 +0100 Subject: ARM: dts: STiH407-family: set snps,dis_u3_susphy_quirk Since v4.10-rc1, the following logs appears in loop : [ 801.953836] usb usb6-port1: Cannot enable. Maybe the USB cable is bad? [ 801.960455] xhci-hcd xhci-hcd.0.auto: Cannot set link state. [ 801.966611] usb usb6-port1: cannot disable (err = -32) [ 806.083772] usb usb6-port1: Cannot enable. Maybe the USB cable is bad? [ 806.090370] xhci-hcd xhci-hcd.0.auto: Cannot set link state. [ 806.096494] usb usb6-port1: cannot disable (err = -32) After analysis, xhci try to set link in U3 and returns an error. Using snps,dis_u3_susphy_quirk fix this issue. Signed-off-by: Patrice Chotard --- arch/arm/boot/dts/stih407-family.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index c8b2944e304a..ace97e8576db 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -680,6 +680,7 @@ phy-names = "usb2-phy", "usb3-phy"; phys = <&usb2_picophy0>, <&phy_port2 PHY_TYPE_USB3>; + snps,dis_u3_susphy_quirk; }; }; -- cgit From 92e55f412cffd016cc245a74278cb4d7b89bb3bc Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Thu, 26 Jan 2017 22:56:21 +0100 Subject: tcp: don't annotate mark on control socket from tcp_v6_send_response() Unlike ipv4, this control socket is shared by all cpus so we cannot use it as scratchpad area to annotate the mark that we pass to ip6_xmit(). Add a new parameter to ip6_xmit() to indicate the mark. The SCTP socket family caches the flowi6 structure in the sctp_transport structure, so we cannot use to carry the mark unless we later on reset it back, which I discarded since it looks ugly to me. Fixes: bf99b4ded5f8 ("tcp: fix mark propagation with fwmark_reflect enabled") Suggested-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 +- net/dccp/ipv6.c | 4 ++-- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/tcp_ipv6.c | 5 ++--- net/sctp/ipv6.c | 3 ++- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 487e57391664..7afe991e900e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -871,7 +871,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); * upper-layer output functions */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - struct ipv6_txoptions *opt, int tclass); + __u32 mark, struct ipv6_txoptions *opt, int tclass); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index adfc790f7193..c4e879c02186 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -227,7 +227,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -281,7 +281,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); + ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7396e75e161b..75c308239243 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -176,7 +176,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass); rcu_read_unlock(); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 38122d04fadc..2c0df09e9036 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -172,7 +172,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) * which are using proper atomic operations or spinlocks. */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - struct ipv6_txoptions *opt, int tclass) + __u32 mark, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); @@ -240,7 +240,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b20622a5824..cb8929681dc7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -469,7 +469,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -840,8 +840,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ctl_sk->sk_mark = fl6.flowi6_mark; - ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); + ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 5ed8e79bf102..64dfd35ccdcc 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -222,7 +222,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); rcu_read_lock(); - res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } -- cgit From ab729823ec16aef384f09fd2cffe0b3d3f6e6cba Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Fri, 27 Jan 2017 10:40:56 +0100 Subject: gtp: add genl family modules alias Auto-load the module when userspace asks for the gtp netlink family. Signed-off-by: Andreas Schultz Acked-by: Harald Welte Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/gtp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 8b6810bad54b..7580ccc50e1d 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1376,3 +1376,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Interface driver for GTP encapsulated traffic"); MODULE_ALIAS_RTNL_LINK("gtp"); +MODULE_ALIAS_GENL_FAMILY("gtp"); -- cgit From c6ce1d08eede4c2968ed08aafa3165e8e183c5a1 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Fri, 27 Jan 2017 10:40:57 +0100 Subject: gtp: clear DF bit on GTP packet tx 3GPP TS 29.281 and 3GPP TS 29.060 imply that GTP-U packets should be sent with the DF bit cleared. For example 3GPP TS 29.060, Release 8, Section 13.2.2: > Backbone router: Any router in the backbone may fragment the GTP > packet if needed, according to IPv4. Signed-off-by: Andreas Schultz Acked-by: Harald Welte Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7580ccc50e1d..1df54d626f20 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -612,7 +612,7 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) pktinfo.fl4.saddr, pktinfo.fl4.daddr, pktinfo.iph->tos, ip4_dst_hoplimit(&pktinfo.rt->dst), - htons(IP_DF), + 0, pktinfo.gtph_port, pktinfo.gtph_port, true, false); break; -- cgit From 3ab1b469e847ba425af3c5ad5068cc94b55b38d0 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Fri, 27 Jan 2017 10:40:58 +0100 Subject: gtp: fix cross netns recv on gtp socket The use of the passed through netlink src_net to check for a cross netns operation was wrong. Using the GTP socket and the GTP netdevice is always correct (even if the netdev has been moved to new netns after link creation). Remove the now obsolete net field from gtp_dev. Signed-off-by: Andreas Schultz Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/gtp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 1df54d626f20..99d3df788ce8 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -69,7 +69,6 @@ struct gtp_dev { struct socket *sock0; struct socket *sock1u; - struct net *net; struct net_device *dev; unsigned int hash_size; @@ -316,7 +315,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - xnet = !net_eq(gtp->net, dev_net(gtp->dev)); + xnet = !net_eq(sock_net(sk), dev_net(gtp->dev)); switch (udp_sk(sk)->encap_type) { case UDP_ENCAP_GTP0: @@ -658,7 +657,7 @@ static void gtp_link_setup(struct net_device *dev) static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); static void gtp_hashtable_free(struct gtp_dev *gtp); static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net); + int fd_gtp0, int fd_gtp1); static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -675,7 +674,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, fd0 = nla_get_u32(data[IFLA_GTP_FD0]); fd1 = nla_get_u32(data[IFLA_GTP_FD1]); - err = gtp_encap_enable(dev, gtp, fd0, fd1, src_net); + err = gtp_encap_enable(dev, gtp, fd0, fd1); if (err < 0) goto out_err; @@ -821,7 +820,7 @@ static void gtp_hashtable_free(struct gtp_dev *gtp) } static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, - int fd_gtp0, int fd_gtp1, struct net *src_net) + int fd_gtp0, int fd_gtp1) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct socket *sock0, *sock1u; @@ -858,7 +857,6 @@ static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp, gtp->sock0 = sock0; gtp->sock1u = sock1u; - gtp->net = src_net; tuncfg.sk_user_data = gtp; tuncfg.encap_rcv = gtp_encap_recv; -- cgit From feb3cbea0946c67060e2d5bcb7499b0a6f6700fe Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 20 Jan 2017 08:20:24 -0800 Subject: ARM64: dts: meson-gxbb-odroidc2: fix GbE tx link breakage OdroidC2 GbE link breaks under heavy tx transfer. This happens even if the MAC does not enable Energy Efficient Ethernet (No Low Power state Idle on the Tx path). The problem seems to come from the phy Rx path, entering the LPI state. Disabling EEE advertisement on the phy prevent this feature to be negociated with the link partner and solve the issue. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 5d28e1cdc998..c59403adb387 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -151,6 +151,18 @@ status = "okay"; pinctrl-0 = <ð_rgmii_pins>; pinctrl-names = "default"; + phy-handle = <ð_phy0>; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + eth_phy0: ethernet-phy@0 { + reg = <0>; + eee-broken-1000t; + }; + }; }; &ir { -- cgit From bba8e3f42736cf7f974968a818e53b128286ad1d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 20 Jan 2017 08:20:25 -0800 Subject: ARM64: dts: meson-gx: Add firmware reserved memory zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Amlogic Meson GXBB/GXL/GXM secure monitor uses part of the memory space, this patch adds these reserved zones. Without such reserved memory zones, running the following stress command : $ stress-ng --vm 16 --vm-bytes 128M --timeout 10s multiple times: Could lead to the following kernel crashes : [ 46.937975] Bad mode in Error handler detected on CPU1, code 0xbf000000 -- SError ... [ 47.058536] Internal error: Attempting to execute userspace memory: 8600000f [#3] PREEMPT SMP ... Instead of the OOM killer. Fixes: 4f24eda8401f ("ARM64: dts: Prepare configs for Amlogic Meson GXBaby") Signed-off-by: Neil Armstrong Reviewed-by: Andreas Färber [khilman: added Fixes tag, added _reserved and unit addresses] Signed-off-by: Kevin Hilman Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index eada0b58ba1c..0cbe24b49710 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -55,6 +55,24 @@ #address-cells = <2>; #size-cells = <2>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + /* 16 MiB reserved for Hardware ROM Firmware */ + hwrom_reserved: hwrom@0 { + reg = <0x0 0x0 0x0 0x1000000>; + no-map; + }; + + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ + secmon_reserved: secmon@10000000 { + reg = <0x0 0x10000000 0x0 0x200000>; + no-map; + }; + }; + cpus { #address-cells = <0x2>; #size-cells = <0x0>; -- cgit From 9d162ed69f51cbd9ee5a0c7e82aba7acc96362ff Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 27 Jan 2017 08:46:23 +0100 Subject: net: phy: micrel: add support for KSZ8795 This is adds support for the PHYs in the KSZ8795 5port managed switch. It will allow to detect the link between the switch and the soc and uses the same read_status functions as the KSZ8873MLL switch. Signed-off-by: Sean Nyekjaer Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 14 ++++++++++++++ include/linux/micrel_phy.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9a77289109b7..e55809c5beb7 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1008,6 +1008,20 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, +}, { + .phy_id = PHY_ID_KSZ8795, + .phy_id_mask = MICREL_PHY_ID_MASK, + .name = "Micrel KSZ8795", + .features = (SUPPORTED_Pause | SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = kszphy_config_init, + .config_aneg = ksz8873mll_config_aneg, + .read_status = ksz8873mll_read_status, + .get_sset_count = kszphy_get_sset_count, + .get_strings = kszphy_get_strings, + .get_stats = kszphy_get_stats, + .suspend = genphy_suspend, + .resume = genphy_resume, } }; module_phy_driver(ksphy_driver); diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 257173e0095e..f541da68d1e7 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -35,6 +35,8 @@ #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 +#define PHY_ID_KSZ8795 0x00221550 + /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 #define MICREL_PHY_FXEN 0x00000002 -- cgit From 950eabbd6ddedc1b08350b9169a6a51b130ebaaf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2017 13:32:14 +0100 Subject: ISDN: eicon: silence misleading array-bounds warning With some gcc versions, we get a warning about the eicon driver, and that currently shows up as the only remaining warning in one of the build bots: In file included from ../drivers/isdn/hardware/eicon/message.c:30:0: eicon/message.c: In function 'mixer_notify_update': eicon/platform.h:333:18: warning: array subscript is above array bounds [-Warray-bounds] The code is easily changed to open-code the unusual PUT_WORD() line causing this to avoid the warning. Cc: stable@vger.kernel.org Link: http://arm-soc.lixom.net/buildlogs/stable-rc/v4.4.45/ Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/hardware/eicon/message.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 1a1d99704fe6..296f1411fe84 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -11297,7 +11297,8 @@ static void mixer_notify_update(PLCI *plci, byte others) ((CAPI_MSG *) msg)->header.ncci = 0; ((CAPI_MSG *) msg)->info.facility_req.Selector = SELECTOR_LINE_INTERCONNECT; ((CAPI_MSG *) msg)->info.facility_req.structs[0] = 3; - PUT_WORD(&(((CAPI_MSG *) msg)->info.facility_req.structs[1]), LI_REQ_SILENT_UPDATE); + ((CAPI_MSG *) msg)->info.facility_req.structs[1] = LI_REQ_SILENT_UPDATE & 0xff; + ((CAPI_MSG *) msg)->info.facility_req.structs[2] = LI_REQ_SILENT_UPDATE >> 8; ((CAPI_MSG *) msg)->info.facility_req.structs[3] = 0; w = api_put(notify_plci->appl, (CAPI_MSG *) msg); if (w != _QUEUE_FULL) -- cgit From e0d76fa4475ef2cf4b52d18588b8ce95153d021b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 26 Jan 2017 13:18:09 -0800 Subject: xfs: prevent quotacheck from overloading inode lru Quotacheck runs at mount time in situations where quota accounting must be recalculated. In doing so, it uses bulkstat to visit every inode in the filesystem. Historically, every inode processed during quotacheck was released and immediately tagged for reclaim because quotacheck runs before the superblock is marked active by the VFS. In other words, the final iput() lead to an immediate ->destroy_inode() call, which allowed the XFS background reclaim worker to start reclaiming inodes. Commit 17c12bcd3 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") marks the XFS superblock active sooner as part of the mount process to support caching inodes processed during log recovery. This occurs before quotacheck and thus means all inodes processed by quotacheck are inserted to the LRU on release. The s_umount lock is held until the mount has completed and thus prevents the shrinkers from operating on the sb. This means that quotacheck can excessively populate the inode LRU and lead to OOM conditions on systems without sufficient RAM. Update the quotacheck bulkstat handler to set XFS_IGET_DONTCACHE on inodes processed by quotacheck. This causes ->drop_inode() to return 1 and in turn causes iput_final() to evict the inode. This preserves the original quotacheck behavior and prevents it from overloading the LRU and running out of memory. CC: stable@vger.kernel.org # v4.9 Reported-by: Martin Svec Signed-off-by: Brian Foster Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_qm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, + &ip); if (error) { *res = BULKSTAT_RV_NOTHING; return error; -- cgit From 9aed02feae57bf7a40cb04ea0e3017cb7a998db4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 27 Jan 2017 10:45:27 -0800 Subject: ARC: [arcompact] handle unaligned access delay slot corner case After emulating an unaligned access in delay slot of a branch, we pretend as the delay slot never happened - so return back to actual branch target (or next PC if branch was not taken). Curently we did this by handling STATUS32.DE, we also need to clear the BTA.T bit, which is disregarded when returning from original misaligned exception, but could cause weirdness if it took the interrupt return path (in case interrupt was acive too) One ARC700 customer ran into this when enabling unaligned access fixup for kernel mode accesses as well Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/kernel/unaligned.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index abd961f3e763..91ebe382147f 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -241,8 +241,9 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, if (state.fault) goto fault; + /* clear any remanants of delay slot */ if (delay_mode(regs)) { - regs->ret = regs->bta; + regs->ret = regs->bta ~1U; regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; -- cgit From b4cfe3971f6eab542dd7ecc398bfa1aeec889934 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 15 Jan 2017 20:15:00 +0200 Subject: RDMA/cma: Fix unknown symbol when CONFIG_IPV6 is not enabled If IPV6 has not been enabled in the underlying kernel, we must avoid calling IPV6 procedures in rdma_cm.ko. This requires using "IS_ENABLED(CONFIG_IPV6)" in "if" statements surrounding any code which calls external IPV6 procedures. In the instance fixed here, procedure cma_bind_addr() called ipv6_addr_type() -- which resulted in calling external procedure __ipv6_addr_type(). Fixes: 6c26a77124ff ("RDMA/cma: fix IPv6 address resolution") Cc: # v4.2+ Cc: Spencer Baugh Signed-off-by: Jack Morgenstein Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e7dcfac877ca..3e70a9c5d79d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2811,7 +2811,8 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; - if (dst_addr->sa_family == AF_INET6) { + if (IS_ENABLED(CONFIG_IPV6) && + dst_addr->sa_family == AF_INET6) { struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; -- cgit From 030305d69fc6963c16003f50d7e8d74b02d0a143 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jan 2017 15:00:45 -0600 Subject: PCI/ASPM: Handle PCI-to-PCIe bridges as roots of PCIe hierarchies In a struct pcie_link_state, link->root points to the pcie_link_state of the root of the PCIe hierarchy. For the topmost link, this points to itself (link->root = link). For others, we copy the pointer from the parent (link->root = link->parent->root). Previously we recognized that Root Ports originated PCIe hierarchies, but we treated PCI/PCI-X to PCIe Bridges as being in the middle of the hierarchy, and when we tried to copy the pointer from link->parent->root, there was no parent, and we dereferenced a NULL pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000090 IP: [] pcie_aspm_init_link_state+0x170/0x820 Recognize that PCI/PCI-X to PCIe Bridges originate PCIe hierarchies just like Root Ports do, so link->root for these devices should also point to itself. Fixes: 51ebfc92b72b ("PCI: Enumerate switches below PCI-to-PCIe bridges") Link: https://bugzilla.kernel.org/show_bug.cgi?id=193411 Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1022181 Tested-by: lists@ssl-mail.com Tested-by: Jayachandran C. Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.2+ --- drivers/pci/pcie/aspm.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 17ac1dce3286..3dd8bcbb3011 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -532,25 +532,32 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return NULL; + INIT_LIST_HEAD(&link->sibling); INIT_LIST_HEAD(&link->children); INIT_LIST_HEAD(&link->link); link->pdev = pdev; - if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) { + + /* + * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe + * hierarchies. + */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE) { + link->root = link; + } else { struct pcie_link_state *parent; + parent = pdev->bus->parent->self->link_state; if (!parent) { kfree(link); return NULL; } + link->parent = parent; + link->root = link->parent->root; list_add(&link->link, &parent->children); } - /* Setup a pointer to the root port link */ - if (!link->parent) - link->root = link; - else - link->root = link->parent->root; list_add(&link->sibling, &link_list); pdev->link_state = link; -- cgit From 2b1d530cb3157f828fcaadd259613f59db3c6d1c Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 27 Jan 2017 14:19:25 +0200 Subject: MAINTAINERS: ath9k-devel is closed ath9k-devel list is now closed, only linux-wireless should be used. Reported-by: Michael Renzmann Signed-off-by: Kalle Valo --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..d7699d5b5863 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10169,7 +10169,6 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH9K WIRELESS DRIVER M: QCA ath9k Development L: linux-wireless@vger.kernel.org -L: ath9k-devel@lists.ath9k.org W: http://wireless.kernel.org/en/users/Drivers/ath9k S: Supported F: drivers/net/wireless/ath/ath9k/ -- cgit From bf29bddf0417a4783da3b24e8c9e017ac649326f Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Jan 2017 22:25:52 +0000 Subject: x86/efi: Always map the first physical page into the EFI pagetables Commit: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") stopped creating 1:1 mappings for all RAM, when running in native 64-bit mode. It turns out though that there are 64-bit EFI implementations in the wild (this particular problem has been reported on a Lenovo Yoga 710-11IKB), which still make use of the first physical page for their own private use, even though they explicitly mark it EFI_CONVENTIONAL_MEMORY in the memory map. In case there is no mapping for this particular frame in the EFI pagetables, as soon as firmware tries to make use of it, a triple fault occurs and the system reboots (in case of the Yoga 710-11IKB this is very early during bootup). Fix that by always mapping the first page of physical memory into the EFI pagetables. We're free to hand this page to the BIOS, as trim_bios_range() will reserve the first page and isolate it away from memory allocators anyway. Note that just reverting 129766708 alone is not enough on v4.9-rc1+ to fix the regression on affected hardware, as this commit: ab72a27da ("x86/efi: Consolidate region mapping logic") later made the first physical frame not to be mapped anyway. Reported-by: Hanka Pavlikova Signed-off-by: Jiri Kosina Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Borislav Petkov Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vojtech Pavlik Cc: Waiman Long Cc: linux-efi@vger.kernel.org Cc: stable@kernel.org # v4.8+ Fixes: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") Link: http://lkml.kernel.org/r/20170127222552.22336-1-matt@codeblueprint.co.uk [ Tidied up the changelog and the comment. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 319148bd4b05..2f25a363068c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -268,6 +268,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.use_pgd = true; + /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel -- cgit From 966d2b04e070bc040319aaebfec09e0144dc3341 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Sat, 28 Jan 2017 06:42:20 -0600 Subject: percpu-refcount: fix reference leak during percpu-atomic transition percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller Reviewed-by: Jens Axboe Signed-off-by: Tejun Heo Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+ --- include/linux/percpu-refcount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 1c7eec09e5eb..3a481a49546e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) static inline bool percpu_ref_tryget(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret; + bool ret; rcu_read_lock_sched(); @@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret = false; + bool ret = false; rcu_read_lock_sched(); -- cgit From 83b5d1e3d3013dbf90645a5d07179d018c8243fa Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 3 Jan 2017 22:55:50 +0100 Subject: parisc, parport_gsc: Fixes for printk continuation lines Signed-off-by: Helge Deller --- drivers/parport/parport_gsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c index dd6d4ccb41e4..3858b87fd0bb 100644 --- a/drivers/parport/parport_gsc.c +++ b/drivers/parport/parport_gsc.c @@ -293,7 +293,7 @@ struct parport *parport_gsc_probe_port(unsigned long base, p->irq = PARPORT_IRQ_NONE; } if (p->irq != PARPORT_IRQ_NONE) { - printk(", irq %d", p->irq); + pr_cont(", irq %d", p->irq); if (p->dma == PARPORT_DMA_AUTO) { p->dma = PARPORT_DMA_NONE; @@ -303,8 +303,8 @@ struct parport *parport_gsc_probe_port(unsigned long base, is mandatory (see above) */ p->dma = PARPORT_DMA_NONE; - printk(" ["); -#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}} + pr_cont(" ["); +#define printmode(x) {if(p->modes&PARPORT_MODE_##x){pr_cont("%s%s",f?",":"",#x);f++;}} { int f = 0; printmode(PCSPP); @@ -315,7 +315,7 @@ struct parport *parport_gsc_probe_port(unsigned long base, // printmode(DMA); } #undef printmode - printk("]\n"); + pr_cont("]\n"); if (p->irq != PARPORT_IRQ_NONE) { if (request_irq (p->irq, parport_irq_handler, -- cgit From 2ad5d52d42810bed95100a3d912679d8864421ec Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 28 Jan 2017 11:52:02 +0100 Subject: parisc: Don't use BITS_PER_LONG in userspace-exported swab.h header In swab.h the "#if BITS_PER_LONG > 32" breaks compiling userspace programs if BITS_PER_LONG is #defined by userspace with the sizeof() compiler builtin. Solve this problem by using __BITS_PER_LONG instead. Since we now #include asm/bitsperlong.h avoid further potential userspace pollution by moving the #define of SHIFT_PER_LONG to bitops.h which is not exported to userspace. This patch unbreaks compiling qemu on hppa/parisc. Signed-off-by: Helge Deller Cc: --- arch/parisc/include/asm/bitops.h | 8 +++++++- arch/parisc/include/uapi/asm/bitsperlong.h | 2 -- arch/parisc/include/uapi/asm/swab.h | 5 +++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 3f9406d9b9d6..da87943328a5 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -6,7 +6,7 @@ #endif #include -#include /* for BITS_PER_LONG/SHIFT_PER_LONG */ +#include #include #include #include @@ -17,6 +17,12 @@ * to include/asm-i386/bitops.h or kerneldoc */ +#if __BITS_PER_LONG == 64 +#define SHIFT_PER_LONG 6 +#else +#define SHIFT_PER_LONG 5 +#endif + #define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1)) diff --git a/arch/parisc/include/uapi/asm/bitsperlong.h b/arch/parisc/include/uapi/asm/bitsperlong.h index e0a23c7bdd43..07fa7e50bdc0 100644 --- a/arch/parisc/include/uapi/asm/bitsperlong.h +++ b/arch/parisc/include/uapi/asm/bitsperlong.h @@ -3,10 +3,8 @@ #if defined(__LP64__) #define __BITS_PER_LONG 64 -#define SHIFT_PER_LONG 6 #else #define __BITS_PER_LONG 32 -#define SHIFT_PER_LONG 5 #endif #include diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h index e78403b129ef..928e1bbac98f 100644 --- a/arch/parisc/include/uapi/asm/swab.h +++ b/arch/parisc/include/uapi/asm/swab.h @@ -1,6 +1,7 @@ #ifndef _PARISC_SWAB_H #define _PARISC_SWAB_H +#include #include #include @@ -38,7 +39,7 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x) } #define __arch_swab32 __arch_swab32 -#if BITS_PER_LONG > 32 +#if __BITS_PER_LONG > 32 /* ** From "PA-RISC 2.0 Architecture", HP Professional Books. ** See Appendix I page 8 , "Endian Byte Swapping". @@ -61,6 +62,6 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 x) return x; } #define __arch_swab64 __arch_swab64 -#endif /* BITS_PER_LONG > 32 */ +#endif /* __BITS_PER_LONG > 32 */ #endif /* _PARISC_SWAB_H */ -- cgit From 9eb7892351a3a3b403d879b41c4e6efb2c96516f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 11 Jan 2017 19:35:41 +0200 Subject: net/mlx5: Change ENOTSUPP to EOPNOTSUPP As ENOTSUPP is specific to NFS, change the return error value to EOPNOTSUPP in various places in the mlx5 driver. Signed-off-by: Or Gerlitz Suggested-by: Yotam Gigi Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 +- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3797cc7c1288..caa837e5e2b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1728,7 +1728,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (cmd->cmdif_rev > CMD_IF_REV) { dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", CMD_IF_REV, cmd->cmdif_rev); - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto err_free_page; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 951dbd58594d..1619147a63e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -863,12 +863,12 @@ static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #else int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index f0b460f47f29..35f9ae037ba0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -89,7 +89,7 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, int i; if (!MLX5_CAP_GEN(priv->mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; for (i = 0; i < ets->ets_cap; i++) { @@ -236,7 +236,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, int err; if (!MLX5_CAP_GEN(priv->mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; err = mlx5e_dbcnl_validate_ets(netdev, ets); if (err) @@ -402,7 +402,7 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) struct mlx5_core_dev *mdev = priv->mdev; struct ieee_ets ets; struct ieee_pfc pfc; - int err = -ENOTSUPP; + int err = -EOPNOTSUPP; int i; if (!MLX5_CAP_GEN(mdev, ets)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5197817e4b2f..ffbdf9ee5a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -595,7 +595,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) - return -ENOTSUPP; + return -EOPNOTSUPP; coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; @@ -620,7 +620,7 @@ static int mlx5e_set_coalesce(struct net_device *netdev, int i; if (!MLX5_CAP_GEN(mdev, cq_moderation)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&priv->state_lock); @@ -1296,7 +1296,7 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) u32 mlx5_wol_mode; if (!wol_supported) - return -ENOTSUPP; + return -EOPNOTSUPP; if (wol->wolopts & ~wol_supported) return -EINVAL; @@ -1426,7 +1426,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!rx_mode_changed) return 0; @@ -1452,7 +1452,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, bool reset; if (!MLX5_CAP_GEN(mdev, cqe_compression)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) { netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d088effd7160..f33f72d0237c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -92,7 +92,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_ETHTOOL); if (!ns) - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.log_max_ft_size)), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2b7dd315020c..948351ae5bd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3331,7 +3331,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || !MLX5_CAP_GEN(mdev, nic_flow_table) || !MLX5_CAP_ETH(mdev, csum_cap) || @@ -3343,7 +3343,7 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) < 3) { mlx5_core_warn(mdev, "Not creating net device, some required device capabilities are missing\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f14d9c9ba773..bb712139b36e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -133,7 +133,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; + return -EOPNOTSUPP; esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", vport, vlan, qos, set_flags); @@ -1630,7 +1630,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 03293ed1cc22..657d319fc4c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -166,7 +166,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, return 0; out_notsupp: - return -ENOTSUPP; + return -EOPNOTSUPP; } int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c4478ecd8056..b53fc85a2375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -322,7 +322,7 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, flow_table_properties_nic_receive. flow_modify_en); if (!atomic_mod_cap) - return -ENOTSUPP; + return -EOPNOTSUPP; opmod = 1; return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d01e9f21d469..3c315eb8d270 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -807,7 +807,7 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return 0; } - return -ENOTSUPP; + return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index d2ec9d232a70..fd12e0a377a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -620,7 +620,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, u32 out[MLX5_ST_SZ_DW(qtct_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), MLX5_REG_QETCR, 0, 1); @@ -632,7 +632,7 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, u32 in[MLX5_ST_SZ_DW(qtct_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; memset(in, 0, sizeof(in)); return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 269e4401c342..7129c30a2ab4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -532,7 +532,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return -EACCES; if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) - return -ENOTSUPP; + return -EOPNOTSUPP; in = mlx5_vzalloc(inlen); if (!in) -- cgit From eff596da48784316ccb83bef82bc1213b512d5e0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 12 Jan 2017 13:04:01 +0200 Subject: net/mlx5: Return EOPNOTSUPP when failing to get steering name-space When we fail to retrieve a hardware steering name-space, the returned error code should say that this operation is not supported. Align the various places in the driver where this call is made to this convention. Also, make sure to warn when we fail to retrieve a SW (ANCHOR) name-space. Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1fe80de5d68f..a0e5a69402b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1089,7 +1089,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) MLX5_FLOW_NAMESPACE_KERNEL); if (!priv->fs.ns) - return -EINVAL; + return -EOPNOTSUPP; err = mlx5e_arfs_create_tables(priv); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bb712139b36e..d0c8bf014453 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -353,7 +353,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -ENOMEM; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); @@ -962,7 +962,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); - return -EIO; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); @@ -1079,7 +1079,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); - return -EIO; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 657d319fc4c6..5803216157cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,7 +535,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); if (!ns) { esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); - return -ENOMEM; + return -EOPNOTSUPP; } ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0ac7a2fc916c..6346a8f5883b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1822,7 +1822,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) struct mlx5_flow_table *ft; ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); - if (!ns) + if (WARN_ON(!ns)) return -EINVAL; ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); if (IS_ERR(ft)) { -- cgit From 5403dc703ff277f8a2a12a83ac820750485f13b3 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 11 Jan 2017 19:39:42 +0200 Subject: net/mlx5: E-Switch, Err when retrieving steering name-space fails Make sure to return error when we failed retrieving the FDB steering name space. Also, while around, correctly print the error when mode change revert fails in the warning message. Signed-off-by: Or Gerlitz Reported-by: Leon Romanovsky Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5803216157cf..c61bca138e65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -424,6 +424,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; goto ns_err; } @@ -655,7 +656,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err1) - esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1); } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, -- cgit From 5bae8c031053c69b4aa74b7f1ba15d4ec8426208 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 15 Jan 2017 19:05:38 +0200 Subject: net/mlx5: E-Switch, Re-enable RoCE on mode change only after FDB destroy We must re-enable RoCE on the e-switch management port (PF) only after destroying the FDB in its switchdev/offloaded mode. Otherwise, when encapsulation is supported, this re-enablement will fail. Also, it's more natural and symmetric to disable RoCE on the PF before we create the FDB under switchdev mode, so do that as well and revert if getting into error during the mode change later. Fixes: 9da34cd34e85 ('net/mlx5: Disable RoCE on the e-switch management [..]') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 29 ++++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c61bca138e65..595f7c7383b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -675,9 +675,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) int vport; int err; + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + err = esw_create_offloads_fdb_table(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw); if (err) @@ -697,11 +702,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) goto err_reps; } - /* disable PF RoCE so missed packets don't go through RoCE steering */ - mlx5_dev_list_lock(); - mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - return 0; err_reps: @@ -718,6 +718,13 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_table(esw); + +create_fdb_err: + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } @@ -725,11 +732,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { @@ -739,6 +741,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); } + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } -- cgit From 3e621b19b0bb1f5bea34f1fbc5fb5629191eda2b Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 12 Jan 2017 11:07:40 +0200 Subject: net/mlx5e: Support TC encapsulation offloads with upper devices When tunneling is used, some virtualizations systems set the (mlx5e) uplink device to be stacked under upper devices such as bridge or ovs internal port, where the VTEP IP address used for the encapsulation is set on that upper device. In order to support such use-cases, we also deal with a setup where the egress mirred device isn't representing a port on the HW e-switch to where the ingress device belongs. We use eswitch service function which returns the uplink and set it as the egress device of the tc encap rule. Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads") Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 46bef6a26a8c..c5282b6aba8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -663,6 +663,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, __be32 *saddr, int *out_ttl) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct rtable *rt; struct neighbour *n = NULL; int ttl; @@ -677,12 +678,11 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, #else return -EOPNOTSUPP; #endif - - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) { - pr_warn("%s: can't offload, devices not on same HW e-switch\n", __func__); - ip_rt_put(rt); - return -EOPNOTSUPP; - } + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) + *out_dev = mlx5_eswitch_get_uplink_netdev(esw); + else + *out_dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); n = dst_neigh_lookup(&rt->dst, &fl4->daddr); @@ -693,7 +693,6 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, *out_n = n; *saddr = fl4->saddr; *out_ttl = ttl; - *out_dev = rt->dst.dev; return 0; } -- cgit From 1d3398facd08a7fd4202f269317a95668eb880b9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 11 Jan 2017 14:32:26 +0200 Subject: net/mlx5e: Modify TIRs hash only when it's needed We don't need to modify our TIRs unless the user requested a change in the hash function/key, for example when changing indirection only. Tested: # Modify TIRs hash is needed ethtool -X ethX hkey ethtool -X ethX hfunc # Modify TIRs hash is not needed ethtool -X ethX equal All cases are verified with TCP Multi-Stream traffic over IPv4 & IPv6. Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ffbdf9ee5a9b..6f4eb34259f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -996,6 +996,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, { struct mlx5e_priv *priv = netdev_priv(dev); int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + bool hash_changed = false; void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && @@ -1017,14 +1018,21 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); } - if (key) + if (hfunc != ETH_RSS_HASH_NO_CHANGE && + hfunc != priv->params.rss_hfunc) { + priv->params.rss_hfunc = hfunc; + hash_changed = true; + } + + if (key) { memcpy(priv->params.toeplitz_hash_key, key, sizeof(priv->params.toeplitz_hash_key)); + hash_changed = hash_changed || + priv->params.rss_hfunc == ETH_RSS_HASH_TOP; + } - if (hfunc != ETH_RSS_HASH_NO_CHANGE) - priv->params.rss_hfunc = hfunc; - - mlx5e_modify_tirs_hash(priv, in, inlen); + if (hash_changed) + mlx5e_modify_tirs_hash(priv, in, inlen); mutex_unlock(&priv->state_lock); -- cgit From a100ff3eef193d2d79daf98dcd97a54776ffeb78 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jan 2017 16:25:46 +0200 Subject: net/mlx5e: Fix update of hash function/key via ethtool Modifying TIR hash should change selected fields bitmask in addition to the function and key. Formerly, Only on ethool mlx5e_set_rxfh "ethtoo -X" we would not set this field resulting in zeroing of its value, which means no packet fields are used for RX RSS hash calculation thus causing all traffic to arrive in RQ[0]. On driver load out of the box we don't have this issue, since the TIR hash is fully created from scratch. Tested: ethtool -X ethX hkey ethtool -X ethX hfunc ethtool -X ethX equal All cases are verified with TCP Multi-Stream traffic over IPv4 & IPv6. Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 13 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 198 ++++++++++----------- 3 files changed, 109 insertions(+), 105 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1619147a63e8..d5ecb8f53fd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -791,7 +791,8 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); -void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, + enum mlx5e_traffic_types tt); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6f4eb34259f0..bb67863aa361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -980,15 +980,18 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) { - struct mlx5_core_dev *mdev = priv->mdev; void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - int i; + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + int tt; MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - mlx5e_build_tir_ctx_hash(tirc, priv); - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 948351ae5bd2..f14ca3385fdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2022,8 +2022,23 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); } -void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, + enum mlx5e_traffic_types tt) { + void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(priv->params.rss_hfunc)); if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { @@ -2035,6 +2050,88 @@ void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); memcpy(rss_key, priv->params.toeplitz_hash_key, len); } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + default: + WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + } } static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) @@ -2404,110 +2501,13 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, enum mlx5e_traffic_types tt) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); -#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP) - -#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_L4_SPORT |\ - MLX5_HASH_FIELD_SEL_L4_DPORT) - -#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_IPSEC_SPI) - mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_tir_ctx_hash(tirc, priv); - - switch (tt) { - case MLX5E_TT_IPV4_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV6_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV4_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV6_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV4: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - - case MLX5E_TT_IPV6: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - default: - WARN_ONCE(true, - "mlx5e_build_indir_tir_ctx: bad traffic type!\n"); - } + mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, -- cgit From d15118af268324ecfc968dd90396e966f4f9b3ff Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 19 Jan 2017 14:53:07 +0200 Subject: net/mlx5e: Check ets capability before ets query FW command On dcbnl callback getpgtccfgtx, the driver should check the ets capability before ets query command is sent to firmware. It is valid to return from this void function without changing in/out parameters, as these parameters are initialized to DCB_ATTR_VALUE_UNDEFINED. Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 35f9ae037ba0..0523ed47f597 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -511,6 +511,11 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + if (!MLX5_CAP_GEN(priv->mdev, ets)) { + netdev_err(netdev, "%s, ets is not supported\n", __func__); + return; + } + if (priority >= CEE_DCBX_MAX_PRIO) { netdev_err(netdev, "%s, priority is out of range\n", __func__); -- cgit From 39cb2c9a316e77f6dfba96c543e55b6672d5a37e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Jan 2017 13:50:06 -0800 Subject: drm/i915: Check for NULL i915_vma in intel_unpin_fb_obj() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've seen this trigger twice now, where the i915_gem_object_to_ggtt() call in intel_unpin_fb_obj() returns NULL, resulting in an oops immediately afterwards as the (inlined) call to i915_vma_unpin_fence() tries to dereference it. It seems to be some race condition where the object is going away at shutdown time, since both times happened when shutting down the X server. The call chains were different: - VT ioctl(KDSETMODE, KD_TEXT): intel_cleanup_plane_fb+0x5b/0xa0 [i915] drm_atomic_helper_cleanup_planes+0x6f/0x90 [drm_kms_helper] intel_atomic_commit_tail+0x749/0xfe0 [i915] intel_atomic_commit+0x3cb/0x4f0 [i915] drm_atomic_commit+0x4b/0x50 [drm] restore_fbdev_mode+0x14c/0x2a0 [drm_kms_helper] drm_fb_helper_restore_fbdev_mode_unlocked+0x34/0x80 [drm_kms_helper] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] intel_fbdev_set_par+0x18/0x70 [i915] fb_set_var+0x236/0x460 fbcon_blank+0x30f/0x350 do_unblank_screen+0xd2/0x1a0 vt_ioctl+0x507/0x12a0 tty_ioctl+0x355/0xc30 do_vfs_ioctl+0xa3/0x5e0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x13/0x94 - i915 unpin_work workqueue: intel_unpin_work_fn+0x58/0x140 [i915] process_one_work+0x1f1/0x480 worker_thread+0x48/0x4d0 kthread+0x101/0x140 and this patch purely papers over the issue by adding a NULL pointer check and a WARN_ON_ONCE() to avoid the oops that would then generally make the machine unresponsive. Other callers of i915_gem_object_to_ggtt() seem to also check for the returned pointer being NULL and warn about it, so this clearly has happened before in other places. [ Reported it originally to the i915 developers on Jan 8, applying the ugly workaround on my own now after triggering the problem for the second time with no feedback. This is likely to be the same bug reported as https://bugs.freedesktop.org/show_bug.cgi?id=98829 https://bugs.freedesktop.org/show_bug.cgi?id=99134 which has a patch for the underlying problem, but it hasn't gotten to me, so I'm applying the workaround. ] Cc: Daniel Vetter Cc: Jani Nikula Cc: Ville Syrjälä Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Imre Deak Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 77f7b1d849a4..f0b9aa7a0483 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2251,6 +2251,9 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) intel_fill_fb_ggtt_view(&view, fb, rotation); vma = i915_gem_object_to_ggtt(obj, &view); + if (WARN_ON_ONCE(!vma)) + return; + i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); } -- cgit From 566cf877a1fcb6d6dc0126b076aad062054c2637 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Jan 2017 14:25:17 -0800 Subject: Linux 4.10-rc6 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 098840012b9b..96b27a888285 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc5 -NAME = Anniversary Edition +EXTRAVERSION = -rc6 +NAME = Fearless Coyote # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit From 0a764db103376cf69d04449b10688f3516cc0b88 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 27 Jan 2017 15:24:43 +0300 Subject: stmmac: Discard masked flags in interrupt status register DW GMAC databook says the following about bits in "Register 15 (Interrupt Mask Register)": --------------------------->8------------------------- When set, this bit __disables_the_assertion_of_the_interrupt_signal__ because of the setting of XXX bit in Register 14 (Interrupt Status Register). --------------------------->8------------------------- In fact even if we mask one bit in the mask register it doesn't prevent corresponding bit to appear in the status register, it only disables interrupt generation for corresponding event. But currently we expect a bit different behavior: status bits to be in sync with their masks, i.e. if mask for bit A is set in the mask register then bit A won't appear in the interrupt status register. This was proven to be incorrect assumption, see discussion here [1]. That misunderstanding causes unexpected behaviour of the GMAC, for example we were happy enough to just see bogus messages about link state changes. So from now on we'll be only checking bits that really may trigger an interrupt. [1] https://lkml.org/lkml/2016/11/3/413 Signed-off-by: Alexey Brodkin Cc: Giuseppe Cavallaro Cc: Fabrice Gasnier Cc: Joachim Eastwood Cc: Phil Reid Cc: David Miller Cc: Alexandre Torgue Cc: Vineet Gupta Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index be3c91c7f211..5484fd726d5a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -305,8 +305,12 @@ static int dwmac1000_irq_status(struct mac_device_info *hw, { void __iomem *ioaddr = hw->pcsr; u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); + u32 intr_mask = readl(ioaddr + GMAC_INT_MASK); int ret = 0; + /* Discard masked bits */ + intr_status &= ~intr_mask; + /* Not used events (e.g. MMC interrupts) are not handled. */ if ((intr_status & GMAC_INT_STATUS_MMCTIS)) x->mmc_tx_irq_n++; -- cgit From dc97a89e726c4e1830320d1db8215ef77ecebae0 Mon Sep 17 00:00:00 2001 From: Rafal Ozieblo Date: Fri, 27 Jan 2017 15:08:20 +0000 Subject: net: macb: Fix 64 bit addressing support for GEM This patch adds support for 32 bit GEM in 64 bit system. It checks capability at runtime and uses appropriate buffer descriptor. Signed-off-by: Rafal Ozieblo Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 188 +++++++++++++++++++++++++----------- drivers/net/ethernet/cadence/macb.h | 20 +++- 2 files changed, 147 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index c0fb80acc2da..baba2db9d9c2 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -43,13 +43,13 @@ #define DEFAULT_RX_RING_SIZE 512 /* must be power of 2 */ #define MIN_RX_RING_SIZE 64 #define MAX_RX_RING_SIZE 8192 -#define RX_RING_BYTES(bp) (sizeof(struct macb_dma_desc) \ +#define RX_RING_BYTES(bp) (macb_dma_desc_get_size(bp) \ * (bp)->rx_ring_size) #define DEFAULT_TX_RING_SIZE 512 /* must be power of 2 */ #define MIN_TX_RING_SIZE 64 #define MAX_TX_RING_SIZE 4096 -#define TX_RING_BYTES(bp) (sizeof(struct macb_dma_desc) \ +#define TX_RING_BYTES(bp) (macb_dma_desc_get_size(bp) \ * (bp)->tx_ring_size) /* level of occupied TX descriptors under which we wake up TX process */ @@ -78,6 +78,37 @@ */ #define MACB_HALT_TIMEOUT 1230 +/* DMA buffer descriptor might be different size + * depends on hardware configuration. + */ +static unsigned int macb_dma_desc_get_size(struct macb *bp) +{ +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64); +#endif + return sizeof(struct macb_dma_desc); +} + +static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx) +{ +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* Dma buffer descriptor is 4 words length (instead of 2 words) + * for 64b GEM. + */ + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + idx <<= 1; +#endif + return idx; +} + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc) +{ + return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc)); +} +#endif + /* Ring buffer accessors */ static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index) { @@ -87,7 +118,9 @@ static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index) static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue, unsigned int index) { - return &queue->tx_ring[macb_tx_ring_wrap(queue->bp, index)]; + index = macb_tx_ring_wrap(queue->bp, index); + index = macb_adj_dma_desc_idx(queue->bp, index); + return &queue->tx_ring[index]; } static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue, @@ -101,7 +134,7 @@ static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index) dma_addr_t offset; offset = macb_tx_ring_wrap(queue->bp, index) * - sizeof(struct macb_dma_desc); + macb_dma_desc_get_size(queue->bp); return queue->tx_ring_dma + offset; } @@ -113,7 +146,9 @@ static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index) static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) { - return &bp->rx_ring[macb_rx_ring_wrap(bp, index)]; + index = macb_rx_ring_wrap(bp, index); + index = macb_adj_dma_desc_idx(bp, index); + return &bp->rx_ring[index]; } static void *macb_rx_buffer(struct macb *bp, unsigned int index) @@ -560,12 +595,32 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb) } } -static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr) +static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_t addr) { - desc->addr = (u32)addr; #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - desc->addrh = (u32)(addr >> 32); + struct macb_dma_desc_64 *desc_64; + + if (bp->hw_dma_cap == HW_DMA_CAP_64B) { + desc_64 = macb_64b_desc(bp, desc); + desc_64->addrh = upper_32_bits(addr); + } #endif + desc->addr = lower_32_bits(addr); +} + +static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc) +{ + dma_addr_t addr = 0; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + struct macb_dma_desc_64 *desc_64; + + if (bp->hw_dma_cap == HW_DMA_CAP_64B) { + desc_64 = macb_64b_desc(bp, desc); + addr = ((u64)(desc_64->addrh) << 32); + } +#endif + addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); + return addr; } static void macb_tx_error_task(struct work_struct *work) @@ -649,16 +704,17 @@ static void macb_tx_error_task(struct work_struct *work) /* Set end of TX queue */ desc = macb_tx_desc(queue, 0); - macb_set_addr(desc, 0); + macb_set_addr(bp, desc, 0); desc->ctrl = MACB_BIT(TX_USED); /* Make descriptor updates visible to hardware */ wmb(); /* Reinitialize the TX desc queue */ - queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); + queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma)); #endif /* Make TX ring reflect state of hardware */ queue->tx_head = 0; @@ -750,6 +806,7 @@ static void gem_rx_refill(struct macb *bp) unsigned int entry; struct sk_buff *skb; dma_addr_t paddr; + struct macb_dma_desc *desc; while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, bp->rx_ring_size) > 0) { @@ -759,6 +816,7 @@ static void gem_rx_refill(struct macb *bp) rmb(); bp->rx_prepared_head++; + desc = macb_rx_desc(bp, entry); if (!bp->rx_skbuff[entry]) { /* allocate sk_buff for this free entry in ring */ @@ -782,14 +840,14 @@ static void gem_rx_refill(struct macb *bp) if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); - macb_set_addr(&(bp->rx_ring[entry]), paddr); - bp->rx_ring[entry].ctrl = 0; + macb_set_addr(bp, desc, paddr); + desc->ctrl = 0; /* properly align Ethernet header */ skb_reserve(skb, NET_IP_ALIGN); } else { - bp->rx_ring[entry].addr &= ~MACB_BIT(RX_USED); - bp->rx_ring[entry].ctrl = 0; + desc->addr &= ~MACB_BIT(RX_USED); + desc->ctrl = 0; } } @@ -835,16 +893,13 @@ static int gem_rx(struct macb *bp, int budget) bool rxused; entry = macb_rx_ring_wrap(bp, bp->rx_tail); - desc = &bp->rx_ring[entry]; + desc = macb_rx_desc(bp, entry); /* Make hw descriptor updates visible to CPU */ rmb(); rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false; - addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - addr |= ((u64)(desc->addrh) << 32); -#endif + addr = macb_get_addr(bp, desc); ctrl = desc->ctrl; if (!rxused) @@ -987,15 +1042,17 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, static inline void macb_init_rx_ring(struct macb *bp) { dma_addr_t addr; + struct macb_dma_desc *desc = NULL; int i; addr = bp->rx_buffers_dma; for (i = 0; i < bp->rx_ring_size; i++) { - bp->rx_ring[i].addr = addr; - bp->rx_ring[i].ctrl = 0; + desc = macb_rx_desc(bp, i); + macb_set_addr(bp, desc, addr); + desc->ctrl = 0; addr += bp->rx_buffer_size; } - bp->rx_ring[bp->rx_ring_size - 1].addr |= MACB_BIT(RX_WRAP); + desc->addr |= MACB_BIT(RX_WRAP); bp->rx_tail = 0; } @@ -1008,15 +1065,14 @@ static int macb_rx(struct macb *bp, int budget) for (tail = bp->rx_tail; budget > 0; tail++) { struct macb_dma_desc *desc = macb_rx_desc(bp, tail); - u32 addr, ctrl; + u32 ctrl; /* Make hw descriptor updates visible to CPU */ rmb(); - addr = desc->addr; ctrl = desc->ctrl; - if (!(addr & MACB_BIT(RX_USED))) + if (!(desc->addr & MACB_BIT(RX_USED))) break; if (ctrl & MACB_BIT(RX_SOF)) { @@ -1336,7 +1392,7 @@ static unsigned int macb_tx_map(struct macb *bp, i = tx_head; entry = macb_tx_ring_wrap(bp, i); ctrl = MACB_BIT(TX_USED); - desc = &queue->tx_ring[entry]; + desc = macb_tx_desc(queue, entry); desc->ctrl = ctrl; if (lso_ctrl) { @@ -1358,7 +1414,7 @@ static unsigned int macb_tx_map(struct macb *bp, i--; entry = macb_tx_ring_wrap(bp, i); tx_skb = &queue->tx_skb[entry]; - desc = &queue->tx_ring[entry]; + desc = macb_tx_desc(queue, entry); ctrl = (u32)tx_skb->size; if (eof) { @@ -1379,7 +1435,7 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BF(MSS_MFS, mss_mfs); /* Set TX buffer descriptor */ - macb_set_addr(desc, tx_skb->mapping); + macb_set_addr(bp, desc, tx_skb->mapping); /* desc->addr must be visible to hardware before clearing * 'TX_USED' bit in desc->ctrl. */ @@ -1586,11 +1642,9 @@ static void gem_free_rx_buffers(struct macb *bp) if (!skb) continue; - desc = &bp->rx_ring[i]; - addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - addr |= ((u64)(desc->addrh) << 32); -#endif + desc = macb_rx_desc(bp, i); + addr = macb_get_addr(bp, desc); + dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); @@ -1711,15 +1765,17 @@ out_err: static void gem_init_rings(struct macb *bp) { struct macb_queue *queue; + struct macb_dma_desc *desc = NULL; unsigned int q; int i; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { for (i = 0; i < bp->tx_ring_size; i++) { - queue->tx_ring[i].addr = 0; - queue->tx_ring[i].ctrl = MACB_BIT(TX_USED); + desc = macb_tx_desc(queue, i); + macb_set_addr(bp, desc, 0); + desc->ctrl = MACB_BIT(TX_USED); } - queue->tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP); + desc->ctrl |= MACB_BIT(TX_WRAP); queue->tx_head = 0; queue->tx_tail = 0; } @@ -1733,16 +1789,18 @@ static void gem_init_rings(struct macb *bp) static void macb_init_rings(struct macb *bp) { int i; + struct macb_dma_desc *desc = NULL; macb_init_rx_ring(bp); for (i = 0; i < bp->tx_ring_size; i++) { - bp->queues[0].tx_ring[i].addr = 0; - bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED); + desc = macb_tx_desc(&bp->queues[0], i); + macb_set_addr(bp, desc, 0); + desc->ctrl = MACB_BIT(TX_USED); } bp->queues[0].tx_head = 0; bp->queues[0].tx_tail = 0; - bp->queues[0].tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP); + desc->ctrl |= MACB_BIT(TX_WRAP); } static void macb_reset_hw(struct macb *bp) @@ -1863,7 +1921,8 @@ static void macb_configure_dma(struct macb *bp) dmacfg &= ~GEM_BIT(TXCOEN); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - dmacfg |= GEM_BIT(ADDR64); + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + dmacfg |= GEM_BIT(ADDR64); #endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", dmacfg); @@ -1910,14 +1969,16 @@ static void macb_init_hw(struct macb *bp) macb_configure_dma(bp); /* Initialize TX and RX buffers */ - macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma)); + macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32)); + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma)); #endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); + queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma)); #endif /* Enable interrupts */ @@ -2627,7 +2688,8 @@ static int macb_init(struct platform_device *pdev) queue->IMR = GEM_IMR(hw_q - 1); queue->TBQP = GEM_TBQP(hw_q - 1); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - queue->TBQPH = GEM_TBQPH(hw_q -1); + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + queue->TBQPH = GEM_TBQPH(hw_q - 1); #endif } else { /* queue0 uses legacy registers */ @@ -2637,7 +2699,8 @@ static int macb_init(struct platform_device *pdev) queue->IMR = MACB_IMR; queue->TBQP = MACB_TBQP; #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - queue->TBQPH = MACB_TBQPH; + if (bp->hw_dma_cap == HW_DMA_CAP_64B) + queue->TBQPH = MACB_TBQPH; #endif } @@ -2730,13 +2793,14 @@ static int macb_init(struct platform_device *pdev) static int at91ether_start(struct net_device *dev) { struct macb *lp = netdev_priv(dev); + struct macb_dma_desc *desc; dma_addr_t addr; u32 ctl; int i; lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, (AT91ETHER_MAX_RX_DESCR * - sizeof(struct macb_dma_desc)), + macb_dma_desc_get_size(lp)), &lp->rx_ring_dma, GFP_KERNEL); if (!lp->rx_ring) return -ENOMEM; @@ -2748,7 +2812,7 @@ static int at91ether_start(struct net_device *dev) if (!lp->rx_buffers) { dma_free_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * - sizeof(struct macb_dma_desc), + macb_dma_desc_get_size(lp), lp->rx_ring, lp->rx_ring_dma); lp->rx_ring = NULL; return -ENOMEM; @@ -2756,13 +2820,14 @@ static int at91ether_start(struct net_device *dev) addr = lp->rx_buffers_dma; for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) { - lp->rx_ring[i].addr = addr; - lp->rx_ring[i].ctrl = 0; + desc = macb_rx_desc(lp, i); + macb_set_addr(lp, desc, addr); + desc->ctrl = 0; addr += AT91ETHER_MAX_RBUFF_SZ; } /* Set the Wrap bit on the last descriptor */ - lp->rx_ring[AT91ETHER_MAX_RX_DESCR - 1].addr |= MACB_BIT(RX_WRAP); + desc->addr |= MACB_BIT(RX_WRAP); /* Reset buffer index */ lp->rx_tail = 0; @@ -2834,7 +2899,7 @@ static int at91ether_close(struct net_device *dev) dma_free_coherent(&lp->pdev->dev, AT91ETHER_MAX_RX_DESCR * - sizeof(struct macb_dma_desc), + macb_dma_desc_get_size(lp), lp->rx_ring, lp->rx_ring_dma); lp->rx_ring = NULL; @@ -2885,13 +2950,15 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) static void at91ether_rx(struct net_device *dev) { struct macb *lp = netdev_priv(dev); + struct macb_dma_desc *desc; unsigned char *p_recv; struct sk_buff *skb; unsigned int pktlen; - while (lp->rx_ring[lp->rx_tail].addr & MACB_BIT(RX_USED)) { + desc = macb_rx_desc(lp, lp->rx_tail); + while (desc->addr & MACB_BIT(RX_USED)) { p_recv = lp->rx_buffers + lp->rx_tail * AT91ETHER_MAX_RBUFF_SZ; - pktlen = MACB_BF(RX_FRMLEN, lp->rx_ring[lp->rx_tail].ctrl); + pktlen = MACB_BF(RX_FRMLEN, desc->ctrl); skb = netdev_alloc_skb(dev, pktlen + 2); if (skb) { skb_reserve(skb, 2); @@ -2905,17 +2972,19 @@ static void at91ether_rx(struct net_device *dev) lp->stats.rx_dropped++; } - if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH)) + if (desc->ctrl & MACB_BIT(RX_MHASH_MATCH)) lp->stats.multicast++; /* reset ownership bit */ - lp->rx_ring[lp->rx_tail].addr &= ~MACB_BIT(RX_USED); + desc->addr &= ~MACB_BIT(RX_USED); /* wrap after last buffer */ if (lp->rx_tail == AT91ETHER_MAX_RX_DESCR - 1) lp->rx_tail = 0; else lp->rx_tail++; + + desc = macb_rx_desc(lp, lp->rx_tail); } } @@ -3211,8 +3280,11 @@ static int macb_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32) + if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + bp->hw_dma_cap = HW_DMA_CAP_64B; + } else + bp->hw_dma_cap = HW_DMA_CAP_32B; #endif spin_lock_init(&bp->lock); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d67adad67be1..fc8550a5d47f 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -385,6 +385,8 @@ /* Bitfields in DCFG6. */ #define GEM_PBUF_LSO_OFFSET 27 #define GEM_PBUF_LSO_SIZE 1 +#define GEM_DAW64_OFFSET 23 +#define GEM_DAW64_SIZE 1 /* Constants for CLK */ #define MACB_CLK_DIV8 0 @@ -487,12 +489,20 @@ struct macb_dma_desc { u32 addr; u32 ctrl; +}; + #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - u32 addrh; - u32 resvd; -#endif +enum macb_hw_dma_cap { + HW_DMA_CAP_32B, + HW_DMA_CAP_64B, }; +struct macb_dma_desc_64 { + u32 addrh; + u32 resvd; +}; +#endif + /* DMA descriptor bitfields */ #define MACB_RX_USED_OFFSET 0 #define MACB_RX_USED_SIZE 1 @@ -874,6 +884,10 @@ struct macb { unsigned int jumbo_max_len; u32 wol; + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + enum macb_hw_dma_cap hw_dma_cap; +#endif }; static inline bool macb_is_gem(struct macb *bp) -- cgit From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Jan 2017 08:11:44 -0800 Subject: can: Fix kernel panic at security_sock_rcv_skb Zhang Yanmin reported crashes [1] and provided a patch adding a synchronize_rcu() call in can_rx_unregister() The main problem seems that the sockets themselves are not RCU protected. If CAN uses RCU for delivery, then sockets should be freed only after one RCU grace period. Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's ease stable backports with the following fix instead. [1] BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] selinux_socket_sock_rcv_skb+0x65/0x2a0 Call Trace: [] security_sock_rcv_skb+0x4c/0x60 [] sk_filter+0x41/0x210 [] sock_queue_rcv_skb+0x53/0x3a0 [] raw_rcv+0x2a3/0x3c0 [] can_rcv_filter+0x12b/0x370 [] can_receive+0xd9/0x120 [] can_rcv+0xab/0x100 [] __netif_receive_skb_core+0xd8c/0x11f0 [] __netif_receive_skb+0x24/0xb0 [] process_backlog+0x127/0x280 [] net_rx_action+0x33b/0x4f0 [] __do_softirq+0x184/0x440 [] do_softirq_own_stack+0x1c/0x30 [] do_softirq.part.18+0x3b/0x40 [] do_softirq+0x1d/0x20 [] netif_rx_ni+0xe5/0x110 [] slcan_receive_buf+0x507/0x520 [] flush_to_ldisc+0x21c/0x230 [] process_one_work+0x24f/0x670 [] worker_thread+0x9d/0x6f0 [] ? rescuer_thread+0x480/0x480 [] kthread+0x12c/0x150 [] ret_from_fork+0x3f/0x70 Reported-by: Zhang Yanmin Signed-off-by: Eric Dumazet Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/can/core.h | 7 +++---- net/can/af_can.c | 12 ++++++++++-- net/can/af_can.h | 3 ++- net/can/bcm.c | 4 ++-- net/can/gw.c | 2 +- net/can/raw.c | 4 ++-- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/can/core.h b/include/linux/can/core.h index a0875001b13c..df08a41d5be5 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -45,10 +45,9 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -extern int can_rx_register(struct net_device *dev, canid_t can_id, - canid_t mask, - void (*func)(struct sk_buff *, void *), - void *data, char *ident); +int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk); extern void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, diff --git a/net/can/af_can.c b/net/can/af_can.c index 1108079d934f..5488e4a6ccd0 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * @func: callback function on filter match * @data: returned parameter for callback function * @ident: string for calling module identification + * @sk: socket pointer (might be NULL) * * Description: * Invokes the callback function with the received sk_buff and the given @@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, */ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, - char *ident) + char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, r->func = func; r->data = data; r->ident = ident; + r->sk = sk; hlist_add_head_rcu(&r->list, rl); d->entries++; @@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register); static void can_rx_delete_receiver(struct rcu_head *rp) { struct receiver *r = container_of(rp, struct receiver, rcu); + struct sock *sk = r->sk; kmem_cache_free(rcv_cache, r); + if (sk) + sock_put(sk); } /** @@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, spin_unlock(&can_rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) + if (r) { + if (r->sk) + sock_hold(r->sk); call_rcu(&r->rcu, can_rx_delete_receiver); + } } EXPORT_SYMBOL(can_rx_unregister); diff --git a/net/can/af_can.h b/net/can/af_can.h index fca0fe9fc45a..b86f5129e838 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -50,13 +50,14 @@ struct receiver { struct hlist_node list; - struct rcu_head rcu; canid_t can_id; canid_t mask; unsigned long matches; void (*func)(struct sk_buff *, void *); void *data; char *ident; + struct sock *sk; + struct rcu_head rcu; }; #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) diff --git a/net/can/bcm.c b/net/can/bcm.c index 21ac75390e3d..5c9407181918 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1216,7 +1216,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = can_rx_register(dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, - "bcm"); + "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); @@ -1225,7 +1225,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } else err = can_rx_register(NULL, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op, "bcm"); + bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); diff --git a/net/can/gw.c b/net/can/gw.c index a54ab0c82104..7056a1a2bb70 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj) { return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, - gwj, "gw"); + gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) diff --git a/net/can/raw.c b/net/can/raw.c index b075f028d7e2..6dc546a06673 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, for (i = 0; i < count; i++) { err = can_rx_register(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) @@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, if (err_mask) err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); return err; } -- cgit From cf626c3b252b2c9d131be0dd66096ec3bf729e54 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 27 Jan 2017 21:39:03 +0100 Subject: net: phy: micrel: KSZ8795 do not set SUPPORTED_[Asym_]Pause As pr commit "net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause" this phy driver should not set these feature bits. Signed-off-by: Sean Nyekjaer Fixes: 9d162ed69f51 ("net: phy: micrel: add support for KSZ8795") Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e55809c5beb7..6742070ca676 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1012,7 +1012,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ8795, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8795", - .features = (SUPPORTED_Pause | SUPPORTED_Asym_Pause), + .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, -- cgit From d1156b489fa734d1af763d6a07b1637c01bb0aed Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 28 Jan 2017 01:07:30 +0300 Subject: net: adaptec: starfire: add checks for dma mapping errors init_ring(), refill_rx_ring() and start_tx() don't check if mapping dma memory succeed. The patch adds the checks and failure handling. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/adaptec/starfire.c | 45 +++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index c12d2618eebf..3872ab96b80a 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1152,6 +1152,12 @@ static void init_ring(struct net_device *dev) if (skb == NULL) break; np->rx_info[i].mapping = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(np->pci_dev, + np->rx_info[i].mapping)) { + dev_kfree_skb(skb); + np->rx_info[i].skb = NULL; + break; + } /* Grrr, we cannot offset to correctly align the IP header. */ np->rx_ring[i].rxaddr = cpu_to_dma(np->rx_info[i].mapping | RxDescValid); } @@ -1182,8 +1188,9 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); unsigned int entry; + unsigned int prev_tx; u32 status; - int i; + int i, j; /* * be cautious here, wrapping the queue has weird semantics @@ -1201,6 +1208,7 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) } #endif /* ZEROCOPY && HAS_BROKEN_FIRMWARE */ + prev_tx = np->cur_tx; entry = np->cur_tx % TX_RING_SIZE; for (i = 0; i < skb_num_frags(skb); i++) { int wrap_ring = 0; @@ -1234,6 +1242,11 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) skb_frag_size(this_frag), PCI_DMA_TODEVICE); } + if (pci_dma_mapping_error(np->pci_dev, + np->tx_info[entry].mapping)) { + dev->stats.tx_dropped++; + goto err_out; + } np->tx_ring[entry].addr = cpu_to_dma(np->tx_info[entry].mapping); np->tx_ring[entry].status = cpu_to_le32(status); @@ -1268,8 +1281,30 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); return NETDEV_TX_OK; -} +err_out: + entry = prev_tx % TX_RING_SIZE; + np->tx_info[entry].skb = NULL; + if (i > 0) { + pci_unmap_single(np->pci_dev, + np->tx_info[entry].mapping, + skb_first_frag_len(skb), + PCI_DMA_TODEVICE); + np->tx_info[entry].mapping = 0; + entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE; + for (j = 1; j < i; j++) { + pci_unmap_single(np->pci_dev, + np->tx_info[entry].mapping, + skb_frag_size( + &skb_shinfo(skb)->frags[j-1]), + PCI_DMA_TODEVICE); + entry++; + } + } + dev_kfree_skb_any(skb); + np->cur_tx = prev_tx; + return NETDEV_TX_OK; +} /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */ @@ -1569,6 +1604,12 @@ static void refill_rx_ring(struct net_device *dev) break; /* Better luck next round. */ np->rx_info[entry].mapping = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(np->pci_dev, + np->rx_info[entry].mapping)) { + dev_kfree_skb(skb); + np->rx_info[entry].skb = NULL; + break; + } np->rx_ring[entry].rxaddr = cpu_to_dma(np->rx_info[entry].mapping | RxDescValid); } -- cgit From a0615a16f7d0ceb5804d295203c302d496d8ee91 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Wed, 25 Jan 2017 09:54:33 -0600 Subject: powerpc/mm: Use the correct pointer when setting a 2MB pte When setting a 2MB pte, radix__map_kernel_page() is using the address ptep = (pte_t *)pudp; Fix this conversion to use pmdp instead. Use pmdp_ptep() to do this instead of casting the pointer. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Cc: stable@vger.kernel.org # v4.7+ Reviewed-by: Aneesh Kumar K.V Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfa53ccc8baf..34f1a0dbc898 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -65,7 +65,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, if (!pmdp) return -ENOMEM; if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } ptep = pte_alloc_kernel(pmdp, ea); @@ -90,7 +90,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } pmdp = pmd_offset(pudp, ea); if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } if (!pmd_present(*pmdp)) { -- cgit From e82d02580af45663fad6d3596e4344c606e81e10 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 23 Jan 2017 15:15:32 +0800 Subject: pinctrl: berlin-bg4ct: fix the value for "sd1a" of pin SCRD0_CRD_PRES This should be a typo. Signed-off-by: Jisheng Zhang Signed-off-by: Linus Walleij --- drivers/pinctrl/berlin/berlin-bg4ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/berlin/berlin-bg4ct.c b/drivers/pinctrl/berlin/berlin-bg4ct.c index 09172043d589..c617ec49e9ed 100644 --- a/drivers/pinctrl/berlin/berlin-bg4ct.c +++ b/drivers/pinctrl/berlin/berlin-bg4ct.c @@ -217,7 +217,7 @@ static const struct berlin_desc_group berlin4ct_soc_pinctrl_groups[] = { BERLIN_PINCTRL_GROUP("SCRD0_CRD_PRES", 0xc, 0x3, 0x15, BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO20 */ BERLIN_PINCTRL_FUNCTION(0x1, "scrd0"), /* crd pres */ - BERLIN_PINCTRL_FUNCTION(0x1, "sd1a")), /* DAT3 */ + BERLIN_PINCTRL_FUNCTION(0x3, "sd1a")), /* DAT3 */ BERLIN_PINCTRL_GROUP("SPI1_SS0n", 0xc, 0x3, 0x18, BERLIN_PINCTRL_FUNCTION(0x0, "spi1"), /* SS0n */ BERLIN_PINCTRL_FUNCTION(0x1, "gpio"), /* GPIO37 */ -- cgit From 2154d94b40ea2a5de05245521371d0461bb0d669 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 23 Jan 2017 09:21:30 +0100 Subject: pinctrl: sunxi: Don't enforce bias disable (for now) Commit 07fe64ba213f ("pinctrl: sunxi: Handle bias disable") actually enforced enforced the disabling of the pull up/down resistors instead of ignoring it like it was done before. This was part of a wider rework to switch to the generic pinconf bindings, and was meant to be merged together with DT patches that were switching to it, and removing what was considered default values by both the binding and the boards. This included no bias on a pin. However, those DT patches were delayed to 4.11, which would be fine only for a significant number boards having the bias setup wrong, which in turns break the MMC on those boards (and possibly other devices too). In order to avoid conflicts as much as possible, bring back the old behaviour for 4.10, and we'll revert that commit once all the DT bits will have landed. Tested-by: Priit Laes Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 0eb51e33cb1b..207a8de4e1ed 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -564,8 +564,7 @@ static int sunxi_pconf_group_set(struct pinctrl_dev *pctldev, val = arg / 10 - 1; break; case PIN_CONFIG_BIAS_DISABLE: - val = 0; - break; + continue; case PIN_CONFIG_BIAS_PULL_UP: if (arg == 0) return -EINVAL; -- cgit From 19b26d92dfb70f56440c187a20c49102ab648b97 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Jan 2017 17:28:22 +0200 Subject: pinctrl: intel: merrifield: Add missed check in mrfld_config_set() Not every pin can be configured. Add missed check to prevent access violation. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-merrifield.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index b21896126f76..4d4ef42a39b5 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -794,6 +794,9 @@ static int mrfld_config_set(struct pinctrl_dev *pctldev, unsigned int pin, unsigned int i; int ret; + if (!mrfld_buf_available(mp, pin)) + return -ENOTSUPP; + for (i = 0; i < nconfigs; i++) { switch (pinconf_to_config_param(configs[i])) { case PIN_CONFIG_BIAS_DISABLE: -- cgit From 24c2503255d35c269b67162c397a1a1c1e02f6ce Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jan 2017 21:00:48 +0100 Subject: x86/microcode: Do not access the initrd after it has been freed When we look for microcode blobs, we first try builtin and if that doesn't succeed, we fallback to the initrd supplied to the kernel. However, at some point doing boot, that initrd gets jettisoned and we shouldn't access it anymore. But we do, as the below KASAN report shows. That's because find_microcode_in_initrd() doesn't check whether the initrd is still valid or not. So do that. ================================================================== BUG: KASAN: use-after-free in find_cpio_data Read of size 1 by task swapper/1/0 page:ffffea0000db9d40 count:0 mapcount:0 mapping: (null) index:0x1 flags: 0x100000000000000() raw: 0100000000000000 0000000000000000 0000000000000001 00000000ffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.10.0-rc5-debug-00075-g2dbde22 #3 Hardware name: Dell Inc. XPS 13 9360/0839Y6, BIOS 1.2.3 12/01/2016 Call Trace: dump_stack ? _atomic_dec_and_lock ? __dump_page kasan_report_error ? pointer ? find_cpio_data __asan_report_load1_noabort ? find_cpio_data find_cpio_data ? vsprintf ? dump_stack ? get_ucode_user ? print_usage_bug find_microcode_in_initrd __load_ucode_intel ? collect_cpu_info_early ? debug_check_no_locks_freed load_ucode_intel_ap ? collect_cpu_info ? trace_hardirqs_on ? flat_send_IPI_mask_allbutself load_ucode_ap ? get_builtin_firmware ? flush_tlb_func ? do_raw_spin_trylock ? cpumask_weight cpu_init ? trace_hardirqs_off ? play_dead_common ? native_play_dead ? hlt_play_dead ? syscall_init ? arch_cpu_idle_dead ? do_idle start_secondary start_cpu Memory state around the buggy address: ffff880036e74f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff880036e74f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff880036e75000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff880036e75080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff880036e75100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Reported-by: Andrey Ryabinin Tested-by: Andrey Ryabinin Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170126165833.evjemhbqzaepirxo@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 5 +++-- arch/x86/kernel/cpu/microcode/core.c | 22 +++++++++++++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 38711df3bcb5..2266f864b747 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); +extern bool initrd_gone; #else static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6a31e2691f3a..079e81733a58 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family) reget: if (!get_builtin_microcode(&cp, family)) { #ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); + if (!initrd_gone) + cp = find_cpio_data(ucode_path, (void *)initrd_start, + initrd_end - initrd_start, NULL); #endif if (!(cp.data && cp.size)) { /* diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 2af69d27da62..73102d932760 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -46,6 +46,8 @@ static struct microcode_ops *microcode_ops; static bool dis_ucode_ldr = true; +bool initrd_gone; + LIST_HEAD(microcode_cache); /* @@ -190,21 +192,24 @@ void load_ucode_ap(void) static int __init save_microcode_in_initrd(void) { struct cpuinfo_x86 *c = &boot_cpu_data; + int ret = -EINVAL; switch (c->x86_vendor) { case X86_VENDOR_INTEL: if (c->x86 >= 6) - return save_microcode_in_initrd_intel(); + ret = save_microcode_in_initrd_intel(); break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(c->x86); + ret = save_microcode_in_initrd_amd(c->x86); break; default: break; } - return -EINVAL; + initrd_gone = true; + + return ret; } struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) @@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) * has the virtual address of the beginning of the initrd. It also * possibly relocates the ramdisk. In either case, initrd_start contains * the updated address so use that instead. + * + * initrd_gone is for the hotplug case where we've thrown out initrd + * already. */ - if (!use_pa && initrd_start) - start = initrd_start; + if (!use_pa) { + if (initrd_gone) + return (struct cpio_data){ NULL, 0, "" }; + if (initrd_start) + start = initrd_start; + } return find_cpio_data(path, (void *)start, size, NULL); #else /* !CONFIG_BLK_DEV_INITRD */ -- cgit From 4e5b54f127426c82dc2816340c26d951a5bb3429 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 18 Dec 2016 14:35:45 +0100 Subject: drm: prevent double-(un)registration for connectors If we're unlucky then the registration from a hotplugged connector might race with the final registration step on driver load. And since MST topology discover is asynchronous that's even somewhat likely. v2: Also update the kerneldoc for @registered! v3: Review from Chris: - Improve kerneldoc for late_register/early_unregister callbacks. - Use mutex_destroy. Reviewed-by: Chris Wilson Cc: Chris Wilson Reviewed-by: Sean Paul Reported-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161218133545.2106-1-daniel.vetter@ffwll.ch (cherry picked from commit e73ab00e9a0f1731f34d0620a9c55f5c30c4ad4e) --- drivers/gpu/drm/drm_connector.c | 20 +++++++++++++++----- include/drm/drm_connector.h | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 5a4526289392..8be60a4576b3 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -225,6 +225,7 @@ int drm_connector_init(struct drm_device *dev, INIT_LIST_HEAD(&connector->probed_modes); INIT_LIST_HEAD(&connector->modes); + mutex_init(&connector->mutex); connector->edid_blob_ptr = NULL; connector->status = connector_status_unknown; @@ -359,6 +360,8 @@ void drm_connector_cleanup(struct drm_connector *connector) connector->funcs->atomic_destroy_state(connector, connector->state); + mutex_destroy(&connector->mutex); + memset(connector, 0, sizeof(*connector)); } EXPORT_SYMBOL(drm_connector_cleanup); @@ -374,14 +377,15 @@ EXPORT_SYMBOL(drm_connector_cleanup); */ int drm_connector_register(struct drm_connector *connector) { - int ret; + int ret = 0; + mutex_lock(&connector->mutex); if (connector->registered) - return 0; + goto unlock; ret = drm_sysfs_connector_add(connector); if (ret) - return ret; + goto unlock; ret = drm_debugfs_connector_add(connector); if (ret) { @@ -397,12 +401,14 @@ int drm_connector_register(struct drm_connector *connector) drm_mode_object_register(connector->dev, &connector->base); connector->registered = true; - return 0; + goto unlock; err_debugfs: drm_debugfs_connector_remove(connector); err_sysfs: drm_sysfs_connector_remove(connector); +unlock: + mutex_unlock(&connector->mutex); return ret; } EXPORT_SYMBOL(drm_connector_register); @@ -415,8 +421,11 @@ EXPORT_SYMBOL(drm_connector_register); */ void drm_connector_unregister(struct drm_connector *connector) { - if (!connector->registered) + mutex_lock(&connector->mutex); + if (!connector->registered) { + mutex_unlock(&connector->mutex); return; + } if (connector->funcs->early_unregister) connector->funcs->early_unregister(connector); @@ -425,6 +434,7 @@ void drm_connector_unregister(struct drm_connector *connector) drm_debugfs_connector_remove(connector); connector->registered = false; + mutex_unlock(&connector->mutex); } EXPORT_SYMBOL(drm_connector_unregister); diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index a9b95246e26e..045a97cbeba2 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -381,6 +381,8 @@ struct drm_connector_funcs { * core drm connector interfaces. Everything added from this callback * should be unregistered in the early_unregister callback. * + * This is called while holding drm_connector->mutex. + * * Returns: * * 0 on success, or a negative error code on failure. @@ -395,6 +397,8 @@ struct drm_connector_funcs { * late_register(). It is called from drm_connector_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. + * + * This is called while holding drm_connector->mutex. */ void (*early_unregister)(struct drm_connector *connector); @@ -559,7 +563,6 @@ struct drm_cmdline_mode { * @interlace_allowed: can this connector handle interlaced modes? * @doublescan_allowed: can this connector handle doublescan? * @stereo_allowed: can this connector handle stereo modes? - * @registered: is this connector exposed (registered) with userspace? * @modes: modes available on this connector (from fill_modes() + user) * @status: one of the drm_connector_status enums (connected, not, or unknown) * @probed_modes: list of modes derived directly from the display @@ -607,6 +610,13 @@ struct drm_connector { char *name; + /** + * @mutex: Lock for general connector state, but currently only protects + * @registered. Most of the connector state is still protected by the + * mutex in &drm_mode_config. + */ + struct mutex mutex; + /** * @index: Compacted connector index, which matches the position inside * the mode_config.list for drivers not supporting hot-add/removing. Can @@ -620,6 +630,10 @@ struct drm_connector { bool interlace_allowed; bool doublescan_allowed; bool stereo_allowed; + /** + * @registered: Is this connector exposed (registered) with userspace? + * Protected by @mutex. + */ bool registered; struct list_head modes; /* list of modes on this connector */ -- cgit From e6e7b48b295afa5a5ab440de0a94d9ad8b3ce2d0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 12 Jan 2017 17:15:56 +0100 Subject: drm: Don't race connector registration I was under the misconception that the sysfs dev stuff can be fully set up, and then registered all in one step with device_add. That's true for properties and property groups, but not for parents and child devices. Those must be fully registered before you can register a child. Add a bit of tracking to make sure that asynchronous mst connector hotplugging gets this right. For consistency we rely upon the implicit barriers of the connector->mutex, which is taken anyway, to ensure that at least either the connector or device registration call will work out. Mildly tested since I can't reliably reproduce this on my mst box here. Reported-by: Dave Hansen Cc: Dave Hansen Acked-by: Chris Wilson Cc: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1484237756-2720-1-git-send-email-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_connector.c | 3 +++ drivers/gpu/drm/drm_drv.c | 4 ++++ include/drm/drmP.h | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 8be60a4576b3..7a7019ac9388 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -379,6 +379,9 @@ int drm_connector_register(struct drm_connector *connector) { int ret = 0; + if (!connector->dev->registered) + return 0; + mutex_lock(&connector->mutex); if (connector->registered) goto unlock; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a525751b4559..6594b4088f11 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -745,6 +745,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) if (ret) goto err_minors; + dev->registered = true; + if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) @@ -785,6 +787,8 @@ void drm_dev_unregister(struct drm_device *dev) drm_lastclose(dev); + dev->registered = false; + if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_unregister_all(dev); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 192016e2b518..9c4ee144b5f6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -517,6 +517,7 @@ struct drm_device { struct drm_minor *control; /**< Control node */ struct drm_minor *primary; /**< Primary node */ struct drm_minor *render; /**< Render node */ + bool registered; /* currently active master for this device. Protected by master_mutex */ struct drm_master *master; -- cgit From a06393ed03167771246c4c43192d9c264bc48412 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 18 Jan 2017 21:30:51 +0100 Subject: can: bcm: fix hrtimer/tasklet termination in bcm op removal When removing a bcm tx operation either a hrtimer or a tasklet might run. As the hrtimer triggers its associated tasklet and vice versa we need to take care to mutually terminate both handlers. Reported-by: Michael Josenhans Signed-off-by: Oliver Hartkopp Tested-by: Michael Josenhans Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 5c9407181918..95d13b233c65 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -734,14 +734,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); - - if (op->tsklet.func) - tasklet_kill(&op->tsklet); + if (op->tsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || + hrtimer_active(&op->timer)) { + hrtimer_cancel(&op->timer); + tasklet_kill(&op->tsklet); + } + } - if (op->thrtsklet.func) - tasklet_kill(&op->thrtsklet); + if (op->thrtsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || + hrtimer_active(&op->thrtimer)) { + hrtimer_cancel(&op->thrtimer); + tasklet_kill(&op->thrtsklet); + } + } if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); -- cgit From a76a82a3e38c8d3fb6499e3dfaeb0949241ab588 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2017 16:39:55 +0100 Subject: perf/core: Fix use-after-free bug Dmitry reported a KASAN use-after-free on event->group_leader. It turns out there's a hole in perf_remove_from_context() due to event_function_call() not calling its function when the task associated with the event is already dead. In this case the event will have been detached from the task, but the grouping will have been retained, such that group operations might still work properly while there are live child events etc. This does however mean that we can miss a perf_group_detach() call when the group decomposes, this in turn can then lead to use-after-free. Fix it by explicitly doing the group detach if its still required. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.5+ Cc: syzkaller Fixes: 63b6da39bb38 ("perf: Fix perf_event_exit_task() race") Link: http://lkml.kernel.org/r/20170126153955.GD6515@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 110b38a58493..4e1f4c0070ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1469,7 +1469,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) static void list_add_event(struct perf_event *event, struct perf_event_context *ctx) { - lockdep_assert_held(&ctx->lock); WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); @@ -1624,6 +1623,8 @@ static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double attach due to group movement in perf_event_open. */ @@ -1697,6 +1698,8 @@ static void perf_group_detach(struct perf_event *event) struct perf_event *sibling, *tmp; struct list_head *list = NULL; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1895,9 +1898,29 @@ __perf_remove_from_context(struct perf_event *event, */ static void perf_remove_from_context(struct perf_event *event, unsigned long flags) { - lockdep_assert_held(&event->ctx->mutex); + struct perf_event_context *ctx = event->ctx; + + lockdep_assert_held(&ctx->mutex); event_function_call(event, __perf_remove_from_context, (void *)flags); + + /* + * The above event_function_call() can NO-OP when it hits + * TASK_TOMBSTONE. In that case we must already have been detached + * from the context (by perf_event_exit_event()) but the grouping + * might still be in-tact. + */ + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); + if ((flags & DETACH_GROUP) && + (event->attach_state & PERF_ATTACH_GROUP)) { + /* + * Since in that case we cannot possibly be scheduled, simply + * detach now. + */ + raw_spin_lock_irq(&ctx->lock); + perf_group_detach(event); + raw_spin_unlock_irq(&ctx->lock); + } } /* -- cgit From 0b3589be9b98994ce3d5aeca52445d1f5627c4ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2017 23:15:08 +0100 Subject: perf/core: Fix PERF_RECORD_MMAP2 prot/flags for anonymous memory Andres reported that MMAP2 records for anonymous memory always have their protection field 0. Turns out, someone daft put the prot/flags generation code in the file branch, leaving them unset for anonymous memory. Reported-by: Andres Freund Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Don Zickus Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@kernel.org Cc: anton@ozlabs.org Cc: namhyung@kernel.org Cc: stable@vger.kernel.org # v3.16+ Fixes: f972eb63b100 ("perf: Pass protection and flags bits through mmap2 interface") Link: http://lkml.kernel.org/r/20170126221508.GF6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4e1f4c0070ce..e5aaa806702d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6632,6 +6632,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; char *name; + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + if (vma->vm_flags & VM_MAYSHARE) + flags = MAP_SHARED; + else + flags = MAP_PRIVATE; + + if (vma->vm_flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if (vma->vm_flags & VM_MAYEXEC) + flags |= MAP_EXECUTABLE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + if (vma->vm_flags & VM_HUGETLB) + flags |= MAP_HUGETLB; + if (file) { struct inode *inode; dev_t dev; @@ -6658,27 +6679,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) maj = MAJOR(dev); min = MINOR(dev); - if (vma->vm_flags & VM_READ) - prot |= PROT_READ; - if (vma->vm_flags & VM_WRITE) - prot |= PROT_WRITE; - if (vma->vm_flags & VM_EXEC) - prot |= PROT_EXEC; - - if (vma->vm_flags & VM_MAYSHARE) - flags = MAP_SHARED; - else - flags = MAP_PRIVATE; - - if (vma->vm_flags & VM_DENYWRITE) - flags |= MAP_DENYWRITE; - if (vma->vm_flags & VM_MAYEXEC) - flags |= MAP_EXECUTABLE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - if (vma->vm_flags & VM_HUGETLB) - flags |= MAP_HUGETLB; - goto got_name; } else { if (vma->vm_ops && vma->vm_ops->name) { -- cgit From 97a98ae5b8acf08d07d972c087b2def060bc9b73 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Tue, 17 Jan 2017 21:10:11 +0100 Subject: ARM: 8642/1: LPAE: catch pending imprecise abort on unmask Asynchronous external abort is coded differently in DFSR with LPAE enabled. Fixes: 9254970c "ARM: 8447/1: catch pending imprecise abort on unmask". Signed-off-by: Alexander Sverdlin Cc: Russell King Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Russell King --- arch/arm/mm/fault.c | 4 ++-- arch/arm/mm/fault.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 3a2e678b8d30..0122ad1a6027 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -610,9 +610,9 @@ static int __init early_abort_handler(unsigned long addr, unsigned int fsr, void __init early_abt_enable(void) { - fsr_info[22].fn = early_abort_handler; + fsr_info[FSR_FS_AEA].fn = early_abort_handler; local_abt_enable(); - fsr_info[22].fn = do_bad; + fsr_info[FSR_FS_AEA].fn = do_bad; } #ifndef CONFIG_ARM_LPAE diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 67532f242271..afc1f84e763b 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -11,11 +11,15 @@ #define FSR_FS5_0 (0x3f) #ifdef CONFIG_ARM_LPAE +#define FSR_FS_AEA 17 + static inline int fsr_fs(unsigned int fsr) { return fsr & FSR_FS5_0; } #else +#define FSR_FS_AEA 22 + static inline int fsr_fs(unsigned int fsr) { return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; -- cgit From 228dbbfb5d77f8e047b2a1d78da14b7158433027 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 17:11:56 +0100 Subject: ARM: 8643/3: arm/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Cc: # 3.0.x- Fixes: 5be6f62b0059 ("ARM: 6883/1: ptrace: Migrate to regsets framework") Signed-off-by: Dave Martin Acked-by: Russell King Signed-off-by: Russell King --- arch/arm/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ce131ed5939d..ae738a6319f6 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -600,7 +600,7 @@ static int gpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - struct pt_regs newregs; + struct pt_regs newregs = *task_pt_regs(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, -- cgit From ed72b81bb7a49e8bfaa8dd6ab0b0e103ff0771ae Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 Jan 2017 09:41:40 -0200 Subject: [media] cec rst: remove "This API is not yet finalized" notice The API is now finalized, so this notice should be dropped. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/cec/cec-func-close.rst | 5 ----- Documentation/media/uapi/cec/cec-func-ioctl.rst | 5 ----- Documentation/media/uapi/cec/cec-func-open.rst | 5 ----- Documentation/media/uapi/cec/cec-func-poll.rst | 5 ----- Documentation/media/uapi/cec/cec-intro.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-dqevent.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-g-mode.rst | 5 ----- Documentation/media/uapi/cec/cec-ioc-receive.rst | 5 ----- 11 files changed, 55 deletions(-) diff --git a/Documentation/media/uapi/cec/cec-func-close.rst b/Documentation/media/uapi/cec/cec-func-close.rst index 8267c31b317d..895d9c2d1c04 100644 --- a/Documentation/media/uapi/cec/cec-func-close.rst +++ b/Documentation/media/uapi/cec/cec-func-close.rst @@ -33,11 +33,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - Closes the cec device. Resources associated with the file descriptor are freed. The device configuration remain unchanged. diff --git a/Documentation/media/uapi/cec/cec-func-ioctl.rst b/Documentation/media/uapi/cec/cec-func-ioctl.rst index 9e8dbb118d6a..7dcfd178fb24 100644 --- a/Documentation/media/uapi/cec/cec-func-ioctl.rst +++ b/Documentation/media/uapi/cec/cec-func-ioctl.rst @@ -39,11 +39,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - The :c:func:`ioctl()` function manipulates cec device parameters. The argument ``fd`` must be an open file descriptor. diff --git a/Documentation/media/uapi/cec/cec-func-open.rst b/Documentation/media/uapi/cec/cec-func-open.rst index af3f5b5c24c6..0304388cd159 100644 --- a/Documentation/media/uapi/cec/cec-func-open.rst +++ b/Documentation/media/uapi/cec/cec-func-open.rst @@ -46,11 +46,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - To open a cec device applications call :c:func:`open()` with the desired device name. The function has no side effects; the device configuration remain unchanged. diff --git a/Documentation/media/uapi/cec/cec-func-poll.rst b/Documentation/media/uapi/cec/cec-func-poll.rst index cfb73e6027a5..6a863cfda6e0 100644 --- a/Documentation/media/uapi/cec/cec-func-poll.rst +++ b/Documentation/media/uapi/cec/cec-func-poll.rst @@ -39,11 +39,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - With the :c:func:`poll()` function applications can wait for CEC events. diff --git a/Documentation/media/uapi/cec/cec-intro.rst b/Documentation/media/uapi/cec/cec-intro.rst index 4a19ea5323a9..7d31d37b0642 100644 --- a/Documentation/media/uapi/cec/cec-intro.rst +++ b/Documentation/media/uapi/cec/cec-intro.rst @@ -3,11 +3,6 @@ Introduction ============ -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - HDMI connectors provide a single pin for use by the Consumer Electronics Control protocol. This protocol allows different devices connected by an HDMI cable to communicate. The protocol for CEC version 1.4 is defined diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst index 2b0ddb14b280..a0e961f11017 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst @@ -29,11 +29,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - All cec devices must support :ref:`ioctl CEC_ADAP_G_CAPS `. To query device information, applications call the ioctl with a pointer to a struct :c:type:`cec_caps`. The driver fills the structure and diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst index b878637e91b3..09f09bbe28d4 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst @@ -35,11 +35,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - To query the current CEC logical addresses, applications call :ref:`ioctl CEC_ADAP_G_LOG_ADDRS ` with a pointer to a struct :c:type:`cec_log_addrs` where the driver stores the logical addresses. diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst index 3357deb43c85..a3cdc75cec3e 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst @@ -35,11 +35,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - To query the current physical address applications call :ref:`ioctl CEC_ADAP_G_PHYS_ADDR ` with a pointer to a __u16 where the driver stores the physical address. diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index e256c6605de7..6e589a1fae17 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -30,11 +30,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - CEC devices can send asynchronous events. These can be retrieved by calling :c:func:`CEC_DQEVENT`. If the file descriptor is in non-blocking mode and no event is pending, then it will return -1 and diff --git a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst index 4f5818b9d277..e4ded9df0a84 100644 --- a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst +++ b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst @@ -31,11 +31,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - By default any filehandle can use :ref:`CEC_TRANSMIT`, but in order to prevent applications from stepping on each others toes it must be possible to obtain exclusive access to the CEC adapter. This ioctl sets the diff --git a/Documentation/media/uapi/cec/cec-ioc-receive.rst b/Documentation/media/uapi/cec/cec-ioc-receive.rst index bdf015b1d1dc..dc2adb391c0a 100644 --- a/Documentation/media/uapi/cec/cec-ioc-receive.rst +++ b/Documentation/media/uapi/cec/cec-ioc-receive.rst @@ -34,11 +34,6 @@ Arguments Description =========== -.. note:: - - This documents the proposed CEC API. This API is not yet finalized - and is currently only available as a staging kernel module. - To receive a CEC message the application has to fill in the ``timeout`` field of struct :c:type:`cec_msg` and pass it to :ref:`ioctl CEC_RECEIVE `. -- cgit From 8015d6b83cadc8f9f94c7bc8430255090ddf43d4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 Jan 2017 09:54:24 -0200 Subject: [media] cec-intro.rst: mention the v4l-utils package and CEC utilities Mention where to find the CEC utilities. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/cec/cec-intro.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/media/uapi/cec/cec-intro.rst b/Documentation/media/uapi/cec/cec-intro.rst index 7d31d37b0642..07ee2b8f89d6 100644 --- a/Documentation/media/uapi/cec/cec-intro.rst +++ b/Documentation/media/uapi/cec/cec-intro.rst @@ -26,3 +26,15 @@ control just the CEC pin. Drivers that support CEC will create a CEC device node (/dev/cecX) to give userspace access to the CEC adapter. The :ref:`CEC_ADAP_G_CAPS` ioctl will tell userspace what it is allowed to do. + +In order to check the support and test it, it is suggested to download +the `v4l-utils `_ package. It +provides three tools to handle CEC: + +- cec-ctl: the Swiss army knife of CEC. Allows you to configure, transmit + and monitor CEC messages. + +- cec-compliance: does a CEC compliance test of a remote CEC device to + determine how compliant the CEC implementation is. + +- cec-follower: emulates a CEC follower. -- cgit From f9f96fc10c09ca16e336854c08bc1563eed97985 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 10 Jan 2017 09:44:54 -0200 Subject: [media] cec: fix wrong last_la determination Due to an incorrect condition the last_la used for the initial attempt at claiming a logical address could be wrong. The last_la wasn't converted to a mask when ANDing with type2mask, so that test was broken. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index ebb5e391b800..87a6b65ed3af 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1206,7 +1206,7 @@ static int cec_config_thread_func(void *arg) las->log_addr[i] = CEC_LOG_ADDR_INVALID; if (last_la == CEC_LOG_ADDR_INVALID || last_la == CEC_LOG_ADDR_UNREGISTERED || - !(last_la & type2mask[type])) + !((1 << last_la) & type2mask[type])) last_la = la_list[0]; err = cec_config_log_addr(adap, i, last_la); -- cgit From 08d85f3ea99f1eeafc4e8507936190e86a16ee8c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 17 Jan 2017 16:00:48 +0000 Subject: irqdomain: Avoid activating interrupts more than once Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early"), we can end-up activating a PCI/MSI twice (once at allocation time, and once at startup time). This is normally of no consequences, except that there is some HW out there that may misbehave if activate is used more than once (the GICv3 ITS, for example, uses the activate callback to issue the MAPVI command, and the architecture spec says that "If there is an existing mapping for the EventID-DeviceID combination, behavior is UNPREDICTABLE"). While this could be worked around in each individual driver, it may make more sense to tackle the issue at the core level. In order to avoid getting in that situation, let's have a per-interrupt flag to remember if we have already activated that interrupt or not. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Reported-and-tested-by: Andre Przywara Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 17 +++++++++++++++++ kernel/irq/irqdomain.c | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index e79875574b39..39e3254e5769 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -184,6 +184,7 @@ struct irq_data { * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending + * IRQD_ACTIVATED - Interrupt has already been activated * IRQD_NO_BALANCING - Balancing disabled for this IRQ * IRQD_PER_CPU - Interrupt is per cpu * IRQD_AFFINITY_SET - Interrupt affinity was set @@ -202,6 +203,7 @@ struct irq_data { enum { IRQD_TRIGGER_MASK = 0xf, IRQD_SETAFFINITY_PENDING = (1 << 8), + IRQD_ACTIVATED = (1 << 9), IRQD_NO_BALANCING = (1 << 10), IRQD_PER_CPU = (1 << 11), IRQD_AFFINITY_SET = (1 << 12), @@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; } +static inline bool irqd_is_activated(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_ACTIVATED; +} + +static inline void irqd_set_activated(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_ACTIVATED; +} + +static inline void irqd_clr_activated(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_ACTIVATED; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c0a0ae43521..b59e6768c5e9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1346,6 +1346,30 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); +static void __irq_domain_activate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (irq_data->parent_data) + __irq_domain_activate_irq(irq_data->parent_data); + if (domain->ops->activate) + domain->ops->activate(domain, irq_data); + } +} + +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); + if (irq_data->parent_data) + __irq_domain_deactivate_irq(irq_data->parent_data); + } +} + /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt @@ -1356,13 +1380,9 @@ EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); */ void irq_domain_activate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (irq_data->parent_data) - irq_domain_activate_irq(irq_data->parent_data); - if (domain->ops->activate) - domain->ops->activate(domain, irq_data); + if (!irqd_is_activated(irq_data)) { + __irq_domain_activate_irq(irq_data); + irqd_set_activated(irq_data); } } @@ -1376,13 +1396,9 @@ void irq_domain_activate_irq(struct irq_data *irq_data) */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (domain->ops->deactivate) - domain->ops->deactivate(domain, irq_data); - if (irq_data->parent_data) - irq_domain_deactivate_irq(irq_data->parent_data); + if (irqd_is_activated(irq_data)) { + __irq_domain_deactivate_irq(irq_data); + irqd_clr_activated(irq_data); } } -- cgit From 827e1579e1d5cb66e340e7be1944b825b542bbdf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jan 2017 19:24:07 +0200 Subject: pinctrl: baytrail: Rectify debounce support (part 2) The commit 04ff5a095d66 ("pinctrl: baytrail: Rectify debounce support") almost fixes the logic of debuonce but missed couple of things, i.e. typo in mask when disabling debounce and lack of enabling it back. This patch addresses above issues. Reported-by: Jean Delvare Fixes: 04ff5a095d66 ("pinctrl: baytrail: Rectify debounce support") Signed-off-by: Andy Shevchenko Reviewed-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index c123488266ce..3eb7c2bde87e 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1243,10 +1243,12 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, debounce = readl(db_reg); debounce &= ~BYT_DEBOUNCE_PULSE_MASK; + if (arg) + conf |= BYT_DEBOUNCE_EN; + else + conf &= ~BYT_DEBOUNCE_EN; + switch (arg) { - case 0: - conf &= BYT_DEBOUNCE_EN; - break; case 375: debounce |= BYT_DEBOUNCE_PULSE_375US; break; @@ -1269,7 +1271,9 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, debounce |= BYT_DEBOUNCE_PULSE_24MS; break; default: - ret = -EINVAL; + if (arg) + ret = -EINVAL; + break; } if (!ret) -- cgit From 1b89970d81bbd52720fc64a3fe9572ee33588363 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jan 2017 19:24:08 +0200 Subject: pinctrl: baytrail: Debounce register is one per community Debounce value is set globally per community. Otherwise user will easily get a kernel crash when they start using the feature: BUG: unable to handle kernel paging request at ffffc900003be000 IP: byt_gpio_dbg_show+0xa9/0x430 Make it clear in byt_gpio_reg(). Note that this fix just prevents kernel to crash, but doesn't make any difference to the existing logic. It means the last caller will win the trade and debounce value will be configured accordingly. The actual logic fix needs to be thought about and it's not as important as crash fix. That's why the latter goes separately and right now. Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration") Cc: Cristina Ciocan Signed-off-by: Andy Shevchenko Reviewed-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 3eb7c2bde87e..05ba052ff6b9 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -731,16 +731,23 @@ static void __iomem *byt_gpio_reg(struct byt_gpio *vg, unsigned int offset, int reg) { struct byt_community *comm = byt_get_community(vg, offset); - u32 reg_offset = 0; + u32 reg_offset; if (!comm) return NULL; offset -= comm->pin_base; - if (reg == BYT_INT_STAT_REG) + switch (reg) { + case BYT_INT_STAT_REG: reg_offset = (offset / 32) * 4; - else + break; + case BYT_DEBOUNCE_REG: + reg_offset = 0; + break; + default: reg_offset = comm->pad_map[offset] * 16; + break; + } return comm->reg_base + reg_offset + reg; } -- cgit From cdca06e4e85974d8a3503ab15709dbbaf90d3dd1 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2017 12:35:28 +0100 Subject: pinctrl: baytrail: Add missing spinlock usage in byt_gpio_irq_handler According to VLI64 Intel Atom E3800 Specification Update (#329901) concurrent read accesses may result in returning 0xffffffff and write accesses may be dropped silently. To workaround all accesses must be protected by locks. Cc: stable@vger.kernel.org Signed-off-by: Alexander Stein Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 05ba052ff6b9..d94aef17348b 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1623,7 +1623,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) continue; } + raw_spin_lock(&vg->lock); pending = readl(reg); + raw_spin_unlock(&vg->lock); for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); -- cgit From 94842b4fc4d6b1691cfc86c6f5251f299d27f4ba Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Sat, 28 Jan 2017 22:53:28 +0300 Subject: net: ethtool: add support for 2500BaseT and 5000BaseT link modes This patch introduce support for 2500BaseT and 5000BaseT link modes. These modes are included in the new IEEE 802.3bz standard. Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f0db7788f887..3dc91a46e8b8 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1384,6 +1384,8 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit @@ -1393,7 +1395,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + = ETHTOOL_LINK_MODE_5000baseT_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit From 4af0e5bb95ee3ba5ea4bd7dbb94e1648a5279cc9 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Sun, 29 Jan 2017 19:52:20 +0700 Subject: gianfar: synchronize DMA API usage by free_skb_rx_queue w/ gfar_new_page In spite of switching to paged allocation of Rx buffers, the driver still called dma_unmap_single() in the Rx queues tear-down path. The DMA region unmapping code in free_skb_rx_queue() basically predates the introduction of paged allocation to the driver. While being refactored, it apparently hasn't reflected the change in the DMA API usage by its counterpart gfar_new_page(). As a result, setting an interface to the DOWN state now yields the following: # ip link set eth2 down fsl-gianfar ffe24000.ethernet: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000001ecd0000] [size=40] ------------[ cut here ]------------ WARNING: CPU: 1 PID: 189 at lib/dma-debug.c:1123 check_unmap+0x8e0/0xa28 CPU: 1 PID: 189 Comm: ip Tainted: G O 4.9.5 #1 task: dee73400 task.stack: dede2000 NIP: c02101e8 LR: c02101e8 CTR: c0260d74 REGS: dede3bb0 TRAP: 0700 Tainted: G O (4.9.5) MSR: 00021000 CR: 28002222 XER: 00000000 GPR00: c02101e8 dede3c60 dee73400 000000b6 dfbd033c dfbd36c4 1f622000 dede2000 GPR08: 00000007 c05b1634 1f622000 00000000 22002484 100a9904 00000000 00000000 GPR16: 00000000 db4c849c 00000002 db4c8480 00000001 df142240 db4c84bc 00000000 GPR24: c0706148 c0700000 00029000 c07552e8 c07323b4 dede3cb8 c07605e0 db535540 NIP [c02101e8] check_unmap+0x8e0/0xa28 LR [c02101e8] check_unmap+0x8e0/0xa28 Call Trace: [dede3c60] [c02101e8] check_unmap+0x8e0/0xa28 (unreliable) [dede3cb0] [c02103b8] debug_dma_unmap_page+0x88/0x9c [dede3d30] [c02dffbc] free_skb_resources+0x2c4/0x404 [dede3d80] [c02e39b4] gfar_close+0x24/0xc8 [dede3da0] [c0361550] __dev_close_many+0xa0/0xf8 [dede3dd0] [c03616f0] __dev_close+0x2c/0x4c [dede3df0] [c036b1b8] __dev_change_flags+0xa0/0x174 [dede3e10] [c036b2ac] dev_change_flags+0x20/0x60 [dede3e30] [c03e130c] devinet_ioctl+0x540/0x824 [dede3e90] [c0347dcc] sock_ioctl+0x134/0x298 [dede3eb0] [c0111814] do_vfs_ioctl+0xac/0x854 [dede3f20] [c0111ffc] SyS_ioctl+0x40/0x74 [dede3f40] [c000f290] ret_from_syscall+0x0/0x3c --- interrupt: c01 at 0xff45da0 LR = 0xff45cd0 Instruction dump: 811d001c 7c66482e 813d0020 9061000c 807f000c 5463103a 7cc6182e 3c60c052 386309ac 90c10008 4cc63182 4826b845 <0fe00000> 4bfffa60 3c80c052 388402c4 ---[ end trace 695ae6d7ac1d0c47 ]--- Mapped at: [] gfar_alloc_rx_buffs+0x178/0x248 [] startup_gfar+0x368/0x570 [] __dev_open+0xdc/0x150 [] __dev_change_flags+0xa0/0x174 [] dev_change_flags+0x20/0x60 Even though the issue was discovered in 4.9 kernel, the code in question is identical in the current net and net-next trees. Fixes: 75354148ce69 ("gianfar: Add paged allocation and Rx S/G") Signed-off-by: Arseny Solokha Acked-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index c1b671667920..957bfc220978 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2010,8 +2010,8 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue) if (!rxb->page) continue; - dma_unmap_single(rx_queue->dev, rxb->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(rx_queue->dev, rxb->dma, + PAGE_SIZE, DMA_FROM_DEVICE); __free_page(rxb->page); rxb->page = NULL; -- cgit From d585df1c5ccf995fcee910705ad7a9cdd11d4152 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 30 Jan 2017 15:11:45 +0200 Subject: net/mlx4_core: Avoid command timeouts during VF driver device shutdown Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. The result is that the VF driver on the VM will experience a command timeout during the shutdown process when the Hypervisor does not deliver a command-completion event to the VM. To avoid FW command timeouts on the VM when the driver's shutdown method is invoked, we detect the absence of the VF's comm channel at the very start of the shutdown process. If the comm-channel has already been disabled, we cause all FW commands during the device shutdown process to immediately return success (and thus avoid all command timeouts). Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 2 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index c7e939945259..53daa6ca5d83 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -158,7 +158,7 @@ static int mlx4_reset_slave(struct mlx4_dev *dev) return -ETIMEDOUT; } -static int mlx4_comm_internal_err(u32 slave_read) +int mlx4_comm_internal_err(u32 slave_read) { return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 0e8b7c44931f..8258d08acd8c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -222,6 +222,18 @@ void mlx4_unregister_device(struct mlx4_dev *dev) return; mlx4_stop_catas_poll(dev); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION && + mlx4_is_slave(dev)) { + /* In mlx4_remove_one on a VF */ + u32 slave_read = + swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read)); + + if (mlx4_comm_internal_err(slave_read)) { + mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n", + __func__); + mlx4_enter_error_state(dev->persist); + } + } mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 88ee7d8a5923..086920b615af 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1220,6 +1220,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); +int mlx4_comm_internal_err(u32 slave_read); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); -- cgit From 2b89ed65a6f201a6a4f0450ad289aa4bf491608c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 29 Jan 2017 22:52:53 -0500 Subject: ipv6: Paritially checksum full MTU frames IPv6 will mark data that is smaller that mtu - headersize as CHECKSUM_PARTIAL, but if the data will completely fill the mtu, the packet checksum will be computed in software instead. Extend the conditional to include the data that fills the mtu as well. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2c0df09e9036..b6a94ff0bbd0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1344,7 +1344,7 @@ emsgsize: */ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && - length < mtu - headersize && + length <= mtu - headersize && !(flags & MSG_MORE) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; -- cgit From e085b9d8fcbd7c8a3175268a16437cf359c0d052 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 27 Jan 2017 15:43:42 +0100 Subject: MAINTAINERS: at91: change email address Following the Microchip / Atmel merger and the unification of internal IT, it's more convenient for me to swith to the microchip.com address. Change all my entries to reflect this. Signed-off-by: Nicolas Ferre Signed-off-by: Alexandre Belloni --- MAINTAINERS | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cfff2c9e3d94..d0f5de961ff5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1091,7 +1091,7 @@ F: arch/arm/boot/dts/aspeed-* F: drivers/*/*aspeed* ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT -M: Nicolas Ferre +M: Nicolas Ferre M: Alexandre Belloni M: Jean-Christophe Plagniol-Villard L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -2178,7 +2178,7 @@ S: Maintained F: drivers/mmc/host/atmel-mci.c ATMEL AT91 SAMA5D2-Compatible Shutdown Controller -M: Nicolas Ferre +M: Nicolas Ferre S: Supported F: drivers/power/reset/at91-sama5d2_shdwc.c @@ -2189,7 +2189,7 @@ S: Supported F: drivers/iio/adc/at91-sama5d2_adc.c ATMEL Audio ALSA driver -M: Nicolas Ferre +M: Nicolas Ferre L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/atmel @@ -2223,14 +2223,14 @@ F: drivers/media/platform/soc_camera/atmel-isi.c F: include/media/atmel-isi.h ATMEL LCDFB DRIVER -M: Nicolas Ferre +M: Nicolas Ferre L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/fbdev/atmel_lcdfb.c F: include/video/atmel_lcdc.h ATMEL MACB ETHERNET DRIVER -M: Nicolas Ferre +M: Nicolas Ferre S: Supported F: drivers/net/ethernet/cadence/ @@ -2248,26 +2248,26 @@ S: Supported F: drivers/mmc/host/sdhci-of-at91.c ATMEL SPI DRIVER -M: Nicolas Ferre +M: Nicolas Ferre S: Supported F: drivers/spi/spi-atmel.* ATMEL SSC DRIVER -M: Nicolas Ferre +M: Nicolas Ferre L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: drivers/misc/atmel-ssc.c F: include/linux/atmel-ssc.h ATMEL Timer Counter (TC) AND CLOCKSOURCE DRIVERS -M: Nicolas Ferre +M: Nicolas Ferre L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: drivers/misc/atmel_tclib.c F: drivers/clocksource/tcb_clksrc.c ATMEL USBA UDC DRIVER -M: Nicolas Ferre +M: Nicolas Ferre L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: drivers/usb/gadget/udc/atmel_usba_udc.* -- cgit From 420a3879d694e5c3e734fb92151d19b2ec503e46 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 27 Jan 2017 14:33:44 +0100 Subject: MAINTAINERS: change email address from atmel to microchip Use microchip email address instead of old atmel one. Signed-off-by: Ludovic Desroches Signed-off-by: Alexandre Belloni --- MAINTAINERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d0f5de961ff5..eb9739ec4e92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2173,7 +2173,7 @@ F: include/linux/atm* F: include/uapi/linux/atm* ATMEL AT91 / AT32 MCI DRIVER -M: Ludovic Desroches +M: Ludovic Desroches S: Maintained F: drivers/mmc/host/atmel-mci.c @@ -2183,7 +2183,7 @@ S: Supported F: drivers/power/reset/at91-sama5d2_shdwc.c ATMEL SAMA5D2 ADC DRIVER -M: Ludovic Desroches +M: Ludovic Desroches L: linux-iio@vger.kernel.org S: Supported F: drivers/iio/adc/at91-sama5d2_adc.c @@ -2203,20 +2203,20 @@ F: drivers/dma/at_hdmac_regs.h F: include/linux/platform_data/dma-atmel.h ATMEL XDMA DRIVER -M: Ludovic Desroches +M: Ludovic Desroches L: linux-arm-kernel@lists.infradead.org L: dmaengine@vger.kernel.org S: Supported F: drivers/dma/at_xdmac.c ATMEL I2C DRIVER -M: Ludovic Desroches +M: Ludovic Desroches L: linux-i2c@vger.kernel.org S: Supported F: drivers/i2c/busses/i2c-at91.c ATMEL ISI DRIVER -M: Ludovic Desroches +M: Ludovic Desroches L: linux-media@vger.kernel.org S: Supported F: drivers/media/platform/soc_camera/atmel-isi.c @@ -2242,7 +2242,7 @@ S: Supported F: drivers/mtd/nand/atmel_nand* ATMEL SDMMC DRIVER -M: Ludovic Desroches +M: Ludovic Desroches L: linux-mmc@vger.kernel.org S: Supported F: drivers/mmc/host/sdhci-of-at91.c @@ -9710,7 +9710,7 @@ S: Maintained F: drivers/pinctrl/pinctrl-at91.* PIN CONTROLLER - ATMEL AT91 PIO4 -M: Ludovic Desroches +M: Ludovic Desroches L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-gpio@vger.kernel.org S: Supported -- cgit From 040587af31228d82c52267f717c9fcdb65f36335 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 30 Jan 2017 16:19:02 +0100 Subject: net/sched: cls_flower: Correct matching on ICMPv6 code When matching on the ICMPv6 code ICMPV6_CODE rather than ICMPV4_CODE attributes should be used. This corrects what appears to be a typo. Sample usage: tc qdisc add dev eth0 ingress tc filter add dev eth0 protocol ipv6 parent ffff: flower \ indev eth0 ip_proto icmpv6 type 128 code 0 action drop Without this change the code parameter above is effectively ignored. Fixes: 7b684884fbfa ("net/sched: cls_flower: Support matching on ICMP type and code") Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 970db7a41684..5752789acc13 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -568,9 +568,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE_MASK, sizeof(key->icmp.type)); - fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE, + fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code, - TCA_FLOWER_KEY_ICMPV4_CODE_MASK, + TCA_FLOWER_KEY_ICMPV6_CODE_MASK, sizeof(key->icmp.code)); } -- cgit From 7a7dc961a28b965a0d0303c2e989df17b411708b Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 17 Jan 2017 10:59:02 -0500 Subject: sparc64: Zero pages on allocation for mondo and error queues. Error queues use a non-zero first word to detect if the queues are full. Using pages that have not been zeroed may result in false positive overflow events. These queues are set up once during boot so zeroing all mondo and error queue pages is safe. Note that the false positive overflow does not always occur because the page allocation for these queues is so early in the boot cycle that higher number CPUs get fresh pages. It is only when traps are serviced with lower number CPUs who were given already used pages that this issue is exposed. Signed-off-by: Liam R. Howlett Signed-off-by: David S. Miller --- arch/sparc/kernel/irq_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 34a7930b76ef..baed4cdeda75 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -1021,7 +1021,7 @@ static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask) unsigned long order = get_order(size); unsigned long p; - p = __get_free_pages(GFP_KERNEL, order); + p = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!p) { prom_printf("SUN4V: Error, cannot allocate queue.\n"); prom_halt(); -- cgit From 047487241ff59374fded8c477f21453681f5995c Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 17 Jan 2017 10:59:03 -0500 Subject: sparc64: Handle PIO & MEM non-resumable errors. User processes trying to access an invalid memory address via PIO will receive a SIGBUS signal instead of causing a panic. Memory errors will receive a SIGKILL since a SIGBUS may result in a coredump which may attempt to repeat the faulting access. Signed-off-by: Liam R. Howlett Signed-off-by: David S. Miller --- arch/sparc/kernel/traps_64.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 496fa926e1e0..d44fb806bbd7 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2051,6 +2051,73 @@ void sun4v_resum_overflow(struct pt_regs *regs) atomic_inc(&sun4v_resum_oflow_cnt); } +/* Given a set of registers, get the virtual addressi that was being accessed + * by the faulting instructions at tpc. + */ +static unsigned long sun4v_get_vaddr(struct pt_regs *regs) +{ + unsigned int insn; + + if (!copy_from_user(&insn, (void __user *)regs->tpc, 4)) { + return compute_effective_address(regs, insn, + (insn >> 25) & 0x1f); + } + return 0; +} + +/* Attempt to handle non-resumable errors generated from userspace. + * Returns true if the signal was handled, false otherwise. + */ +bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, + struct sun4v_error_entry *ent) { + + unsigned int attrs = ent->err_attrs; + + if (attrs & SUN4V_ERR_ATTRS_MEMORY) { + unsigned long addr = ent->err_raddr; + siginfo_t info; + + if (addr == ~(u64)0) { + /* This seems highly unlikely to ever occur */ + pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory error detected in unknown location!\n"); + } else { + unsigned long page_cnt = DIV_ROUND_UP(ent->err_size, + PAGE_SIZE); + + /* Break the unfortunate news. */ + pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory failed at %016lX\n", + addr); + pr_emerg("SUN4V NON-RECOVERABLE ERROR: Claiming %lu ages.\n", + page_cnt); + + while (page_cnt-- > 0) { + if (pfn_valid(addr >> PAGE_SHIFT)) + get_page(pfn_to_page(addr >> PAGE_SHIFT)); + addr += PAGE_SIZE; + } + } + info.si_signo = SIGKILL; + info.si_errno = 0; + info.si_trapno = 0; + force_sig_info(info.si_signo, &info, current); + + return true; + } + if (attrs & SUN4V_ERR_ATTRS_PIO) { + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)sun4v_get_vaddr(regs); + force_sig_info(info.si_signo, &info, current); + + return true; + } + + /* Default to doing nothing */ + return false; +} + /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. * Log the event, clear the first word of the entry, and die. */ @@ -2075,6 +2142,12 @@ void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset) put_cpu(); + if (!(regs->tstate & TSTATE_PRIV) && + sun4v_nonresum_error_user_handled(regs, &local_copy)) { + /* DON'T PANIC: This userspace error was handled. */ + return; + } + #ifdef CONFIG_PCI /* Check for the special PCI poke sequence. */ if (pci_poke_in_progress && pci_poke_cpu == cpu) { -- cgit From 52f5631a4c056ad01682393be56d2be237e81610 Mon Sep 17 00:00:00 2001 From: Jurij Smakov Date: Mon, 30 Jan 2017 15:41:36 -0600 Subject: rtlwifi: rtl8192ce: Fix loading of incorrect firmware In commit cf4747d7535a ("rtlwifi: Fix regression caused by commit d86e64768859, an error in the edit results in the wrong firmware being loaded for some models of the RTL8188/8192CE. In this condition, the connection suffered from high ping latency, slow transfer rates, and required higher signal strengths to work at all See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=853073, https://bugzilla.opensuse.org/show_bug.cgi?id=1017471, and https://github.com/lwfinger/rtlwifi_new/issues/203 for descriptions of the problems. This patch fixes all of those problems. Fixes: cf4747d7535a ("rtlwifi: Fix regression caused by commit d86e64768859") Signed-off-by: Jurij Smakov Signed-off-by: Larry Finger Cc: Stable # 4.9+ Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 691ddef1ae28..a33a06d58a9a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -92,7 +92,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); - char *fw_name = "rtlwifi/rtl8192cfwU.bin"; + char *fw_name; rtl8192ce_bt_reg_init(hw); @@ -164,8 +164,13 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) } /* request fw */ - if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) + if (IS_VENDOR_UMC_A_CUT(rtlhal->version) && + !IS_92C_SERIAL(rtlhal->version)) + fw_name = "rtlwifi/rtl8192cfwU.bin"; + else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) fw_name = "rtlwifi/rtl8192cfwU_B.bin"; + else + fw_name = "rtlwifi/rtl8192cfw.bin"; rtlpriv->max_fw_size = 0x4000; pr_info("Using firmware %s\n", fw_name); -- cgit From 05e0be7c900797e9164976a6014d534ce3035909 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 31 Jan 2017 00:47:30 -0800 Subject: Input: synaptics-rmi4 - fix reversed conditions in enable/disable_irq_wake These tests are reversed. A warning should be displayed if an error is returned, not on success. Signed-off-by: Christophe JAILLET Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 11447ab1055c..bf5c36e229ba 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -901,7 +901,7 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) data->enabled = true; if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { retval = disable_irq_wake(irq); - if (!retval) + if (retval) dev_warn(&rmi_dev->dev, "Failed to disable irq for wake: %d\n", retval); @@ -936,7 +936,7 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake) disable_irq(irq); if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { retval = enable_irq_wake(irq); - if (!retval) + if (retval) dev_warn(&rmi_dev->dev, "Failed to enable irq for wake: %d\n", retval); -- cgit From 433e19cf33d34bb6751c874a9c00980552fe508c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 28 Jan 2017 11:46:02 -0700 Subject: Drivers: hv: vmbus: finally fix hv_need_to_signal_on_read() Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in hv_need_to_signal_on_read()") added the proper mb(), but removed the test "prev_write_sz < pending_sz" when making the signal decision. As a result, the guest can signal the host unnecessarily, and then the host can throttle the guest because the host thinks the guest is buggy or malicious; finally the user running stress test can perceive intermittent freeze of the guest. This patch brings back the test, and properly handles the in-place consumption APIs used by NetVSC (see get_next_pkt_raw(), put_pkt_raw() and commit_rd_index()). Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in hv_need_to_signal_on_read()") Signed-off-by: Dexuan Cui Reported-by: Rolf Neugebauer Tested-by: Rolf Neugebauer Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 1 + drivers/net/hyperv/netvsc.c | 6 ++++++ include/linux/hyperv.h | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index cd49cb17eb7f..308dbda700eb 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -383,6 +383,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, return ret; } + init_cached_read_index(channel); next_read_location = hv_get_next_read_location(inring_info); next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc, sizeof(desc), diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5a1cc089acb7..86e5749226ef 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1295,6 +1295,9 @@ void netvsc_channel_cb(void *context) ndev = hv_get_drvdata(device); buffer = get_per_channel_state(channel); + /* commit_rd_index() -> hv_signal_on_read() needs this. */ + init_cached_read_index(channel); + do { desc = get_next_pkt_raw(channel); if (desc != NULL) { @@ -1347,6 +1350,9 @@ void netvsc_channel_cb(void *context) bufferlen = bytes_recvd; } + + init_cached_read_index(channel); + } while (1); if (bufferlen > NETVSC_PACKET_SIZE) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 42fe43fb0c80..183efde54269 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -128,6 +128,7 @@ struct hv_ring_buffer_info { u32 ring_data_startoffset; u32 priv_write_index; u32 priv_read_index; + u32 cached_read_index; }; /* @@ -180,6 +181,19 @@ static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi) return write; } +static inline u32 hv_get_cached_bytes_to_write( + const struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, write; + + dsize = rbi->ring_datasize; + read_loc = rbi->cached_read_index; + write_loc = rbi->ring_buffer->write_index; + + write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + return write; +} /* * VMBUS version is 32 bit entity broken up into * two 16 bit quantities: major_number. minor_number. @@ -1488,7 +1502,7 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) static inline void hv_signal_on_read(struct vmbus_channel *channel) { - u32 cur_write_sz; + u32 cur_write_sz, cached_write_sz; u32 pending_sz; struct hv_ring_buffer_info *rbi = &channel->inbound; @@ -1512,12 +1526,24 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) cur_write_sz = hv_get_bytes_to_write(rbi); - if (cur_write_sz >= pending_sz) + if (cur_write_sz < pending_sz) + return; + + cached_write_sz = hv_get_cached_bytes_to_write(rbi); + if (cached_write_sz < pending_sz) vmbus_setevent(channel); return; } +static inline void +init_cached_read_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *rbi = &channel->inbound; + + rbi->cached_read_index = rbi->ring_buffer->read_index; +} + /* * An API to support in-place processing of incoming VMBUS packets. */ @@ -1569,6 +1595,8 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, * This call commits the read index and potentially signals the host. * Here is the pattern for using the "in-place" consumption APIs: * + * init_cached_read_index(); + * * while (get_next_pkt_raw() { * process the packet "in-place"; * put_pkt_raw(); -- cgit From 96692b097ba76d0c637ae8af47b29c73da33c9d0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 14 Dec 2016 09:52:39 +1000 Subject: drm/nouveau/fence/g84-: protect against concurrent access to semaphore buffers Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fence.h | 1 + drivers/gpu/drm/nouveau/nv84_fence.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index ccdce1b4eec4..d5e58a38f160 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -99,6 +99,7 @@ struct nv84_fence_priv { struct nouveau_bo *bo; struct nouveau_bo *bo_gart; u32 *suspend; + struct mutex mutex; }; int nv84_fence_context_new(struct nouveau_channel *); diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 52b87ae83e7b..f0b322bec7df 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -107,8 +107,10 @@ nv84_fence_context_del(struct nouveau_channel *chan) struct nv84_fence_chan *fctx = chan->fence; nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence); + mutex_lock(&priv->mutex); nouveau_bo_vma_del(priv->bo, &fctx->vma_gart); nouveau_bo_vma_del(priv->bo, &fctx->vma); + mutex_unlock(&priv->mutex); nouveau_fence_context_del(&fctx->base); chan->fence = NULL; nouveau_fence_context_free(&fctx->base); @@ -134,11 +136,13 @@ nv84_fence_context_new(struct nouveau_channel *chan) fctx->base.sync32 = nv84_fence_sync32; fctx->base.sequence = nv84_fence_read(chan); + mutex_lock(&priv->mutex); ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma); if (ret == 0) { ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm, &fctx->vma_gart); } + mutex_unlock(&priv->mutex); if (ret) nv84_fence_context_del(chan); @@ -212,6 +216,8 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); priv->base.uevent = true; + mutex_init(&priv->mutex); + /* Use VRAM if there is any ; otherwise fallback to system memory */ domain = drm->device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM : /* -- cgit From c966b6279f610a24ac1d42dcbe30e10fa61220b2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 25 May 2016 17:11:40 +1000 Subject: drm/nouveau: prevent userspace from deleting client object Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_usif.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c index 08f9c6fa0f7f..1fba38622744 100644 --- a/drivers/gpu/drm/nouveau/nouveau_usif.c +++ b/drivers/gpu/drm/nouveau/nouveau_usif.c @@ -313,7 +313,8 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc) if (!(ret = nvif_unpack(-ENOSYS, &data, &size, argv->v0, 0, 0, true))) { /* block access to objects not created via this interface */ owner = argv->v0.owner; - if (argv->v0.object == 0ULL) + if (argv->v0.object == 0ULL && + argv->v0.type != NVIF_IOCTL_V0_DEL) argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */ else argv->v0.owner = NVDRM_OBJECT_USIF; -- cgit From 7dfee6827780d4228148263545af936d0cae8930 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jan 2017 10:22:15 +1000 Subject: drm/nouveau/disp/mcp7x: disable dptmds workaround The workaround appears to cause regressions on these boards, and from inspection of RM traces, NVIDIA don't appear to do it on them either. Signed-off-by: Ben Skeggs Tested-by: Roy Spliet --- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index 567466f93cd5..0db8efbf1c2e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device) case 0x94: case 0x96: case 0x98: - case 0xaa: - case 0xac: return true; default: break; -- cgit From d72498ca2cbcf15e5038b184a95f061bca21e820 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Wed, 7 Dec 2016 06:30:15 +0200 Subject: drm/nouveau/nouveau/led: prevent compiling the led-code if nouveau=y and leds=m The proper fix would have been to select LEDS_CLASS but this can lead to a circular dependency, as found out by Arnd. This patch implements Arnd's suggestion instead, at the cost of some auto-magic for a fringe feature. Reported-by: Arnd Bergmann Reported-by: Intel's 0-DAY Fixes: 8d021d71b324 ("drm/nouveau/drm/nouveau: add a LED driver for the NVIDIA logo") Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_led.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_led.h b/drivers/gpu/drm/nouveau/nouveau_led.h index 187ecdb82002..21a5775028cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_led.h +++ b/drivers/gpu/drm/nouveau/nouveau_led.h @@ -42,7 +42,7 @@ nouveau_led(struct drm_device *dev) } /* nouveau_led.c */ -#if IS_ENABLED(CONFIG_LEDS_CLASS) +#if IS_REACHABLE(CONFIG_LEDS_CLASS) int nouveau_led_init(struct drm_device *dev); void nouveau_led_suspend(struct drm_device *dev); void nouveau_led_resume(struct drm_device *dev); -- cgit From d347583a39e2df609a9e40c835f72d3614665b53 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Wed, 11 Jan 2017 15:47:18 -0500 Subject: drm/nouveau/disp/gt215: Fix HDA ELD handling (thus, HDMI audio) on gt215 Store the ELD correctly, not just enough copies of the first byte to pad out the given ELD size. Signed-off-by: Alastair Bridgewater Fixes: 120b0c39c756 ("drm/nv50-/disp: audit and version SOR_HDA_ELD method") Cc: stable@vger.kernel.org # v3.17+ Reviewed-by: Ilia Mirkin Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c index 6f0436df0219..f8f2f16c22a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c @@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1) ); } for (i = 0; i < size; i++) - nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]); + nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]); for (; i < 0x60; i++) nvkm_wr32(device, 0x61c440 + soff, (i << 8)); nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003); -- cgit From 24bf7ae359b8cca165bb30742d2b1c03a1eb23af Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 19 Jan 2017 22:56:30 -0500 Subject: drm/nouveau/nv1a,nv1f/disp: fix memory clock rate retrieval Based on the xf86-video-nv code, NFORCE (NV1A) and NFORCE2 (NV1F) have a different way of retrieving clocks. See the nv_hw.c:nForceUpdateArbitrationSettings function in the original code for how these clocks were accessed. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54587 Cc: stable@vger.kernel.org Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv04/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c index 74856a8b8f35..e64f52464ecf 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/hw.c +++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c @@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t mpllP; pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP); + mpllP = (mpllP >> 8) & 0xf; if (!mpllP) mpllP = 4; @@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t clock; pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock); - return clock; + return clock / 1000; } ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals); -- cgit From 2b5078937355c0d662ecef54b7d4d04f48da4fa9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 24 Jan 2017 09:32:26 +1000 Subject: drm/nouveau/kms/nv50: request vblank events for commits that send completion events This somehow fixes an issue where sync-to-vblank longer works correctly after resume from suspend. From a HW perspective, we don't need the IRQs turned on to be able to detect flip completion, so it's assumed that this is required for the voodoo in the core DRM vblank core. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2c2c64507661..32097fd615fd 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -4052,6 +4052,11 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) } } + for_each_crtc_in_state(state, crtc, crtc_state, i) { + if (crtc->state->event) + drm_crtc_vblank_get(crtc); + } + /* Update plane(s). */ for_each_plane_in_state(state, plane, plane_state, i) { struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state); @@ -4101,6 +4106,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) drm_crtc_send_vblank_event(crtc, crtc->state->event); spin_unlock_irqrestore(&crtc->dev->event_lock, flags); crtc->state->event = NULL; + drm_crtc_vblank_put(crtc); } } -- cgit From 161e6d44a5e2d3f85365cb717d60e363171b39e6 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Jan 2017 12:23:42 -0200 Subject: mmc: sdhci: Ignore unexpected CARD_INT interrupts One of our kernelCI boxes hanged at boot because a faulty eSDHC device was triggering spurious CARD_INT interrupts for SD cards, causing CMD52 reads, which are not allowed for SD devices. This adds a sanity check to the interruption path, preventing that illegal command from getting sent if the CARD_INT interruption should be disabled. This quirk allows that particular machine to resume boot despite the faulty hardware, instead of getting hung dealing with thousands of mishandled interrupts. Suggested-by: Adrian Hunter Signed-off-by: Gabriel Krisman Bertazi Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: --- drivers/mmc/host/sdhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 23909804ffb8..0def99590d16 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2733,7 +2733,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) if (intmask & SDHCI_INT_RETUNE) mmc_retune_needed(host->mmc); - if (intmask & SDHCI_INT_CARD_INT) { + if ((intmask & SDHCI_INT_CARD_INT) && + (host->ier & SDHCI_INT_CARD_INT)) { sdhci_enable_sdio_irq_nolock(host, false); host->thread_isr |= SDHCI_INT_CARD_INT; result = IRQ_WAKE_THREAD; -- cgit From 81917bad86a66f2bdcb12b4c10ab1bf333ed25ec Mon Sep 17 00:00:00 2001 From: Roger Shimizu Date: Mon, 30 Jan 2017 20:07:29 +0900 Subject: ARM: dts: orion5x-lschl: Fix model name Model name should be consistent with legacy device file, so that user can migrate their system from legacy device support to device-tree safely. Legacy device file is currently removed, but it can be found on 4.8 or previous version of linux: arch/arm/mach-orion5x/ls-chl-setup.c Fixes: f94f268979a2 ("ARM: dts: orion5x: convert ls-chl to FDT") Cc: Ashley Hughes Signed-off-by: Roger Shimizu Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/orion5x-lschl.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/orion5x-lschl.dts b/arch/arm/boot/dts/orion5x-lschl.dts index 947409252845..ea6c881634b9 100644 --- a/arch/arm/boot/dts/orion5x-lschl.dts +++ b/arch/arm/boot/dts/orion5x-lschl.dts @@ -2,7 +2,7 @@ * Device Tree file for Buffalo Linkstation LS-CHLv3 * * Copyright (C) 2016 Ash Hughes - * Copyright (C) 2015, 2016 + * Copyright (C) 2015-2017 * Roger Shimizu * * This file is dual-licensed: you can use it either under the terms @@ -52,7 +52,7 @@ #include / { - model = "Buffalo Linkstation Live v3 (LS-CHL)"; + model = "Buffalo Linkstation LiveV3 (LS-CHL)"; compatible = "buffalo,lschl", "marvell,orion5x-88f5182", "marvell,orion5x"; memory { /* 128 MB */ -- cgit From 6cfd3cd8d8365cf78db1d25cd276d3d900eb8541 Mon Sep 17 00:00:00 2001 From: Roger Shimizu Date: Mon, 30 Jan 2017 20:07:30 +0900 Subject: ARM: dts: orion5x-lschl: More consistent naming on linkstation series DTS files, which includes orion5x-linkstation.dtsi, are named: orion5x-linkstation-*.dts So we rename the file below: arch/arm/boot/dts/orion5x-lschl.dts to the new name: arch/arm/boot/dts/orion5x-linkstation-lschl.dts Because DTS conversion of this device was just introduced in 4.9, Debian is still using legacy device support, other distros are the same, so here we won't expect any impact actually. Fixes: f94f268979a2 ("ARM: dts: orion5x: convert ls-chl to FDT") Cc: Ashley Hughes Signed-off-by: Roger Shimizu Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/orion5x-linkstation-lschl.dts | 171 ++++++++++++++++++++++++ arch/arm/boot/dts/orion5x-lschl.dts | 171 ------------------------ 2 files changed, 171 insertions(+), 171 deletions(-) create mode 100644 arch/arm/boot/dts/orion5x-linkstation-lschl.dts delete mode 100644 arch/arm/boot/dts/orion5x-lschl.dts diff --git a/arch/arm/boot/dts/orion5x-linkstation-lschl.dts b/arch/arm/boot/dts/orion5x-linkstation-lschl.dts new file mode 100644 index 000000000000..ea6c881634b9 --- /dev/null +++ b/arch/arm/boot/dts/orion5x-linkstation-lschl.dts @@ -0,0 +1,171 @@ +/* + * Device Tree file for Buffalo Linkstation LS-CHLv3 + * + * Copyright (C) 2016 Ash Hughes + * Copyright (C) 2015-2017 + * Roger Shimizu + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/dts-v1/; + +#include "orion5x-linkstation.dtsi" +#include "mvebu-linkstation-gpio-simple.dtsi" +#include "mvebu-linkstation-fan.dtsi" +#include + +/ { + model = "Buffalo Linkstation LiveV3 (LS-CHL)"; + compatible = "buffalo,lschl", "marvell,orion5x-88f5182", "marvell,orion5x"; + + memory { /* 128 MB */ + device_type = "memory"; + reg = <0x00000000 0x8000000>; + }; + + gpio_keys { + func { + label = "Function Button"; + linux,code = ; + gpios = <&gpio0 15 GPIO_ACTIVE_LOW>; + }; + + power-on-switch { + gpios = <&gpio0 8 GPIO_ACTIVE_LOW>; + }; + + power-auto-switch { + gpios = <&gpio0 10 GPIO_ACTIVE_LOW>; + }; + }; + + gpio_leds { + pinctrl-0 = <&pmx_led_power &pmx_led_alarm &pmx_led_info &pmx_led_func>; + blue-power-led { + gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; + }; + + red-alarm-led { + gpios = <&gpio0 2 GPIO_ACTIVE_LOW>; + }; + + amber-info-led { + gpios = <&gpio0 3 GPIO_ACTIVE_LOW>; + }; + + func { + label = "lschl:func:blue:top"; + gpios = <&gpio0 17 GPIO_ACTIVE_LOW>; + }; + }; + + gpio_fan { + gpios = <&gpio0 14 GPIO_ACTIVE_LOW + &gpio0 16 GPIO_ACTIVE_LOW>; + + alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; + }; +}; + +&pinctrl { + pmx_led_power: pmx-leds { + marvell,pins = "mpp0"; + marvell,function = "gpio"; + }; + + pmx_power_hdd: pmx-power-hdd { + marvell,pins = "mpp1"; + marvell,function = "gpio"; + }; + + pmx_led_alarm: pmx-leds { + marvell,pins = "mpp2"; + marvell,function = "gpio"; + }; + + pmx_led_info: pmx-leds { + marvell,pins = "mpp3"; + marvell,function = "gpio"; + }; + + pmx_fan_lock: pmx-fan-lock { + marvell,pins = "mpp6"; + marvell,function = "gpio"; + }; + + pmx_power_switch: pmx-power-switch { + marvell,pins = "mpp8", "mpp10", "mpp15"; + marvell,function = "gpio"; + }; + + pmx_power_usb: pmx-power-usb { + marvell,pins = "mpp9"; + marvell,function = "gpio"; + }; + + pmx_fan_high: pmx-fan-high { + marvell,pins = "mpp14"; + marvell,function = "gpio"; + }; + + pmx_fan_low: pmx-fan-low { + marvell,pins = "mpp16"; + marvell,function = "gpio"; + }; + + pmx_led_func: pmx-leds { + marvell,pins = "mpp17"; + marvell,function = "gpio"; + }; + + pmx_sw_init: pmx-sw-init { + marvell,pins = "mpp7"; + marvell,function = "gpio"; + }; +}; + +&hdd_power { + gpios = <&gpio0 1 GPIO_ACTIVE_HIGH>; +}; + +&usb_power { + gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>; +}; + diff --git a/arch/arm/boot/dts/orion5x-lschl.dts b/arch/arm/boot/dts/orion5x-lschl.dts deleted file mode 100644 index ea6c881634b9..000000000000 --- a/arch/arm/boot/dts/orion5x-lschl.dts +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Device Tree file for Buffalo Linkstation LS-CHLv3 - * - * Copyright (C) 2016 Ash Hughes - * Copyright (C) 2015-2017 - * Roger Shimizu - * - * This file is dual-licensed: you can use it either under the terms - * of the GPL or the X11 license, at your option. Note that this dual - * licensing only applies to this file, and not this project as a - * whole. - * - * a) This file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This file is distributed in the hope that it will be useful - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Or, alternatively - * - * b) Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/dts-v1/; - -#include "orion5x-linkstation.dtsi" -#include "mvebu-linkstation-gpio-simple.dtsi" -#include "mvebu-linkstation-fan.dtsi" -#include - -/ { - model = "Buffalo Linkstation LiveV3 (LS-CHL)"; - compatible = "buffalo,lschl", "marvell,orion5x-88f5182", "marvell,orion5x"; - - memory { /* 128 MB */ - device_type = "memory"; - reg = <0x00000000 0x8000000>; - }; - - gpio_keys { - func { - label = "Function Button"; - linux,code = ; - gpios = <&gpio0 15 GPIO_ACTIVE_LOW>; - }; - - power-on-switch { - gpios = <&gpio0 8 GPIO_ACTIVE_LOW>; - }; - - power-auto-switch { - gpios = <&gpio0 10 GPIO_ACTIVE_LOW>; - }; - }; - - gpio_leds { - pinctrl-0 = <&pmx_led_power &pmx_led_alarm &pmx_led_info &pmx_led_func>; - blue-power-led { - gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; - }; - - red-alarm-led { - gpios = <&gpio0 2 GPIO_ACTIVE_LOW>; - }; - - amber-info-led { - gpios = <&gpio0 3 GPIO_ACTIVE_LOW>; - }; - - func { - label = "lschl:func:blue:top"; - gpios = <&gpio0 17 GPIO_ACTIVE_LOW>; - }; - }; - - gpio_fan { - gpios = <&gpio0 14 GPIO_ACTIVE_LOW - &gpio0 16 GPIO_ACTIVE_LOW>; - - alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; - }; -}; - -&pinctrl { - pmx_led_power: pmx-leds { - marvell,pins = "mpp0"; - marvell,function = "gpio"; - }; - - pmx_power_hdd: pmx-power-hdd { - marvell,pins = "mpp1"; - marvell,function = "gpio"; - }; - - pmx_led_alarm: pmx-leds { - marvell,pins = "mpp2"; - marvell,function = "gpio"; - }; - - pmx_led_info: pmx-leds { - marvell,pins = "mpp3"; - marvell,function = "gpio"; - }; - - pmx_fan_lock: pmx-fan-lock { - marvell,pins = "mpp6"; - marvell,function = "gpio"; - }; - - pmx_power_switch: pmx-power-switch { - marvell,pins = "mpp8", "mpp10", "mpp15"; - marvell,function = "gpio"; - }; - - pmx_power_usb: pmx-power-usb { - marvell,pins = "mpp9"; - marvell,function = "gpio"; - }; - - pmx_fan_high: pmx-fan-high { - marvell,pins = "mpp14"; - marvell,function = "gpio"; - }; - - pmx_fan_low: pmx-fan-low { - marvell,pins = "mpp16"; - marvell,function = "gpio"; - }; - - pmx_led_func: pmx-leds { - marvell,pins = "mpp17"; - marvell,function = "gpio"; - }; - - pmx_sw_init: pmx-sw-init { - marvell,pins = "mpp7"; - marvell,function = "gpio"; - }; -}; - -&hdd_power { - gpios = <&gpio0 1 GPIO_ACTIVE_HIGH>; -}; - -&usb_power { - gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>; -}; - -- cgit From 7a7b5df84b6b4e5d599c7289526eed96541a0654 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Jan 2017 11:26:38 +0100 Subject: HID: cp2112: fix sleep-while-atomic A recent commit fixing DMA-buffers on stack added a shared transfer buffer protected by a spinlock. This is broken as the USB HID request callbacks can sleep. Fix this up by replacing the spinlock with a mutex. Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable") Cc: stable # 4.9 Signed-off-by: Johan Hovold Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index f31a778b0851..3e0b6bad29f2 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -168,7 +168,7 @@ struct cp2112_device { atomic_t xfer_avail; struct gpio_chip gc; u8 *in_out_buffer; - spinlock_t lock; + struct mutex lock; struct gpio_desc *desc[8]; bool gpio_poll; @@ -186,10 +186,9 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -213,7 +212,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) ret = 0; exit: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret <= 0 ? ret : -EIO; } @@ -222,10 +221,9 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); buf[0] = CP2112_GPIO_SET; buf[1] = value ? 0xff : 0; @@ -237,7 +235,7 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) if (ret < 0) hid_err(hdev, "error setting GPIO values: %d\n", ret); - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); } static int cp2112_gpio_get_all(struct gpio_chip *chip) @@ -245,10 +243,9 @@ static int cp2112_gpio_get_all(struct gpio_chip *chip) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT, @@ -262,7 +259,7 @@ static int cp2112_gpio_get_all(struct gpio_chip *chip) ret = buf[1]; exit: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret; } @@ -284,10 +281,9 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -308,7 +304,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, goto fail; } - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); /* * Set gpio value when output direction is already set, @@ -319,7 +315,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, return 0; fail: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret < 0 ? ret : -EIO; } @@ -1235,7 +1231,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!dev->in_out_buffer) return -ENOMEM; - spin_lock_init(&dev->lock); + mutex_init(&dev->lock); ret = hid_parse(hdev); if (ret) { -- cgit From 8e9faa15469ed7c7467423db4c62aeed3ff4cae3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Jan 2017 11:26:39 +0100 Subject: HID: cp2112: fix gpio-callback error handling In case of a zero-length report, the gpio direction_input callback would currently return success instead of an errno. Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable") Cc: stable # 4.9 Signed-off-by: Johan Hovold Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 3e0b6bad29f2..b22d0f83f8e3 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -213,7 +213,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) exit: mutex_unlock(&dev->lock); - return ret <= 0 ? ret : -EIO; + return ret < 0 ? ret : -EIO; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -- cgit From 92c715fca907686f5298220ece53423e38ba3aed Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 31 Jan 2017 10:25:25 +0100 Subject: drm/atomic: Fix double free in drm_atomic_state_default_clear drm_atomic_helper_page_flip and drm_atomic_ioctl set their own events in crtc_state->event. But when it's set the event is freed in 2 places. Solve this by only freeing the event in the atomic ioctl when it allocated its own event. This has been broken twice. The first time when the code was introduced, but only in the corner case when an event is allocated, but more crtc's were included by atomic check and then failing. This can mostly happen when you do an atomic modeset in i915 and the display clock is changed, which forces all crtc's to be included to the state. This has been broken worse by adding in-fences support, which caused the double free to be done unconditionally. [IGT] kms_rotation_crc: starting subtest primary-rotation-180 ============================================================================= BUG kmalloc-128 (Tainted: G U ): Object already free ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in drm_atomic_helper_setup_commit+0x285/0x2f0 [drm_kms_helper] age=0 cpu=3 pid=1529 ___slab_alloc+0x308/0x3b0 __slab_alloc+0xd/0x20 kmem_cache_alloc_trace+0x92/0x1c0 drm_atomic_helper_setup_commit+0x285/0x2f0 [drm_kms_helper] intel_atomic_commit+0x35/0x4f0 [i915] drm_atomic_commit+0x46/0x50 [drm] drm_mode_atomic_ioctl+0x7d4/0xab0 [drm] drm_ioctl+0x2b3/0x490 [drm] do_vfs_ioctl+0x69c/0x700 SyS_ioctl+0x4e/0x80 entry_SYSCALL_64_fastpath+0x13/0x94 INFO: Freed in drm_event_cancel_free+0xa3/0xb0 [drm] age=0 cpu=3 pid=1529 __slab_free+0x48/0x2e0 kfree+0x159/0x1a0 drm_event_cancel_free+0xa3/0xb0 [drm] drm_mode_atomic_ioctl+0x86d/0xab0 [drm] drm_ioctl+0x2b3/0x490 [drm] do_vfs_ioctl+0x69c/0x700 SyS_ioctl+0x4e/0x80 entry_SYSCALL_64_fastpath+0x13/0x94 INFO: Slab 0xffffde1f0997b080 objects=17 used=2 fp=0xffff92fb65ec2578 flags=0x200000000008101 INFO: Object 0xffff92fb65ec2578 @offset=1400 fp=0xffff92fb65ec2ae8 Redzone ffff92fb65ec2570: bb bb bb bb bb bb bb bb ........ Object ffff92fb65ec2578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec2588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec2598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec25a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec25b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec25c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec25d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff92fb65ec25e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. Redzone ffff92fb65ec25f8: bb bb bb bb bb bb bb bb ........ Padding ffff92fb65ec2738: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ CPU: 3 PID: 180 Comm: kworker/3:2 Tainted: G BU 4.10.0-rc6-patser+ #5039 Hardware name: /NUC5PPYB, BIOS PYBSWCEL.86A.0031.2015.0601.1712 06/01/2015 Workqueue: events intel_atomic_helper_free_state [i915] Call Trace: dump_stack+0x4d/0x6d print_trailer+0x20c/0x220 free_debug_processing+0x1c6/0x330 ? drm_atomic_state_default_clear+0xf7/0x1c0 [drm] __slab_free+0x48/0x2e0 ? drm_atomic_state_default_clear+0xf7/0x1c0 [drm] kfree+0x159/0x1a0 drm_atomic_state_default_clear+0xf7/0x1c0 [drm] ? drm_atomic_state_clear+0x30/0x30 [drm] intel_atomic_state_clear+0xd/0x20 [i915] drm_atomic_state_clear+0x1a/0x30 [drm] __drm_atomic_state_free+0x13/0x60 [drm] intel_atomic_helper_free_state+0x5d/0x70 [i915] process_one_work+0x260/0x4a0 worker_thread+0x2d1/0x4f0 kthread+0x127/0x130 ? process_one_work+0x4a0/0x4a0 ? kthread_stop+0x120/0x120 ret_from_fork+0x29/0x40 FIX kmalloc-128: Object at 0xffff92fb65ec2578 not freed Fixes: 3b24f7d67581 ("drm/atomic: Add struct drm_crtc_commit to track async updates") Fixes: 9626014258a5 ("drm/fence: add in-fences support") Cc: # v4.8+ Cc: Daniel Vetter Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Reviewed-by: Gustavo Padovan Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1485854725-27640-1-git-send-email-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_atomic.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 50f5cf7b69d1..fdfb1ec17e66 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -2032,13 +2032,16 @@ static void complete_crtc_signaling(struct drm_device *dev, } for_each_crtc_in_state(state, crtc, crtc_state, i) { + struct drm_pending_vblank_event *event = crtc_state->event; /* - * TEST_ONLY and PAGE_FLIP_EVENT are mutually - * exclusive, if they weren't, this code should be - * called on success for TEST_ONLY too. + * Free the allocated event. drm_atomic_helper_setup_commit + * can allocate an event too, so only free it if it's ours + * to prevent a double free in drm_atomic_state_clear. */ - if (crtc_state->event) - drm_event_cancel_free(dev, &crtc_state->event->base); + if (event && (event->base.fence || event->base.file_priv)) { + drm_event_cancel_free(dev, &event->base); + crtc_state->event = NULL; + } } if (!fence_state) -- cgit From 79c6f448c8b79c321e4a1f31f98194e4f6b6cae7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Jan 2017 19:27:10 -0500 Subject: tracing: Fix hwlat kthread migration The hwlat tracer creates a kernel thread at start of the tracer. It is pinned to a single CPU and will move to the next CPU after each period of running. If the user modifies the migration thread's affinity, it will not change after that happens. The original code created the thread at the first instance it was called, but later was changed to destroy the thread after the tracer was finished, and would not be created until the next instance of the tracer was established. The code that initialized the affinity was only called on the initial instantiation of the tracer. After that, it was not initialized, and the previous affinity did not match the current newly created one, making it appear that the user modified the thread's affinity when it did not, and the thread failed to migrate again. Cc: stable@vger.kernel.org Fixes: 0330f7aa8ee6 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 775569ec50d0..af344a1bf0d0 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -266,7 +266,7 @@ out: static struct cpumask save_cpumask; static bool disable_migrate; -static void move_to_next_cpu(void) +static void move_to_next_cpu(bool initmask) { static struct cpumask *current_mask; int next_cpu; @@ -275,7 +275,7 @@ static void move_to_next_cpu(void) return; /* Just pick the first CPU on first iteration */ - if (!current_mask) { + if (initmask) { current_mask = &save_cpumask; get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); @@ -330,10 +330,12 @@ static void move_to_next_cpu(void) static int kthread_fn(void *data) { u64 interval; + bool initmask = true; while (!kthread_should_stop()) { - move_to_next_cpu(); + move_to_next_cpu(initmask); + initmask = false; local_irq_disable(); get_sample(); -- cgit From d07830db1bdb254e4b50d366010b219286b8c937 Mon Sep 17 00:00:00 2001 From: "Marcel J.E. Mol" Date: Mon, 30 Jan 2017 19:26:40 +0100 Subject: USB: serial: pl2303: add ATEN device ID Seems that ATEN serial-to-usb devices using pl2303 exist with different device ids. This patch adds a missing device ID so it is recognised by the driver. Signed-off-by: Marcel J.E. Mol Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 46fca6b75846..1db4b61bdf7b 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID_UCSGT) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index e3b7af8adfb7..09d9be88209e 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 -- cgit From d19fb70dd68c4e960e2ac09b0b9c79dfdeefa726 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 18 Jan 2017 19:04:42 +0800 Subject: NFSD: Fix a null reference case in find_or_create_lock_stateid() nfsd assigns the nfs4_free_lock_stateid to .sc_free in init_lock_stateid(). If nfsd doesn't go through init_lock_stateid() and put stateid at end, there is a NULL reference to .sc_free when calling nfs4_put_stid(ns). This patch let the nfs4_stid.sc_free assignment to nfs4_alloc_stid(). Cc: stable@vger.kernel.org Fixes: 356a95ece7aa "nfsd: clean up races in lock stateid searching..." Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4layouts.c | 5 +++-- fs/nfsd/nfs4state.c | 19 ++++++++----------- fs/nfsd/state.h | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 596205d939a1..1fc07a9c70e9 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_layout_stateid *ls; struct nfs4_stid *stp; - stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); + stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache, + nfsd4_free_layout_stateid); if (!stp) return NULL; - stp->sc_free = nfsd4_free_layout_stateid; + get_nfs4_file(fp); stp->sc_file = fp; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b4beaaa4eaa..a0dee8ae9f97 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,8 +633,8 @@ out: return co; } -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab) +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; @@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, idr_preload_end(); if (new_id < 0) goto out_free; + + stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; @@ -675,15 +677,12 @@ out_free: static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; - struct nfs4_ol_stateid *stp; - stid = nfs4_alloc_stid(clp, stateid_slab); + stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; - stp = openlockstateid(stid); - stp->st_stid.sc_free = nfs4_free_ol_stateid; - return stp; + return openlockstateid(stid); } static void nfs4_free_deleg(struct nfs4_stid *stid) @@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) goto out_dec; - dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; - stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, lst = find_lock_stateid(lo, fi); if (lst == NULL) { spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab); + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c9399366f9df..4516e8b7d776 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab); +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); -- cgit From 41f53350a0f36a7b8e31bec0d0ca907e028ab4cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Jan 2017 09:22:41 +0100 Subject: nfsd: special case truncates some more Both the NFS protocols and the Linux VFS use a setattr operation with a bitmap of attributs to set to set various file attributes including the file size and the uid/gid. The Linux syscalls never mixes size updates with unrelated updates like the uid/gid, and some file systems like XFS and GFS2 rely on the fact that truncates might not update random other attributes, and many other file systems handle the case but do not update the different attributes in the same transaction. NFSD on the other hand passes the attributes it gets on the wire more or less directly through to the VFS, leading to updates the file systems don't expect. XFS at least has an assert on the allowed attributes, which caught an unusual NFS client setting the size and group at the same time. To handle this issue properly this switches nfsd to call vfs_truncate for size changes, and then handle all other attributes through notify_change. As a side effect this also means less boilerplace code around the size change as we can now reuse the VFS code. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 97 +++++++++++++++++++++++------------------------------------ 1 file changed, 37 insertions(+), 60 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 26c6fdb4bf67..ca13236dbb1f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -332,37 +332,6 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) } } -static __be32 -nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct iattr *iap) -{ - struct inode *inode = d_inode(fhp->fh_dentry); - int host_err; - - if (iap->ia_size < inode->i_size) { - __be32 err; - - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); - if (err) - return err; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserrno; - - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) - goto out_put_write_access; - return 0; - -out_put_write_access: - put_write_access(inode); -out_nfserrno: - return nfserrno(host_err); -} - /* * Set various file attributes. After this call fhp needs an fh_put. */ @@ -377,7 +346,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +358,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out_host_err; } dentry = fhp->fh_dentry; @@ -405,50 +373,59 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call for vfs_truncate, and do the rest as a + * a separate setattr call. */ if (iap->ia_valid & ATTR_SIZE) { - err = nfsd_get_write_access(rqstp, fhp, iap); - if (err) - goto out; - size_change = 1; + struct path path = { + .mnt = fhp->fh_export->ex_path.mnt, + .dentry = dentry, + }; + bool implicit_mtime = false; /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. - * - * (and similar for the older RFCs) + * vfs_truncate implicity updates the mtime IFF the file size + * actually changes. Avoid the additional seattr call below if + * the only other attribute that the client sends is the mtime. */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; - } + if (iap->ia_size != i_size_read(inode) && + ((iap->ia_valid & ~(ATTR_SIZE | ATTR_MTIME)) == 0)) + implicit_mtime = true; - iap->ia_valid |= ATTR_CTIME; + host_err = vfs_truncate(&path, iap->ia_size); + if (host_err) + goto out_host_err; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; + iap->ia_valid &= ~ATTR_SIZE; + if (implicit_mtime) + iap->ia_valid &= ~ATTR_MTIME; + if (!iap->ia_valid) + goto done; } + iap->ia_valid |= ATTR_CTIME; + fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - err = nfserrno(host_err); + if (host_err) + goto out_host_err; -out_put_write_access: - if (size_change) - put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); -out: - return err; +done: + host_err = commit_metadata(fhp); +out_host_err: + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) -- cgit From 034dd34ff4916ec1f8f74e39ca3efb04eab2f791 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 Jan 2017 11:37:50 -0500 Subject: svcrpc: fix oops in absence of krb5 module Olga Kornievskaia says: "I ran into this oops in the nfsd (below) (4.10-rc3 kernel). To trigger this I had a client (unsuccessfully) try to mount the server with krb5 where the server doesn't have the rpcsec_gss_krb5 module built." The problem is that rsci.cred is copied from a svc_cred structure that gss_proxy didn't properly initialize. Fix that. [120408.542387] general protection fault: 0000 [#1] SMP ... [120408.565724] CPU: 0 PID: 3601 Comm: nfsd Not tainted 4.10.0-rc3+ #16 [120408.567037] Hardware name: VMware, Inc. VMware Virtual = Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [120408.569225] task: ffff8800776f95c0 task.stack: ffffc90003d58000 [120408.570483] RIP: 0010:gss_mech_put+0xb/0x20 [auth_rpcgss] ... [120408.584946] ? rsc_free+0x55/0x90 [auth_rpcgss] [120408.585901] gss_proxy_save_rsc+0xb2/0x2a0 [auth_rpcgss] [120408.587017] svcauth_gss_proxy_init+0x3cc/0x520 [auth_rpcgss] [120408.588257] ? __enqueue_entity+0x6c/0x70 [120408.589101] svcauth_gss_accept+0x391/0xb90 [auth_rpcgss] [120408.590212] ? try_to_wake_up+0x4a/0x360 [120408.591036] ? wake_up_process+0x15/0x20 [120408.592093] ? svc_xprt_do_enqueue+0x12e/0x2d0 [sunrpc] [120408.593177] svc_authenticate+0xe1/0x100 [sunrpc] [120408.594168] svc_process_common+0x203/0x710 [sunrpc] [120408.595220] svc_process+0x105/0x1c0 [sunrpc] [120408.596278] nfsd+0xe9/0x160 [nfsd] [120408.597060] kthread+0x101/0x140 [120408.597734] ? nfsd_destroy+0x60/0x60 [nfsd] [120408.598626] ? kthread_park+0x90/0x90 [120408.599448] ret_from_fork+0x22/0x30 Fixes: 1d658336b05f "SUNRPC: Add RPC based upcall mechanism for RPCGSS auth" Cc: stable@vger.kernel.org Cc: Simo Sorce Reported-by: Olga Kornievskaia Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index dc6fb79a361f..25d9a9cf7b66 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; -- cgit From c73e44269369e936165f0f9b61f1f09a11dae01c Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 30 Jan 2017 15:06:43 +0100 Subject: net: thunderx: avoid dereferencing xcv when NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following smatch and coccinelle warnings: drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119 xcv_setup_link() error: we previously assumed 'xcv' could be null (see line 118) [smatch] drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119:16-20: ERROR: xcv is NULL but dereferenced. [coccinelle] Fixes: 6465859aba1e66a5 ("net: thunderx: Add RGMII interface type support") Signed-off-by: Vincent Stehlé Cc: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_xcv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c index 67befedef709..578c7f8f11bf 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -116,8 +116,7 @@ void xcv_setup_link(bool link_up, int link_speed) int speed = 2; if (!xcv) { - dev_err(&xcv->pdev->dev, - "XCV init not done, probe may have failed\n"); + pr_err("XCV init not done, probe may have failed\n"); return; } -- cgit From 90427ef5d2a4b9a24079889bf16afdcdaebc4240 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Mon, 30 Jan 2017 14:09:42 -0800 Subject: ipv6: fix flow labels when the traffic class is non-0 ip6_make_flowlabel() determines the flow label for IPv6 packets. It's supposed to be passed a flow label, which it returns as is if non-0 and in some other cases, otherwise it calculates a new value. The problem is callers often pass a flowi6.flowlabel, which may also contain traffic class bits. If the traffic class is non-0 ip6_make_flowlabel() mistakes the non-0 it gets as a flow label and returns the whole thing. Thus it can return a 'flow label' longer than 20b and the low 20b of that is typically 0 resulting in packets with 0 label. Moreover, different packets of a flow may be labeled differently. For a TCP flow with ECN non-payload and payload packets get different labels as exemplified by this pair of consecutive packets: (pure ACK) Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2:: 0110 .... = Version: 6 .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) .... .... .... 0001 1100 1110 0100 1001 = Flow Label: 0x1ce49 Payload Length: 32 Next Header: TCP (6) (payload) Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2:: 0110 .... = Version: 6 .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0)) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2) .... .... .... 0000 0000 0000 0000 0000 = Flow Label: 0x00000 Payload Length: 688 Next Header: TCP (6) This patch allows ip6_make_flowlabel() to be passed more than just a flow label and has it extract the part it really wants. This was simpler than modifying the callers. With this patch packets like the above become Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2:: 0110 .... = Version: 6 .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e Payload Length: 32 Next Header: TCP (6) Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2:: 0110 .... = Version: 6 .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0)) .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2) .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e Payload Length: 688 Next Header: TCP (6) Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipv6.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7afe991e900e..dbf0abba33b8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,6 +776,11 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, { u32 hash; + /* @flowlabel may include more than a flow label, eg, the traffic class. + * Here we want only the flow label value. + */ + flowlabel &= IPV6_FLOWLABEL_MASK; + if (flowlabel || net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || (!autolabel && -- cgit From 62deb8187d116581c88c69a2dd9b5c16588545d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Jan 2017 14:29:17 +0000 Subject: FS-Cache: Initialise stores_lock in netfs cookie Initialise the stores_lock in fscache netfs cookies. Technically, it shouldn't be necessary, since the netfs cookie is an index and stores no data, but initialising it anyway adds insignificant overhead. Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Steve Dickson Signed-off-by: Al Viro --- fs/fscache/netfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 9b28649df3a1..a8aa00be4444 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ -- cgit From 6bdded59c8933940ac7e5b416448276ac89d1144 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Jan 2017 14:29:25 +0000 Subject: fscache: Clear outstanding writes when disabling a cookie fscache_disable_cookie() needs to clear the outstanding writes on the cookie it's disabling because they cannot be completed after. Without this, fscache_nfs_open_file() gets stuck because it disables the cookie when the file is opened for writing but can't uncache the pages till afterwards - otherwise there's a race between the open routine and anyone who already has it open R/O and is still reading from it. Looking in /proc/pid/stack of the offending process shows: [] __fscache_wait_on_page_write+0x82/0x9b [fscache] [] __fscache_uncache_all_inode_pages+0x91/0xe1 [fscache] [] nfs_fscache_open_file+0x59/0x9e [nfs] [] nfs4_file_open+0x17f/0x1b8 [nfsv4] [] do_dentry_open+0x16d/0x2b7 [] vfs_open+0x5c/0x65 [] path_openat+0x785/0x8fb [] do_filp_open+0x48/0x9e [] do_sys_open+0x13b/0x1cb [] SyS_open+0x19/0x1b [] do_syscall_64+0x80/0x17a [] return_from_SYSCALL_64+0x0/0x7a [] 0xffffffffffffffff Reported-by: Jianhong Yin Signed-off-by: David Howells Acked-by: Jeff Layton Acked-by: Steve Dickson Signed-off-by: Al Viro --- fs/fscache/cookie.c | 5 +++++ fs/fscache/object.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 4304072161aa..40d61077bead 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (invalidate) set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); } } else { @@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, TASK_UNINTERRUPTIBLE); + /* Make sure any pending writes are cancelled. */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + fscache_invalidate_writes(cookie); + /* Reset the cookie state if it wasn't relinquished */ if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { atomic_inc(&cookie->n_active); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 9e792e30f4db..be02a086ed9b 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -645,6 +645,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob fscache_mark_object_dead(object); object->oob_event_mask = 0; + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) { + /* Reject any new read/write ops and abort any that are pending. */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + } + if (list_empty(&object->dependents) && object->n_ops == 0 && object->n_children == 0) -- cgit From e26bfebdfc0d212d366de9990a096665d5c0209a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 09:45:28 +0000 Subject: fscache: Fix dead object requeue Under some circumstances, an fscache object can become queued such that it fscache_object_work_func() can be called once the object is in the OBJECT_DEAD state. This results in the kernel oopsing when it tries to invoke the handler for the state (which is hard coded to 0x2). The way this comes about is something like the following: (1) The object dispatcher is processing a work state for an object. This is done in workqueue context. (2) An out-of-band event comes in that isn't masked, causing the object to be queued, say EV_KILL. (3) The object dispatcher finishes processing the current work state on that object and then sees there's another event to process, so, without returning to the workqueue core, it processes that event too. It then follows the chain of events that initiates until we reach OBJECT_DEAD without going through a wait state (such as WAIT_FOR_CLEARANCE). At this point, object->events may be 0, object->event_mask will be 0 and oob_event_mask will be 0. (4) The object dispatcher returns to the workqueue processor, and in due course, this sees that the object's work item is still queued and invokes it again. (5) The current state is a work state (OBJECT_DEAD), so the dispatcher jumps to it - resulting in an OOPS. When I'm seeing this, the work state in (1) appears to have been either LOOK_UP_OBJECT or CREATE_OBJECT (object->oob_table is fscache_osm_lookup_oob). The window for (2) is very small: (A) object->event_mask is cleared whilst the event dispatch process is underway - though there's no memory barrier to force this to the top of the function. The window, therefore is from the time the object was selected by the workqueue processor and made requeueable to the time the mask was cleared. (B) fscache_raise_event() will only queue the object if it manages to set the event bit and the corresponding event_mask bit was set. The enqueuement is then deferred slightly whilst we get a ref on the object and get the per-CPU variable for workqueue congestion. This slight deferral slightly increases the probability by allowing extra time for the workqueue to make the item requeueable. Handle this by giving the dead state a processor function and checking the for the dead state address rather than seeing if the processor function is address 0x2. The dead state processor function can then set a flag to indicate that it's occurred and give a warning if it occurs more than once per object. If this race occurs, an oops similar to the following is seen (note the RIP value): BUG: unable to handle kernel NULL pointer dereference at 0000000000000002 IP: [<0000000000000002>] 0x1 PGD 0 Oops: 0010 [#1] SMP Modules linked in: ... CPU: 17 PID: 16077 Comm: kworker/u48:9 Not tainted 3.10.0-327.18.2.el7.x86_64 #1 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015 Workqueue: fscache_object fscache_object_work_func [fscache] task: ffff880302b63980 ti: ffff880717544000 task.ti: ffff880717544000 RIP: 0010:[<0000000000000002>] [<0000000000000002>] 0x1 RSP: 0018:ffff880717547df8 EFLAGS: 00010202 RAX: ffffffffa0368640 RBX: ffff880edf7a4480 RCX: dead000000200200 RDX: 0000000000000002 RSI: 00000000ffffffff RDI: ffff880edf7a4480 RBP: ffff880717547e18 R08: 0000000000000000 R09: dfc40a25cb3a4510 R10: dfc40a25cb3a4510 R11: 0000000000000400 R12: 0000000000000000 R13: ffff880edf7a4510 R14: ffff8817f6153400 R15: 0000000000000600 FS: 0000000000000000(0000) GS:ffff88181f420000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000002 CR3: 000000000194a000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0363695 ffff880edf7a4510 ffff88093f16f900 ffff8817faa4ec00 ffff880717547e60 ffffffff8109d5db 00000000faa4ec18 0000000000000000 ffff8817faa4ec18 ffff88093f16f930 ffff880302b63980 ffff88093f16f900 Call Trace: [] ? fscache_object_work_func+0xa5/0x200 [fscache] [] process_one_work+0x17b/0x470 [] worker_thread+0x21c/0x400 [] ? rescuer_thread+0x400/0x400 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Signed-off-by: David Howells Acked-by: Jeremy McNicoll Tested-by: Frank Sorenson Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington Signed-off-by: Al Viro --- fs/fscache/object.c | 26 ++++++++++++++++++++++++-- include/linux/fscache-cache.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index be02a086ed9b..7a182c87f378 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object static const struct fscache_state *fscache_object_available(struct fscache_object *, int); static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); static const struct fscache_state *fscache_update_object(struct fscache_object *, int); +static const struct fscache_state *fscache_object_dead(struct fscache_object *, int); #define __STATE_NAME(n) fscache_osm_##n #define STATE(n) (&__STATE_NAME(n)) @@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); -static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); +static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead); static WAIT_STATE(WAIT_FOR_INIT, "?INI", TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); @@ -229,6 +230,10 @@ execute_work_state: event = -1; if (new_state == NO_TRANSIT) { _debug("{OBJ%x} %s notrans", object->debug_id, state->name); + if (unlikely(state == STATE(OBJECT_DEAD))) { + _leave(" [dead]"); + return; + } fscache_enqueue_object(object); event_mask = object->oob_event_mask; goto unmask_events; @@ -239,7 +244,7 @@ execute_work_state: object->state = state = new_state; if (state->work) { - if (unlikely(state->work == ((void *)2UL))) { + if (unlikely(state == STATE(OBJECT_DEAD))) { _leave(" [dead]"); return; } @@ -1083,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object, } } EXPORT_SYMBOL(fscache_object_mark_killed); + +/* + * The object is dead. We can get here if an object gets queued by an event + * that would lead to its death (such as EV_KILL) when the dispatcher is + * already running (and so can be requeued) but hasn't yet cleared the event + * mask. + */ +static const struct fscache_state *fscache_object_dead(struct fscache_object *object, + int event) +{ + if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD, + &object->flags)) + return NO_TRANSIT; + + WARN(true, "FS-Cache object redispatched after death"); + return NO_TRANSIT; +} diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 13ba552e6c09..4c467ef50159 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -360,6 +360,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ #define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ +#define FSCACHE_OBJECT_RUN_AFTER_DEAD 8 /* T if object has been dispatched after death */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ -- cgit From aaaec6fc755447a1d056765b11b24d8ff2b81366 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 19:03:21 +0100 Subject: x86/irq: Make irq activate operations symmetric The recent commit which prevents double activation of interrupts unearthed interesting code in x86. The code (ab)uses irq_domain_activate_irq() to reconfigure an already activated interrupt. That trips over the prevention code now. Fix it by deactivating the interrupt before activating the new configuration. Fixes: 08d85f3ea99f1 "irqdomain: Avoid activating interrupts more than once" Reported-and-tested-by: Mike Galbraith Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andrey Ryabinin Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701311901580.3457@nanos --- arch/x86/kernel/apic/io_apic.c | 2 ++ arch/x86/kernel/hpet.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1e35dd06b090..52f352b063fd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2117,6 +2117,7 @@ static inline void __init check_timer(void) if (idx != -1 && irq_trigger(idx)) unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) @@ -2138,6 +2139,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 85e87b46c318..dc6ba5bda9fc 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); -- cgit From 0becc0ae5b42828785b589f686725ff5bc3b9b25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 09:37:34 +0100 Subject: x86/mce: Make timer handling more robust Erik reported that on a preproduction hardware a CMCI storm triggers the BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is started by the CMCI logic before the MCE CPU hotplug callback starts the timer with add_timer_on(). So the timer is already queued which triggers the BUG. Using add_timer_on() is pretty pointless in this code because the timer is strictlty per CPU, initialized as pinned and all operations which arm the timer happen on the CPU to which the timer belongs. Simplify the whole machinery by using mod_timer() instead of add_timer_on() which avoids the problem because mod_timer() can handle already queued timers. Use __start_timer() everywhere so the earliest armed expiry time is preserved. Reported-by: Erik Veijola Tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Tony Luck Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 00ef43233e03..537c6647d84c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void __restart_timer(struct timer_list *t, unsigned long interval) +static void __start_timer(struct timer_list *t, unsigned long interval) { unsigned long when = jiffies + interval; unsigned long flags; local_irq_save(flags); - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer(t, when); - } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); - } + if (!timer_pending(t) || time_before(when, t->expires)) + mod_timer(t, round_jiffies(when)); local_irq_restore(flags); } @@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); - __restart_timer(t, iv); + __start_timer(t, iv); } /* @@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); - __restart_timer(t, interval); + __start_timer(t, interval); if (interval < iv) __this_cpu_write(mce_next_interval, interval); @@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; if (mca_cfg.ignore_ce || !iv) return; - per_cpu(mce_next_interval, cpu) = iv; - - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, cpu); + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); } static void __mcheck_cpu_setup_timer(void) @@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void) unsigned int cpu = smp_processor_id(); setup_pinned_timer(t, mce_timer_fn, cpu); - mce_start_timer(cpu, t); + mce_start_timer(t); } /* Handle unconfigured int18 (should never happen) */ @@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); int ret; mce_device_create(cpu); @@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu) return ret; } mce_reenable_cpu(); - mce_start_timer(cpu, t); + mce_start_timer(t); return 0; } static int mce_cpu_pre_down(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); mce_disable_cpu(); del_timer_sync(t); -- cgit From 970d14e3989160ee9e97c7d75ecbc893fd29dab9 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 25 Jan 2017 00:54:07 +0530 Subject: nvdimm: constify device_type structures Declare device_type structure as const as it is only stored in the type field of a device structure. This field is of type const, so add const to declaration of device_type structure. File size before: text data bss dec hex filename 19278 3199 16 22493 57dd nvdimm/namespace_devs.o File size after: text data bss dec hex filename 19929 3160 16 23105 5a41 nvdimm/namespace_devs.o Signed-off-by: Bhumika Goyal Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index a518cb1b59d4..eda027db0918 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -52,17 +52,17 @@ static void namespace_blk_release(struct device *dev) kfree(nsblk); } -static struct device_type namespace_io_device_type = { +static const struct device_type namespace_io_device_type = { .name = "nd_namespace_io", .release = namespace_io_release, }; -static struct device_type namespace_pmem_device_type = { +static const struct device_type namespace_pmem_device_type = { .name = "nd_namespace_pmem", .release = namespace_pmem_release, }; -static struct device_type namespace_blk_device_type = { +static const struct device_type namespace_blk_device_type = { .name = "nd_namespace_blk", .release = namespace_blk_release, }; -- cgit From 9d032f4201d39e5cf43a8709a047e481f5723fdc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 25 Jan 2017 00:54:07 +0530 Subject: libnvdimm, namespace: do not delete namespace-id 0 Given that the naming of pmem devices changes from the pmemX form to the pmemX.Y form when namespace id is greater than 0, arrange for namespaces with id-0 to be exempt from deletion. Otherwise a simple reconfiguration of an existing namespace to a new mode results in a name change of the resulting block device: # ndctl list --namespace=namespace1.0 { "dev":"namespace1.0", "mode":"raw", "size":2147483648, "uuid":"3dadf3dc-89b9-4b24-b20e-abc8a4707ce3", "blockdev":"pmem1" } # ndctl create-namespace --reconfig=namespace1.0 --mode=memory --force { "dev":"namespace1.1", "mode":"memory", "size":2111832064, "uuid":"7b4a6341-7318-4219-a02c-fb57c0bbf613", "blockdev":"pmem1.1" } This change does require tooling changes to explicitly look for namespaceX.0 if the seed has already advanced to another namespace. Cc: Fixes: 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem-namespaces per region") Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index eda027db0918..ce3e8dfa10ad 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -962,8 +962,8 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) struct nvdimm_drvdata *ndd; struct nd_label_id label_id; u32 flags = 0, remainder; + int rc, i, id = -1; u8 *uuid = NULL; - int rc, i; if (dev->driver || ndns->claim) return -EBUSY; @@ -972,11 +972,13 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); uuid = nspm->uuid; + id = nspm->id; } else if (is_namespace_blk(dev)) { struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); uuid = nsblk->uuid; flags = NSLABEL_FLAG_LOCAL; + id = nsblk->id; } /* @@ -1039,10 +1041,11 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) /* * Try to delete the namespace if we deleted all of its - * allocation, this is not the seed device for the region, and - * it is not actively claimed by a btt instance. + * allocation, this is not the seed or 0th device for the + * region, and it is not actively claimed by a btt, pfn, or dax + * instance. */ - if (val == 0 && nd_region->ns_seed != dev && !ndns->claim) + if (val == 0 && id != 0 && nd_region->ns_seed != dev && !ndns->claim) nd_device_unregister(dev, ND_ASYNC); return rc; -- cgit From 4b3e6f2ef3722f1a6a97b6034ed492c1a21fd4ae Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 31 Jan 2017 18:35:37 -0800 Subject: xtensa: fix noMMU build on cores with MMU Commit bf15f86b343ed8 ("xtensa: initialize MMU before jumping to reset vector") calls MMU management functions even when CONFIG_MMU is not selected. That breaks noMMU build on cores with MMU. Don't manage MMU when CONFIG_MMU is not selected. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504..32cdc2c52e98 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -540,7 +540,7 @@ subsys_initcall(topology_init); void cpu_reset(void) { -#if XCHAL_HAVE_PTP_MMU +#if XCHAL_HAVE_PTP_MMU && IS_ENABLED(CONFIG_MMU) local_irq_disable(); /* * We have full MMU: all autoload ways, ways 7, 8 and 9 of DTLB must -- cgit From 2780f3c8f0233de90b6b47a23fc422b7780c5436 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Wed, 25 Jan 2017 22:07:06 -0200 Subject: scsi: qla2xxx: Avoid that issuing a LIP triggers a kernel crash Avoid that issuing a LIP as follows: find /sys -name 'issue_lip'|while read f; do echo 1 > $f; done triggers the following: BUG: unable to handle kernel NULL pointer dereference at (null) Call Trace: qla2x00_abort_all_cmds+0xed/0x140 [qla2xxx] qla2x00_abort_isp_cleanup+0x1e3/0x280 [qla2xxx] qla2x00_abort_isp+0xef/0x690 [qla2xxx] qla2x00_do_dpc+0x36c/0x880 [qla2xxx] kthread+0x10c/0x140 [mkp: consolidated Mauricio's and Bart's fixes] Signed-off-by: Mauricio Faria de Oliveira Reported-by: Bart Van Assche Fixes: 1535aa75a3d8 ("qla2xxx: fix invalid DMA access after command aborts in PCI device remove") Cc: Himanshu Madhani Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ad4edc13ebcf..1cd3c48a6350 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1616,7 +1616,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) /* Don't abort commands in adapter during EEH * recovery as it's not accessible/responding. */ - if (!ha->flags.eeh_busy) { + if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) { /* Get a reference to the sp and drop the lock. * The reference ensures this sp->done() call * - and not the call in qla2xxx_eh_abort() - -- cgit From f2e767bb5d6ee0d988cb7d4e54b0b21175802b6b Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 26 Jan 2017 16:37:01 -0200 Subject: scsi: mpt3sas: Force request partial completion alignment The firmware or device, possibly under a heavy I/O load, can return on a partial unaligned boundary. Scsi-ml expects these requests to be completed on an alignment boundary. Scsi-ml blindly requeues the I/O without checking the alignment boundary of the I/O request for the remaining bytes. This leads to errors, since devices cannot perform non-aligned read/write operations. This patch fixes the issue in the driver. It aligns unaligned completions of FS requests, by truncating them to the nearest alignment boundary. [mkp: simplified if statement] Reported-by: Mauricio Faria De Oliveira Signed-off-by: Guilherme G. Piccoli Signed-off-by: Ram Pai Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 75f3fce1c867..268103182d34 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4657,6 +4657,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; + unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); scmd = _scsih_scsi_lookup_get_clear(ioc, smid); @@ -4715,6 +4716,20 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); + + /* In case of bogus fw or device, we could end up having + * unaligned partial completion. We can force alignment here, + * then scsi-ml does not need to handle this misbehavior. + */ + sector_sz = scmd->device->sector_size; + if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz && + xfer_cnt % sector_sz)) { + sdev_printk(KERN_INFO, scmd->device, + "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", + xfer_cnt, sector_sz); + xfer_cnt = round_down(xfer_cnt, sector_sz); + } + scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); -- cgit From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:38 +0100 Subject: perf/x86/intel/rapl: Make package handling more robust The package management code in RAPL relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left RAPL in broken state. This was not noticed because on a regular boot all CPUs are online before RAPL is initialized. A possible fix would be to reintroduce the mess which allocates a package data structure in CPU prepare and when it turns out to already exist in starting throw it away later in the CPU online callback. But that's a horrible hack and not required at all because RAPL becomes functional for perf only in the CPU online callback. That's correct because user space is not yet informed about the CPU being onlined, so nothing caan rely on RAPL being available on that particular CPU. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. This also adds a missing check for available package data in the event_init() function. Reported-by: Yasuaki Ishimatsu Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 60 +++++++++++++++++++------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 17c3564d087a..22ef4f72cf32 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -161,7 +161,13 @@ static u64 rapl_timer_ms; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - return rapl_pmus->pmus[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event) /* must be done before validate_group */ pmu = cpu_to_rapl_pmu(event->cpu); + if (!pmu) + return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; event->hw.event_base = msr; @@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu) struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; + if (!pmu) { + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -ENOMEM; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + } + /* * Check if there is an online cpu in the package which collects rapl * events already. @@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu) return 0; } -static int rapl_cpu_prepare(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - - if (pmu) - return 0; - - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - pmu->cpu = -1; - rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; - return 0; -} - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -803,29 +804,21 @@ static int __init rapl_pmu_init(void) /* * Install callbacks. Core will call them for each online cpu. */ - - ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare", - rapl_cpu_prepare, NULL); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, "perf/x86/rapl:online", rapl_cpu_online, rapl_cpu_offline); if (ret) - goto out1; + goto out; ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out2; + goto out1; rapl_advertise(); return 0; -out2: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out1: - cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); @@ -836,7 +829,6 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d936a0021839..8329f3dc592c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,7 +10,6 @@ enum cpuhp_state { CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, - CPUHP_PERF_X86_RAPL_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, -- cgit From 1aa6cfd33df492939b0be15ebdbcff1f8ae5ddb6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:39 +0100 Subject: perf/x86/intel/uncore: Clean up hotplug conversion fallout The recent conversion to the hotplug state machine kept two mechanisms from the original code: 1) The first_init logic which adds the number of online CPUs in a package to the refcount. That's wrong because the callbacks are executed for all online CPUs. Remove it so the refcounting is correct. 2) The on_each_cpu() call to undo box->init() in the error handling path. That's bogus because when the prepare callback fails no box has been initialized yet. Remove it. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 1a246b9f58c6 ("perf/x86/intel/uncore: Convert to hotplug state machine") Link: http://lkml.kernel.org/r/20170131230141.298032324@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 44 ++++-------------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 8c4ccdc3a3f3..56c5235dcc29 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -764,30 +764,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) -{ - struct intel_uncore_pmu *pmu = type->pmus; - struct intel_uncore_box *box; - int i, pkg; - - if (pmu) { - pkg = topology_physical_package_id(cpu); - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box) - uncore_box_exit(box); - } - } -} - -static void uncore_exit_boxes(void *dummy) -{ - struct intel_uncore_type **types; - - for (types = uncore_msr_uncores; *types; types++) - __uncore_exit_boxes(*types++, smp_processor_id()); -} - static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int pkg; @@ -1078,22 +1054,12 @@ static int uncore_cpu_dying(unsigned int cpu) return 0; } -static int first_init; - static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, ncpus = 1; - - if (first_init) { - /* - * On init we get the number of online cpus in the package - * and set refcount for all of them. - */ - ncpus = cpumask_weight(topology_core_cpumask(cpu)); - } + int i, pkg; pkg = topology_logical_package_id(cpu); for (; *types; types++) { @@ -1104,7 +1070,7 @@ static int uncore_cpu_starting(unsigned int cpu) if (!box) continue; /* The first cpu on a package activates the box */ - if (atomic_add_return(ncpus, &box->refcnt) == ncpus) + if (atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } @@ -1408,19 +1374,17 @@ static int __init intel_uncore_init(void) "perf/x86/intel/uncore:prepare", uncore_cpu_prepare, NULL); } - first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, "perf/x86/uncore:starting", uncore_cpu_starting, uncore_cpu_dying); - first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, "perf/x86/uncore:online", uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: - /* Undo box->init_box() */ - on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); return ret; -- cgit From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:40 +0100 Subject: perf/x86/intel/uncore: Make package handling more robust The package management code in uncore relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left uncore in broken state. This was not noticed because on a regular boot all CPUs are online before uncore is initialized. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 196 +++++++++++++++++++---------------------- include/linux/cpuhotplug.h | 2 - 2 files changed, 91 insertions(+), 107 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 56c5235dcc29..1ab45976474d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj, struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - return pmu->boxes[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < max_packages ? pmu->boxes[pkgid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -1034,76 +1040,6 @@ static void uncore_pci_exit(void) } } -static int uncore_cpu_dying(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } - return 0; -} - -static int uncore_cpu_starting(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (!box) - continue; - /* The first cpu on a package activates the box */ - if (atomic_inc_return(&box->refcnt) == 1) - uncore_box_init(box); - } - } - - return 0; -} - -static int uncore_cpu_prepare(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - if (pmu->boxes[pkg]) - continue; - /* First cpu of a package allocates the box */ - box = uncore_alloc_box(type, cpu_to_node(cpu)); - if (!box) - return -ENOMEM; - box->pmu = pmu; - box->pkgid = pkg; - pmu->boxes[pkg] = box; - } - } - return 0; -} - static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu) { @@ -1143,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores, static int uncore_event_cpu_offline(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, pkg, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) - return 0; - + goto unref; /* Find a new cpu to collect uncore events */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); @@ -1160,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu) uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); + +unref: + /* Clear the references */ + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } return 0; } +static int allocate_boxes(struct intel_uncore_type **types, + unsigned int pkg, unsigned int cpu) +{ + struct intel_uncore_box *box, *tmp; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + LIST_HEAD(allocated); + int i; + + /* Try to allocate all required boxes */ + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + box = uncore_alloc_box(type, cpu_to_node(cpu)); + if (!box) + goto cleanup; + box->pmu = pmu; + box->pkgid = pkg; + list_add(&box->active_list, &allocated); + } + } + /* Install them in the pmus */ + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + box->pmu->boxes[pkg] = box; + } + return 0; + +cleanup: + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + kfree(box); + } + return -ENOMEM; +} + static int uncore_event_cpu_online(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, ret, pkg, target; + + pkg = topology_logical_package_id(cpu); + ret = allocate_boxes(types, pkg, cpu); + if (ret) + return ret; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box && atomic_inc_return(&box->refcnt) == 1) + uncore_box_init(box); + } + } /* * Check if there is an online cpu in the package @@ -1355,33 +1363,13 @@ static int __init intel_uncore_init(void) if (cret && pret) return -ENODEV; - /* - * Install callbacks. Core will call them for each online cpu. - * - * The first online cpu of each package allocates and takes - * the refcounts for all other online cpus in that package. - * If msrs are not enabled no allocation is required and - * uncore_cpu_prepare() is not called for each online cpu. - */ - if (!cret) { - ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - if (ret) - goto err; - } else { - cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - } - - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, - "perf/x86/uncore:starting", - uncore_cpu_starting, uncore_cpu_dying); - - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, - "perf/x86/uncore:online", - uncore_event_cpu_online, uncore_event_cpu_offline); + /* Install hotplug callbacks to setup the targets for each package */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "perf/x86/intel/uncore:online", + uncore_event_cpu_online, + uncore_event_cpu_offline); + if (ret) + goto err; return 0; err: @@ -1393,9 +1381,7 @@ module_init(intel_uncore_init); static void __exit intel_uncore_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8329f3dc592c..921acaaa1601 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -8,7 +8,6 @@ enum cpuhp_state { CPUHP_CREATE_THREADS, CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, - CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, @@ -85,7 +84,6 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_PERF_X86_UNCORE_STARTING, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, -- cgit From eeee74a4f6625b77c3e8db0693c2d4546507ba0d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 31 Jan 2017 10:21:30 +0100 Subject: drm/atomic: Unconditionally call prepare_fb. Atomic drivers may set properties like rotation on the same fb, which may require a call to prepare_fb even when framebuffer stays identical. Instead of handling all the special cases in the core, let the driver decide when prepare_fb and cleanup_fb are noops. This is a revert of: commit fcc60b413d14dd06ddbd79ec50e83c4fb2a097ba Author: Keith Packard Date: Sat Jun 4 01:16:22 2016 -0700 drm: Don't prepare or cleanup unchanging frame buffers [v3] The original commit mentions that this prevents waiting in i915 on all previous rendering during cursor updates, but there are better ways to fix this. Signed-off-by: Maarten Lankhorst Acked-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/6d82f9b6-9d16-91d1-d176-4a37b09afc44@linux.intel.com (cherry picked from commit 0532be078a207d7dd6ad26ebd0834e258acc4ee7) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485854491-27389-2-git-send-email-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/drm_atomic_helper.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 34f757bcabae..4594477dee00 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1666,9 +1666,6 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev, funcs = plane->helper_private; - if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc)) - continue; - if (funcs->prepare_fb) { ret = funcs->prepare_fb(plane, plane_state); if (ret) @@ -1685,9 +1682,6 @@ fail: if (j >= i) continue; - if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc)) - continue; - funcs = plane->helper_private; if (funcs->cleanup_fb) @@ -1954,9 +1948,6 @@ void drm_atomic_helper_cleanup_planes(struct drm_device *dev, for_each_plane_in_state(old_state, plane, plane_state, i) { const struct drm_plane_helper_funcs *funcs; - if (!drm_atomic_helper_framebuffer_changed(dev, old_state, plane_state->crtc)) - continue; - funcs = plane->helper_private; if (funcs->cleanup_fb) -- cgit From e8fe4f4b2b7b93048729538321c681c0cff33b39 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Jan 2017 10:21:31 +0100 Subject: drm/i915: Track pinned vma in intel_plane_state With atomic plane states we are able to track an allocation right from preparation, during use and through to the final free after being swapped out for a new plane. We can couple the VMA we pin for the framebuffer (and its rotation) to this lifetime and avoid all the clumsy lookups in between. v2: Remove residual vma on plane cleanup (Chris) v3: Add a description for the vma destruction in intel_plane_destroy_state (Maarten) References: https://bugs.freedesktop.org/show_bug.cgi?id=98829 Signed-off-by: Chris Wilson Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-1-chris@chris-wilson.co.uk Acked-by: Joonas Lahtinen (cherry picked from commit be1e341513ca23b0668b7b0f26fa6e2ffc46ba20) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485854491-27389-3-git-send-email-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 16 +--- drivers/gpu/drm/i915/intel_atomic_plane.c | 20 +++++ drivers/gpu/drm/i915/intel_display.c | 125 ++++++++++-------------------- drivers/gpu/drm/i915/intel_drv.h | 9 ++- drivers/gpu/drm/i915/intel_fbc.c | 52 +++++-------- drivers/gpu/drm/i915/intel_fbdev.c | 4 +- drivers/gpu/drm/i915/intel_sprite.c | 8 +- 7 files changed, 99 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 69bc3b0c4390..8493e19b563a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1012,6 +1012,8 @@ struct intel_fbc { struct work_struct underrun_work; struct intel_fbc_state_cache { + struct i915_vma *vma; + struct { unsigned int mode_flags; uint32_t hsw_bdw_pixel_rate; @@ -1025,15 +1027,14 @@ struct intel_fbc { } plane; struct { - u64 ilk_ggtt_offset; uint32_t pixel_format; unsigned int stride; - int fence_reg; - unsigned int tiling_mode; } fb; } state_cache; struct intel_fbc_reg_params { + struct i915_vma *vma; + struct { enum pipe pipe; enum plane plane; @@ -1041,10 +1042,8 @@ struct intel_fbc { } crtc; struct { - u64 ggtt_offset; uint32_t pixel_format; unsigned int stride; - int fence_reg; } fb; int cfb_size; @@ -3168,13 +3167,6 @@ i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view); } -static inline unsigned long -i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); -} - /* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index dbe9fb41ae53..8d3e515f27ba 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -85,6 +85,8 @@ intel_plane_duplicate_state(struct drm_plane *plane) __drm_atomic_helper_plane_duplicate_state(plane, state); + intel_state->vma = NULL; + return state; } @@ -100,6 +102,24 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { + struct i915_vma *vma; + + vma = fetch_and_zero(&to_intel_plane_state(state)->vma); + + /* + * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma. + * We currently don't clear all planes during driver unload, so we have + * to be able to unpin vma here for now. + * + * Normally this can only happen during unload when kmscon is disabled + * and userspace doesn't attempt to set a framebuffer at all. + */ + if (vma) { + mutex_lock(&plane->dev->struct_mutex); + intel_unpin_fb_vma(vma); + mutex_unlock(&plane->dev->struct_mutex); + } + drm_atomic_helper_plane_destroy_state(plane, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f0b9aa7a0483..f1e4a21d4664 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2235,27 +2235,22 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) i915_vma_pin_fence(vma); } + i915_vma_get(vma); err: intel_runtime_pm_put(dev_priv); return vma; } -void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) +void intel_unpin_fb_vma(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct i915_ggtt_view view; - struct i915_vma *vma; - - WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); - - intel_fill_fb_ggtt_view(&view, fb, rotation); - vma = i915_gem_object_to_ggtt(obj, &view); + lockdep_assert_held(&vma->vm->dev->struct_mutex); if (WARN_ON_ONCE(!vma)) return; i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); } static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane, @@ -2750,7 +2745,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *c; - struct intel_crtc *i; struct drm_i915_gem_object *obj; struct drm_plane *primary = intel_crtc->base.primary; struct drm_plane_state *plane_state = primary->state; @@ -2775,20 +2769,20 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * an fb with another CRTC instead */ for_each_crtc(dev, c) { - i = to_intel_crtc(c); + struct intel_plane_state *state; if (c == &intel_crtc->base) continue; - if (!i->active) + if (!to_intel_crtc(c)->active) continue; - fb = c->primary->fb; - if (!fb) + state = to_intel_plane_state(c->primary->state); + if (!state->vma) continue; - obj = intel_fb_obj(fb); - if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) { + if (intel_plane_ggtt_offset(state) == plane_config->base) { + fb = c->primary->fb; drm_framebuffer_reference(fb); goto valid_fb; } @@ -2809,6 +2803,19 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: + mutex_lock(&dev->struct_mutex); + intel_state->vma = + intel_pin_and_fence_fb_obj(fb, primary->state->rotation); + mutex_unlock(&dev->struct_mutex); + if (IS_ERR(intel_state->vma)) { + DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", + intel_crtc->pipe, PTR_ERR(intel_state->vma)); + + intel_state->vma = NULL; + drm_framebuffer_unreference(fb); + return; + } + plane_state->src_x = 0; plane_state->src_y = 0; plane_state->src_w = fb->width << 16; @@ -3104,13 +3111,13 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DSPSURF(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } else { I915_WRITE(DSPADDR(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); } POSTING_READ(reg); @@ -3207,7 +3214,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_WRITE(DSPSURF(plane), - intel_fb_gtt_offset(fb, rotation) + + intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); @@ -3230,23 +3237,6 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, } } -u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, - unsigned int rotation) -{ - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct i915_ggtt_view view; - struct i915_vma *vma; - - intel_fill_fb_ggtt_view(&view, fb, rotation); - - vma = i915_gem_object_to_ggtt(obj, &view); - if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", - view.type)) - return -1; - - return i915_ggtt_offset(vma); -} - static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) { struct drm_device *dev = intel_crtc->base.dev; @@ -3441,7 +3431,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, } I915_WRITE(PLANE_SURF(pipe, 0), - intel_fb_gtt_offset(fb, rotation) + surf_addr); + intel_plane_ggtt_offset(plane_state) + surf_addr); POSTING_READ(PLANE_SURF(pipe, 0)); } @@ -11536,7 +11526,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) flush_work(&work->mmio_work); mutex_lock(&dev->struct_mutex); - intel_unpin_fb_obj(work->old_fb, primary->state->rotation); + intel_unpin_fb_vma(work->old_vma); i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); @@ -12246,8 +12236,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, goto cleanup_pending; } - work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation); - work->gtt_offset += intel_crtc->dspaddr_offset; + work->old_vma = to_intel_plane_state(primary->state)->vma; + to_intel_plane_state(primary->state)->vma = vma; + + work->gtt_offset = i915_ggtt_offset(vma) + intel_crtc->dspaddr_offset; work->rotation = crtc->primary->state->rotation; /* @@ -12301,7 +12293,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_request: i915_add_request_no_flush(request); cleanup_unpin: - intel_unpin_fb_obj(fb, crtc->primary->state->rotation); + to_intel_plane_state(primary->state)->vma = work->old_vma; + intel_unpin_fb_vma(vma); cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); unlock: @@ -14794,6 +14787,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, DRM_DEBUG_KMS("failed to pin object\n"); return PTR_ERR(vma); } + + to_intel_plane_state(new_state)->vma = vma; } return 0; @@ -14812,19 +14807,12 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct intel_plane_state *old_intel_state; - struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); - struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); - - old_intel_state = to_intel_plane_state(old_state); - - if (!obj && !old_obj) - return; + struct i915_vma *vma; - if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR || - !INTEL_INFO(dev_priv)->cursor_needs_physical)) - intel_unpin_fb_obj(old_state->fb, old_state->rotation); + /* Should only be called after a successful intel_prepare_plane_fb()! */ + vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); + if (vma) + intel_unpin_fb_vma(vma); } int @@ -15166,7 +15154,7 @@ intel_update_cursor_plane(struct drm_plane *plane, if (!obj) addr = 0; else if (!INTEL_INFO(dev_priv)->cursor_needs_physical) - addr = i915_gem_object_ggtt_offset(obj, NULL); + addr = intel_plane_ggtt_offset(state); else addr = obj->phys_handle->busaddr; @@ -17066,41 +17054,12 @@ void intel_display_resume(struct drm_device *dev) void intel_modeset_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *c; - struct drm_i915_gem_object *obj; intel_init_gt_powersave(dev_priv); intel_modeset_init_hw(dev); intel_setup_overlay(dev_priv); - - /* - * Make sure any fbs we allocated at startup are properly - * pinned & fenced. When we do the allocation it's too early - * for this. - */ - for_each_crtc(dev, c) { - struct i915_vma *vma; - - obj = intel_fb_obj(c->primary->fb); - if (obj == NULL) - continue; - - mutex_lock(&dev->struct_mutex); - vma = intel_pin_and_fence_fb_obj(c->primary->fb, - c->primary->state->rotation); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(vma)) { - DRM_ERROR("failed to pin boot fb on pipe %d\n", - to_intel_crtc(c)->pipe); - drm_framebuffer_unreference(c->primary->fb); - c->primary->fb = NULL; - c->primary->crtc = c->primary->state->crtc = NULL; - update_state_fb(c->primary); - c->state->plane_mask &= ~(1 << drm_plane_index(c->primary)); - } - } } int intel_connector_register(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cd72ae171eeb..03a2112004f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -377,6 +377,7 @@ struct intel_atomic_state { struct intel_plane_state { struct drm_plane_state base; struct drm_rect clip; + struct i915_vma *vma; struct { u32 offset; @@ -1046,6 +1047,7 @@ struct intel_flip_work { struct work_struct mmio_work; struct drm_crtc *crtc; + struct i915_vma *old_vma; struct drm_framebuffer *old_fb; struct drm_i915_gem_object *pending_flip_obj; struct drm_pending_vblank_event *event; @@ -1273,7 +1275,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx); struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); -void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); +void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, @@ -1362,7 +1364,10 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation); +static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) +{ + return i915_ggtt_offset(state->vma); +} u32 skl_plane_ctl_format(uint32_t pixel_format); u32 skl_plane_ctl_tiling(uint64_t fb_modifier); diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 62f215b12eb5..f3a1d6a5cabe 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -173,7 +173,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) if (IS_I945GM(dev_priv)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; - fbc_ctl |= params->fb.fence_reg; + fbc_ctl |= params->vma->fence->id; I915_WRITE(FBC_CONTROL, fbc_ctl); } @@ -193,8 +193,8 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { - dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; + if (params->vma->fence) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id; I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); } else { I915_WRITE(DPFC_FENCE_YOFF, 0); @@ -251,13 +251,14 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + if (params->vma->fence) { dpfc_ctl |= DPFC_CTL_FENCE_EN; if (IS_GEN5(dev_priv)) - dpfc_ctl |= params->fb.fence_reg; + dpfc_ctl |= params->vma->fence->id; if (IS_GEN6(dev_priv)) { I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + SNB_CPU_FENCE_ENABLE | + params->vma->fence->id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } @@ -269,7 +270,8 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) } I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); - I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID); + I915_WRITE(ILK_FBC_RT_BASE, + i915_ggtt_offset(params->vma) | ILK_FBC_RT_VALID); /* enable it... */ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -319,10 +321,11 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + if (params->vma->fence) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + SNB_CPU_FENCE_ENABLE | + params->vma->fence->id); I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); } else { I915_WRITE(SNB_DPFC_CTL_SA,0); @@ -727,14 +730,6 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) return effective_w <= max_w && effective_h <= max_h; } -/* XXX replace me when we have VMA tracking for intel_plane_state */ -static int get_fence_id(struct drm_framebuffer *fb) -{ - struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL); - - return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE; -} - static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) @@ -743,7 +738,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj; + + cache->vma = NULL; cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -758,16 +754,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, if (!cache->plane.visible) return; - obj = intel_fb_obj(fb); - - /* FIXME: We lack the proper locking here, so only run this on the - * platforms that need. */ - if (IS_GEN(dev_priv, 5, 6)) - cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL); cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; - cache->fb.fence_reg = get_fence_id(fb); - cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); + + cache->vma = plane_state->vma; } static bool intel_fbc_can_activate(struct intel_crtc *crtc) @@ -784,7 +774,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if (!cache->plane.visible) { + if (!cache->vma) { fbc->no_fbc_reason = "primary plane not visible"; return false; } @@ -807,8 +797,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) * so have no fence associated with it) due to aperture constaints * at the time of pinning. */ - if (cache->fb.tiling_mode != I915_TILING_X || - cache->fb.fence_reg == I915_FENCE_REG_NONE) { + if (!cache->vma->fence) { fbc->no_fbc_reason = "framebuffer not tiled or fenced"; return false; } @@ -888,17 +877,16 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, * zero. */ memset(params, 0, sizeof(*params)); + params->vma = cache->vma; + params->crtc.pipe = crtc->pipe; params->crtc.plane = crtc->plane; params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc); params->fb.pixel_format = cache->fb.pixel_format; params->fb.stride = cache->fb.stride; - params->fb.fence_reg = cache->fb.fence_reg; params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache); - - params->fb.ggtt_offset = cache->fb.ilk_ggtt_offset; } static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1, diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 8cf2d80f2254..f4a8c4fc57c4 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -284,7 +284,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_unpin: - intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + intel_unpin_fb_vma(vma); out_unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -549,7 +549,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) if (ifbdev->fb) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + intel_unpin_fb_vma(ifbdev->vma); mutex_unlock(&ifbdev->helper.dev->struct_mutex); drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 8f131a08d440..242a73e66d82 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -273,7 +273,7 @@ skl_update_plane(struct drm_plane *drm_plane, I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl); I915_WRITE(PLANE_SURF(pipe, plane), - intel_fb_gtt_offset(fb, rotation) + surf_addr); + intel_plane_ggtt_offset(plane_state) + surf_addr); POSTING_READ(PLANE_SURF(pipe, plane)); } @@ -458,7 +458,7 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w); I915_WRITE(SPCNTR(pipe, plane), sprctl); I915_WRITE(SPSURF(pipe, plane), - intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); + intel_plane_ggtt_offset(plane_state) + sprsurf_offset); POSTING_READ(SPSURF(pipe, plane)); } @@ -594,7 +594,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE(SPRSCALE(pipe), sprscale); I915_WRITE(SPRCTL(pipe), sprctl); I915_WRITE(SPRSURF(pipe), - intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); + intel_plane_ggtt_offset(plane_state) + sprsurf_offset); POSTING_READ(SPRSURF(pipe)); } @@ -721,7 +721,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSCALE(pipe), dvsscale); I915_WRITE(DVSCNTR(pipe), dvscntr); I915_WRITE(DVSSURF(pipe), - intel_fb_gtt_offset(fb, rotation) + dvssurf_offset); + intel_plane_ggtt_offset(plane_state) + dvssurf_offset); POSTING_READ(DVSSURF(pipe)); } -- cgit From 4993b39ab04b083ff6ee1147e7e7f120feb6bf7f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 31 Jan 2017 20:01:31 +0100 Subject: be2net: fix initial MAC setting Recent commit 34393529163a ("be2net: fix MAC addr setting on privileged BE3 VFs") allows privileged BE3 VFs to set its MAC address during initialization. Although the initial MAC for such VFs is already programmed by parent PF the subsequent setting performed by VF is OK, but in certain cases (after fresh boot) this command in VF can fail. The MAC should be initialized only when: 1) no MAC is programmed (always except BE3 VFs during first init) 2) programmed MAC is different from requested (e.g. MAC is set when interface is down). In this case the initial MAC programmed by PF needs to be deleted. The adapter->dev_mac contains MAC address currently programmed in HW so it should be zeroed when the MAC is deleted from HW and should not be filled when MAC is set when interface is down in be_mac_addr_set() as no programming is performed in this case. Example of failure without the fix (immediately after fresh boot): # ip link set eth0 up <- eth0 is BE3 PF be2net 0000:01:00.0 eth0: Link is Up # echo 1 > /sys/class/net/eth0/device/sriov_numvfs <- Create 1 VF ... be2net 0000:01:04.0: Emulex OneConnect(be3): VF port 0 # ip link set eth8 up <- eth8 is created privileged VF be2net 0000:01:04.0: opcode 59-1 failed:status 1-76 RTNETLINK answers: Input/output error # echo 0 > /sys/class/net/eth0/device/sriov_numvfs <- Delete VF iommu: Removing device 0000:01:04.0 from group 33 ... # echo 1 > /sys/class/net/eth0/device/sriov_numvfs <- Create it again iommu: Removing device 0000:01:04.0 from group 33 ... # ip link set eth8 up be2net 0000:01:04.0 eth8: Link is Up Initialization is now OK. v2 - Corrected the comment and condition check suggested by Suresh & Harsha Fixes: 34393529163a ("be2net: fix MAC addr setting on privileged BE3 VFs") Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Acked-by: Sriharsha Basavapatna Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 33 ++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 1a7f8ad7b9c6..cd49a54c538d 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -362,8 +362,10 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) status = -EPERM; goto err; } -done: + + /* Remember currently programmed MAC */ ether_addr_copy(adapter->dev_mac, addr->sa_data); +done: ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -3618,8 +3620,10 @@ static void be_disable_if_filters(struct be_adapter *adapter) { /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */ if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) + check_privilege(adapter, BE_PRIV_FILTMGMT)) { be_dev_mac_del(adapter, adapter->pmac_id[0]); + eth_zero_addr(adapter->dev_mac); + } be_clear_uc_list(adapter); be_clear_mc_list(adapter); @@ -3773,12 +3777,27 @@ static int be_enable_if_filters(struct be_adapter *adapter) if (status) return status; - /* Don't add MAC on BE3 VFs without FILTMGMT privilege */ - if (!BEx_chip(adapter) || !be_virtfn(adapter) || - check_privilege(adapter, BE_PRIV_FILTMGMT)) { + /* Normally this condition usually true as the ->dev_mac is zeroed. + * But on BE3 VFs the initial MAC is pre-programmed by PF and + * subsequent be_dev_mac_add() can fail (after fresh boot) + */ + if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) { + int old_pmac_id = -1; + + /* Remember old programmed MAC if any - can happen on BE3 VF */ + if (!is_zero_ether_addr(adapter->dev_mac)) + old_pmac_id = adapter->pmac_id[0]; + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; + + /* Delete the old programmed MAC as we successfully programmed + * a new MAC + */ + if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0]) + be_dev_mac_del(adapter, old_pmac_id); + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } @@ -4552,6 +4571,10 @@ static int be_mac_setup(struct be_adapter *adapter) memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN); memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN); + + /* Initial MAC for BE3 VFs is already programmed by PF */ + if (BEx_chip(adapter) && be_virtfn(adapter)) + memcpy(adapter->dev_mac, mac, ETH_ALEN); } return 0; -- cgit From 2da64d20a0b20046d688e44f4033efd09157e29d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 1 Feb 2017 14:26:16 +1100 Subject: vfio/spapr: Fix missing mutex unlock when creating a window Commit d9c728949ddc ("vfio/spapr: Postpone default window creation") added an additional exit to the VFIO_IOMMU_SPAPR_TCE_CREATE case and made it possible to return from tce_iommu_ioctl() without unlocking container->lock; this fixes the issue. Fixes: d9c728949ddc ("vfio/spapr: Postpone default window creation") Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 128d10282d16..7690e5bf3cf1 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1123,12 +1123,11 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_lock(&container->lock); ret = tce_iommu_create_default_window(container); - if (ret) - return ret; - - ret = tce_iommu_create_window(container, create.page_shift, - create.window_size, create.levels, - &create.start_addr); + if (!ret) + ret = tce_iommu_create_window(container, + create.page_shift, + create.window_size, create.levels, + &create.start_addr); mutex_unlock(&container->lock); -- cgit From fd62d9f5c575f0792f150109f1fd24a0d4b3f854 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 31 Jan 2017 15:14:29 +0200 Subject: net/sched: matchall: Fix configuration race In the current version, the matchall internal state is split into two structs: cls_matchall_head and cls_matchall_filter. This makes little sense, as matchall instance supports only one filter, and there is no situation where one exists and the other does not. In addition, that led to some races when filter was deleted while packet was processed. Unify that two structs into one, thus simplifying the process of matchall creation and deletion. As a result, the new, delete and get callbacks have a dummy implementation where all the work is done in destroy and change callbacks, as was done in cls_cgroup. Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier") Reported-by: Daniel Borkmann Signed-off-by: Yotam Gigi Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 127 +++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 82 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index f935429bd5ef..b12bc2abea93 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -16,16 +16,11 @@ #include #include -struct cls_mall_filter { +struct cls_mall_head { struct tcf_exts exts; struct tcf_result res; u32 handle; - struct rcu_head rcu; u32 flags; -}; - -struct cls_mall_head { - struct cls_mall_filter *filter; struct rcu_head rcu; }; @@ -33,38 +28,29 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_mall_head *head = rcu_dereference_bh(tp->root); - struct cls_mall_filter *f = head->filter; - if (tc_skip_sw(f->flags)) + if (tc_skip_sw(head->flags)) return -1; - return tcf_exts_exec(skb, &f->exts, res); + return tcf_exts_exec(skb, &head->exts, res); } static int mall_init(struct tcf_proto *tp) { - struct cls_mall_head *head; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (!head) - return -ENOBUFS; - - rcu_assign_pointer(tp->root, head); - return 0; } -static void mall_destroy_filter(struct rcu_head *head) +static void mall_destroy_rcu(struct rcu_head *rcu) { - struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu); + struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, + rcu); - tcf_exts_destroy(&f->exts); - - kfree(f); + tcf_exts_destroy(&head->exts); + kfree(head); } static int mall_replace_hw_filter(struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; @@ -74,7 +60,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, offload.type = TC_SETUP_MATCHALL; offload.cls_mall = &mall_offload; offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; - offload.cls_mall->exts = &f->exts; + offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, @@ -82,7 +68,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, } static void mall_destroy_hw_filter(struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; @@ -103,29 +89,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct net_device *dev = tp->q->dev_queue->dev; - struct cls_mall_filter *f = head->filter; - if (!force && f) - return false; + if (!head) + return true; - if (f) { - if (tc_should_offload(dev, tp, f->flags)) - mall_destroy_hw_filter(tp, f, (unsigned long) f); + if (tc_should_offload(dev, tp, head->flags)) + mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&f->rcu, mall_destroy_filter); - } - kfree_rcu(head, rcu); + call_rcu(&head->rcu, mall_destroy_rcu); return true; } static unsigned long mall_get(struct tcf_proto *tp, u32 handle) { - struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = head->filter; - - if (f && f->handle == handle) - return (unsigned long) f; - return 0; + return 0UL; } static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { @@ -134,7 +111,7 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { @@ -147,11 +124,11 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_MATCHALL_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); - tcf_bind_filter(tp, &f->res, base); + head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); + tcf_bind_filter(tp, &head->res, base); } - tcf_exts_change(tp, &f->exts, &e); + tcf_exts_change(tp, &head->exts, &e); return 0; } @@ -162,21 +139,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, unsigned long *arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg; struct net_device *dev = tp->q->dev_queue->dev; - struct cls_mall_filter *f; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; + struct cls_mall_head *new; u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - if (head->filter) - return -EBUSY; - - if (fold) - return -EINVAL; + if (head) + return -EEXIST; err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], mall_policy); @@ -189,23 +162,23 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0); + tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0); if (!handle) handle = 1; - f->handle = handle; - f->flags = flags; + new->handle = handle; + new->flags = flags; - err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); if (err) goto errout; if (tc_should_offload(dev, tp, flags)) { - err = mall_replace_hw_filter(tp, f, (unsigned long) f); + err = mall_replace_hw_filter(tp, new, (unsigned long) new); if (err) { if (tc_skip_sw(flags)) goto errout; @@ -214,39 +187,29 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, } } - *arg = (unsigned long) f; - rcu_assign_pointer(head->filter, f); - + *arg = (unsigned long) head; + rcu_assign_pointer(tp->root, new); + if (head) + call_rcu(&head->rcu, mall_destroy_rcu); return 0; errout: - kfree(f); + kfree(new); return err; } static int mall_delete(struct tcf_proto *tp, unsigned long arg) { - struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = (struct cls_mall_filter *) arg; - struct net_device *dev = tp->q->dev_queue->dev; - - if (tc_should_offload(dev, tp, f->flags)) - mall_destroy_hw_filter(tp, f, (unsigned long) f); - - RCU_INIT_POINTER(head->filter, NULL); - tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, mall_destroy_filter); - return 0; + return -EOPNOTSUPP; } static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = head->filter; if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) + if (arg->fn(tp, (unsigned long) head, arg) < 0) arg->stop = 1; skip: arg->count++; @@ -255,28 +218,28 @@ skip: static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_mall_filter *f = (struct cls_mall_filter *) fh; + struct cls_mall_head *head = (struct cls_mall_head *) fh; struct nlattr *nest; - if (!f) + if (!head) return skb->len; - t->tcm_handle = f->handle; + t->tcm_handle = head->handle; nest = nla_nest_start(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid)) + if (head->res.classid && + nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts)) + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts) < 0) + if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; -- cgit From 1a2a14444d32b89b28116daea86f63ced1716668 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Tue, 31 Jan 2017 16:03:13 -0800 Subject: net: fix ndo_features_check/ndo_fix_features comment ordering Commit cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()") inadvertently moved the doc comment for .ndo_fix_features instead of .ndo_features_check. Fix the comment ordering. Fixes: cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()") Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9bde9558b596..70ad0291d517 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -866,11 +866,15 @@ struct netdev_xdp { * of useless work if you return NETDEV_TX_BUSY. * Required; cannot be NULL. * - * netdev_features_t (*ndo_fix_features)(struct net_device *dev, - * netdev_features_t features); - * Adjusts the requested feature flags according to device-specific - * constraints, and returns the resulting flags. Must not modify - * the device state. + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); + * Called by core transmit path to determine if device is capable of + * performing offload operations on a given packet. This is to give + * the device an opportunity to implement any restrictions that cannot + * be otherwise expressed by feature flags. The check is called with + * the set of features that the stack has calculated and it returns + * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); @@ -1028,6 +1032,12 @@ struct netdev_xdp { * Called to release previously enslaved netdev. * * Feature/offload setting functions. + * netdev_features_t (*ndo_fix_features)(struct net_device *dev, + * netdev_features_t features); + * Adjusts the requested feature flags according to device-specific + * constraints, and returns the resulting flags. Must not modify + * the device state. + * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). @@ -1100,15 +1110,6 @@ struct netdev_xdp { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); - * Called by core transmit path to determine if device is capable of - * performing offload operations on a given packet. This is to give - * the device an opportunity to implement any restrictions that cannot - * be otherwise expressed by feature flags. The check is called with - * the set of features that the stack has calculated and it returns - * those the driver believes to be appropriate. * int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific -- cgit From 63117f09c768be05a0bf465911297dc76394f686 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 1 Feb 2017 11:46:32 +0300 Subject: ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim() Casting is a high precedence operation but "off" and "i" are in terms of bytes so we need to have some parenthesis here. Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()") Signed-off-by: Dan Carpenter Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ff8ee06491c3..75fac933c209 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -441,7 +441,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i; + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) -- cgit From 06425c308b92eaf60767bc71d359f4cbc7a561f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Feb 2017 08:33:53 -0800 Subject: tcp: fix 0 divide in __tcp_select_window() syszkaller fuzzer was able to trigger a divide by zero, when TCP window scaling is not enabled. SO_RCVBUF can be used not only to increase sk_rcvbuf, also to decrease it below current receive buffers utilization. If mss is negative or 0, just return a zero TCP window. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1d5331a1b1dc..8ce50dc3ab8c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2518,9 +2518,11 @@ u32 __tcp_select_window(struct sock *sk) int full_space = min_t(int, tp->window_clamp, allowed_space); int window; - if (mss > full_space) + if (unlikely(mss > full_space)) { mss = full_space; - + if (mss <= 0) + return 0; + } if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; -- cgit From 601bbbe0517303c9f8eb3d75e11d64efed1293c9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2017 14:56:43 -0800 Subject: Input: uinput - fix crash when mixing old and new init style If user tries to initialize uinput device mixing old and new style initialization (i.e. using old UI_SET_ABSBIT instead of UI_ABS_SETUP, we forget to allocate input->absinfo and will crash when trying to send absolute events: ioctl(ui, UI_DEV_SETUP, &us); ioctl(ui, UI_SET_PHYS, "Test"); ioctl(ui, UI_SET_EVBIT, EV_ABS); ioctl(ui, UI_SET_ABSBIT, ABS_X); ioctl(ui, UI_SET_ABSBIT, ABS_Y); ioctl(ui, UI_DEV_CREATE, 0); Reported-by: Rodrigo Rivas Costa Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=191811 Fixes: fbae10db0940 ("Input: uinput - rework ABS validation") Reviewed-by: Benjamin Tissoires Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 92595b98e7ed..022be0e22eba 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -263,13 +263,21 @@ static int uinput_create_device(struct uinput_device *udev) return -EINVAL; } - if (test_bit(ABS_MT_SLOT, dev->absbit)) { - nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1; - error = input_mt_init_slots(dev, nslot, 0); - if (error) + if (test_bit(EV_ABS, dev->evbit)) { + input_alloc_absinfo(dev); + if (!dev->absinfo) { + error = -EINVAL; goto fail1; - } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { - input_set_events_per_packet(dev, 60); + } + + if (test_bit(ABS_MT_SLOT, dev->absbit)) { + nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1; + error = input_mt_init_slots(dev, nslot, 0); + if (error) + goto fail1; + } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { + input_set_events_per_packet(dev, 60); + } } if (test_bit(EV_FF, dev->evbit) && !udev->ff_effects_max) { -- cgit From c8f325a59cfc718d13a50fbc746ed9b415c25e92 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Feb 2017 17:45:02 +0000 Subject: efi/fdt: Avoid FDT manipulation after ExitBootServices() Some AArch64 UEFI implementations disable the MMU in ExitBootServices(), after which unaligned accesses to RAM are no longer supported. Commit: abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel") fixed an issue in the memory map handling of the stub FDT code, but inadvertently created an issue with such firmware, by moving some of the FDT manipulation to after the invocation of ExitBootServices(). Given that the stub's libfdt implementation uses the ordinary, accelerated string functions, which rely on hardware handling of unaligned accesses, manipulating the FDT with the MMU off may result in alignment faults. So fix the situation by moving the update_fdt_memmap() call into the callback function invoked by efi_exit_boot_services() right before it calls the ExitBootServices() UEFI service (which is arguably a better place for it anyway) Note that disabling the MMU in ExitBootServices() is not compliant with the UEFI spec, and carries great risk due to the fact that switching from cached to uncached memory accesses halfway through compiler generated code (i.e., involving a stack) can never be done in a way that is architecturally safe. Fixes: abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel") Signed-off-by: Ard Biesheuvel Tested-by: Riku Voipio Cc: Cc: mark.rutland@arm.com Cc: linux-efi@vger.kernel.org Cc: matt@codeblueprint.co.uk Cc: leif.lindholm@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1485971102-23330-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/fdt.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 921dfa047202..260c4b4b492e 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -187,6 +187,7 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) struct exit_boot_struct { efi_memory_desc_t *runtime_map; int *runtime_entry_count; + void *new_fdt_addr; }; static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, @@ -202,7 +203,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, p->runtime_map, p->runtime_entry_count); - return EFI_SUCCESS; + return update_fdt_memmap(p->new_fdt_addr, map); } /* @@ -300,22 +301,13 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, priv.runtime_map = runtime_map; priv.runtime_entry_count = &runtime_entry_count; + priv.new_fdt_addr = (void *)*new_fdt_addr; status = efi_exit_boot_services(sys_table, handle, &map, &priv, exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; - status = update_fdt_memmap((void *)*new_fdt_addr, &map); - if (status != EFI_SUCCESS) { - /* - * The kernel won't get far without the memory map, but - * may still be able to print something meaningful so - * return success here. - */ - return EFI_SUCCESS; - } - /* Install the new virtual address map */ svam = sys_table->runtime->set_virtual_address_map; status = svam(runtime_entry_count * desc_size, desc_size, -- cgit From 3484ecbe0e9deb94afb0b9b6172d77e98eb72b94 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 22 Dec 2016 15:00:12 +0000 Subject: crypto: qat - fix bar discovery for c62x Some accelerators of the c62x series have only two bars. This patch skips BAR0 if the accelerator does not have it. Cc: Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c62x/adf_drv.c | 2 +- drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index bc5cbc193aae..5b2d78a5b5aa 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -233,7 +233,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) &hw_data->accel_capabilities_mask); /* Find and map all the device's BARS */ - i = 0; + i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, ADF_PCI_MAX_BARS * 2) { diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index e8822536530b..33f0a6251e38 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -69,6 +69,7 @@ #define ADF_ERRSOU5 (0x3A000 + 0xD8) #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C +#define ADF_DEVICE_FUSECTL_MASK 0x80000000 #define ADF_PCI_MAX_BARS 3 #define ADF_DEVICE_NAME_LENGTH 32 #define ADF_ETR_MAX_RINGS_PER_BANK 16 -- cgit From 685ce0626840e2673fe64ea8807684f7324fec5f Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 22 Dec 2016 15:00:24 +0000 Subject: crypto: qat - zero esram only for DH85x devices Zero embedded ram in DH85x devices. This is not needed for newer generations as it is done by HW. Cc: Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_hal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 1e480f140663..8c4fd255a601 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -456,7 +456,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) unsigned int csr_val; int times = 30; - if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID) + if (handle->pci_dev->device != ADF_DH895XCC_PCI_DEVICE_ID) return 0; csr_val = ADF_CSR_RD(csr_addr, 0); @@ -716,7 +716,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + LOCAL_TO_XFER_REG_OFFSET); handle->pci_dev = pci_info->pci_dev; - if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) { + if (handle->pci_dev->device == ADF_DH895XCC_PCI_DEVICE_ID) { sram_bar = &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)]; handle->hal_sram_addr_v = sram_bar->virt_addr; -- cgit From 8381cdd0e32dd748bd34ca3ace476949948bd793 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 Jan 2017 14:14:56 +0900 Subject: perf diff: Fix segfault on 'perf diff -o N' option The -o/--order option is to select column number to sort a diff result. It does the job by adding a hpp field at the beginning of the sort list. But it should not be added to the output field list as it has no callbacks required by a output field. During the setup_sorting(), the perf_hpp__setup_output_field() appends the given sort keys to the output field if it's not there already. Originally it was checked by fmt->list being non-empty. But commit 3f931f2c4274 ("perf hists: Make hpp setup function generic") changed it to check the ->equal callback. Anyways, we don't need to add the pseudo hpp field to the output field list since it won't be used for output. So just skip fields if they have no ->color or ->entry callbacks. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Fixes: 3f931f2c4274 ("perf hists: Make hpp setup function generic") Link: http://lkml.kernel.org/r/20170118051457.30946-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 37388397b5bc..4ec79b2f9416 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -560,6 +560,10 @@ void perf_hpp__setup_output_field(struct perf_hpp_list *list) perf_hpp_list__for_each_sort_list(list, fmt) { struct perf_hpp_fmt *pos; + /* skip sort-only fields ("sort_compute" in perf diff) */ + if (!fmt->entry && !fmt->color) + continue; + perf_hpp_list__for_each_format(list, pos) { if (fmt_equal(fmt, pos)) goto next; -- cgit From a1c9f97f0b64e6337d9cfcc08c134450934fdd90 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 Jan 2017 14:14:57 +0900 Subject: perf diff: Fix -o/--order option behavior (again) Commit 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field interface") changed list_add() to perf_hpp__register_sort_field(). This resulted in a behavior change since the field was added to the tail instead of the head. So the -o option is mostly ignored due to its order in the list. This patch fixes it by adding perf_hpp__prepend_sort_field(). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Fixes: 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field interface") Link: http://lkml.kernel.org/r/20170118051457.30946-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 +- tools/perf/ui/hist.c | 6 ++++++ tools/perf/util/hist.h | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9ff0db4e2d0c..933aeec46f4a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1199,7 +1199,7 @@ static int ui_init(void) BUG_ON(1); } - perf_hpp__register_sort_field(fmt); + perf_hpp__prepend_sort_field(fmt); return 0; } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4ec79b2f9416..18cfcdc90356 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -521,6 +521,12 @@ void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, list_add_tail(&format->sort_list, &list->sorts); } +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) +{ + list_add(&format->sort_list, &list->sorts); +} + void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del(&format->list); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d4b6514eeef5..28c216e3d5b7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -283,6 +283,8 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, struct perf_hpp_fmt *format); void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format); +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) { @@ -294,6 +296,11 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) perf_hpp_list__register_sort_field(&perf_hpp_list, format); } +static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__prepend_sort_field(&perf_hpp_list, format); +} + #define perf_hpp_list__for_each_format(_list, format) \ list_for_each_entry(format, &(_list)->fields, list) -- cgit From aa33b9b9a2ebb00d33c83a5312d4fbf2d5aeba36 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 5 Jan 2017 22:23:31 -0800 Subject: perf callchain: Reference count maps If dso__load_kcore frees all of the existing maps, but one has already been attached to a callchain cursor node, then we can get a SIGSEGV in any function that happens to try to use this invalid cursor. Use the existing map refcount mechanism to forestall cleanup of a map until the cursor iterates past the node. Signed-off-by: Krister Johansen Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: stable@kernel.org Fixes: 84c2cafa2889 ("perf tools: Reference count struct map") Link: http://lkml.kernel.org/r/20170106062331.GB2707@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 11 +++++++++-- tools/perf/util/callchain.h | 6 ++++++ tools/perf/util/hist.c | 7 +++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 42922512c1c6..8b610dd9e2f6 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; - call->ms.map = cursor_node->map; + call->ms.map = map__get(cursor_node->map); if (cursor_node->branch) { call->branch_count = 1; @@ -477,6 +477,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del(&call->list); + map__zput(call->ms.map); free(call); } free(new); @@ -761,6 +762,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list->ms.map, list->ms.sym, false, NULL, 0, 0); list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -811,7 +813,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - node->map = map; + map__zput(node->map); + node->map = map__get(map); node->sym = sym; node->branch = branch; node->nr_loop_iter = nr_loop_iter; @@ -1142,11 +1145,13 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -1210,6 +1215,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; + map__get(new->ms.map); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1230,6 +1236,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del(&chain->list); + map__zput(chain->ms.map); free(chain); } return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 35c8e379530f..4f4b60f1558a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,6 +5,7 @@ #include #include #include "event.h" +#include "map.h" #include "symbol.h" #define HELP_PAD "\t\t\t\t" @@ -184,8 +185,13 @@ int callchain_merge(struct callchain_cursor *cursor, */ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) { + struct callchain_cursor_node *node; + cursor->nr = 0; cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6770a9645609..7d1b7d33e644 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,7 @@ #include "util.h" #include "build-id.h" #include "hist.h" +#include "map.h" #include "session.h" #include "sort.h" #include "evlist.h" @@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg) { int err, err2; + struct map *alm = NULL; + + if (al && al->map) + alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); @@ -1058,6 +1063,8 @@ out: if (!err) err = err2; + map__put(alm); + return err; } -- cgit From 26a346f23c5291d1d9521e72763103daf2c6f0d1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Feb 2017 17:57:56 +0100 Subject: tracing/kprobes: Fix __init annotation clang complains about "__init" being attached to a struct name: kernel/trace/trace_kprobe.c:1375:15: error: '__section__' attribute only applies to functions and global variables The intention must have been to mark the function as __init instead of the type, so move the attribute there. Link: http://lkml.kernel.org/r/20170201165826.2625888-1-arnd@arndb.de Fixes: f18f97ac43d7 ("tracing/kprobes: Add a helper method to return number of probe hits") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a133ecd741e4..7ad9e53ad174 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1372,7 +1372,7 @@ kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6) return a1 + a2 + a3 + a4 + a5 + a6; } -static struct __init trace_event_file * +static __init struct trace_event_file * find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { struct trace_event_file *file; -- cgit From 57bcd0a6364cd4eaa362d7ff1777e88ddf501602 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 27 Jan 2017 10:31:52 -0500 Subject: drm/amdgpu/si: fix crash on headless asics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing check for crtcs present. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193341 https://bugs.freedesktop.org/show_bug.cgi?id=99387 Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index e2b0b1646f99..0635829b18cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -254,6 +254,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); + if (adev->mode_info.num_crtc) + amdgpu_display_set_vga_render_state(adev, false); + gmc_v6_0_mc_stop(adev, &save); if (gmc_v6_0_wait_for_idle((void *)adev)) { @@ -283,7 +286,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } gmc_v6_0_mc_resume(adev, &save); - amdgpu_display_set_vga_render_state(adev, false); } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) -- cgit From 51964e9e12d0a054002a1a0d1dec4f661c7aaf28 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 30 Jan 2017 12:06:35 +0900 Subject: drm/radeon: Fix vram_size/visible values in DRM_RADEON_GEM_INFO ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vram_size is supposed to be the total amount of VRAM that can be used by userspace, which corresponds to the TTM VRAM manager size (which is normally the full amount of VRAM, but can be just the visible VRAM when DMA can't be used for BO migration for some reason). The above was incorrectly used for vram_visible before, resulting in generally too large values being reported. Reviewed-by: Christian König Reviewed-by: Nicolai Hähnle Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- drivers/gpu/drm/radeon/radeon_gem.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e0c143b865f3..30bd4a6a9d46 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -97,9 +97,10 @@ * 2.46.0 - Add PFP_SYNC_ME support on evergreen * 2.47.0 - Add UVD_NO_OP register support * 2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI + * 2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible values */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 48 +#define KMS_DRIVER_MINOR 49 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 0bcffd8a7bd3..96683f5b2b1b 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -220,8 +220,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, man = &rdev->mman.bdev.man[TTM_PL_VRAM]; - args->vram_size = rdev->mc.real_vram_size; - args->vram_visible = (u64)man->size << PAGE_SHIFT; + args->vram_size = (u64)man->size << PAGE_SHIFT; + args->vram_visible = rdev->mc.visible_vram_size; args->vram_visible -= rdev->vram_pin_size; args->gart_size = rdev->mc.gtt_size; args->gart_size -= rdev->gart_pin_size; -- cgit From dfef358bd1beb4e7b5c94eca944be9cd23dfc752 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jan 2017 13:15:41 +0100 Subject: PCI/MSI: Don't apply affinity if there aren't enough vectors left MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bart reported a problem wіth an out of bounds access in the low-level IRQ affinity code, which we root caused to the qla2xxx driver assigning all its MSI-X vectors to the pre and post vectors, and not having any left for the actually spread IRQs. Fix this issue by not asking for affinity assignment when there are no vectors to assign left. Fixes: 402723ad5c62 ("PCI/MSI: Provide pci_alloc_irq_vectors_affinity()") Link: https://lkml.kernel.org/r/1485359225.3093.3.camel@sandisk.com Reported-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- drivers/pci/msi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 50c5003295ca..7f73bacf13ed 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1206,6 +1206,16 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, if (flags & PCI_IRQ_AFFINITY) { if (!affd) affd = &msi_default_affd; + + if (affd->pre_vectors + affd->post_vectors > min_vecs) + return -EINVAL; + + /* + * If there aren't any vectors left after applying the pre/post + * vectors don't bother with assigning affinity. + */ + if (affd->pre_vectors + affd->post_vectors == min_vecs) + affd = NULL; } else { if (WARN_ON(affd)) affd = NULL; -- cgit From 1a902f6b70c55171ca2419d946b85274e35c9757 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 12:38:33 +0100 Subject: ARM: orion5x: fix Makefile for linkstation-lschl.dtb The rename of orion5x-lschl.dts needs to be reflected in the Makefile: make[3]: *** No rule to make target 'arch/arm/boot/dts/orion5x-lschl.dtb', needed by '__build'. Fixes: 6cfd3cd8d836 ("ARM: dts: orion5x-lschl: More consistent naming on linkstation series") Signed-off-by: Arnd Bergmann Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index cccdbcb557b6..20fe4a54ee5e 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -616,7 +616,7 @@ dtb-$(CONFIG_ARCH_ORION5X) += \ orion5x-lacie-ethernet-disk-mini-v2.dtb \ orion5x-linkstation-lsgl.dtb \ orion5x-linkstation-lswtgl.dtb \ - orion5x-lschl.dtb \ + orion5x-linkstation-lschl.dtb \ orion5x-lswsgl.dtb \ orion5x-maxtor-shared-storage-2.dtb \ orion5x-netgear-wnr854t.dtb \ -- cgit From f32b20e89e82c9ff1825fc5c5d69753ff5558ccd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 31 Jan 2017 22:35:32 -0800 Subject: mlx4: Fix memory leak after mlx4_en_update_priv() In mlx4_en_update_priv(), dst->tx_ring[t] and dst->tx_cq[t] are over-written by src->tx_ring[t] and src->tx_cq[t] without first calling kfree. One of the reproducible code paths is by doing 'ethtool -L'. The fix is to do the kfree in mlx4_en_free_resources(). Here is the kmemleak report: unreferenced object 0xffff880841211800 (size 2048): comm "ethtool", pid 3096, jiffies 4294716940 (age 528.353s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x28/0x50 [] kmem_cache_alloc_trace+0x103/0x260 [] mlx4_en_try_alloc_resources+0x118/0x1a0 [] mlx4_en_set_ringparam+0x169/0x210 [] dev_ethtool+0xae5/0x2190 [] dev_ioctl+0x168/0x6f0 [] sock_do_ioctl+0x42/0x50 [] sock_ioctl+0x21b/0x2d0 [] do_vfs_ioctl+0x93/0x6a0 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x18/0xad [] 0xffffffffffffffff unreferenced object 0xffff880841213000 (size 2048): comm "ethtool", pid 3096, jiffies 4294716940 (age 528.353s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x28/0x50 [] kmem_cache_alloc_trace+0x103/0x260 [] mlx4_en_try_alloc_resources+0x13b/0x1a0 [] mlx4_en_set_ringparam+0x169/0x210 [] dev_ethtool+0xae5/0x2190 [] dev_ioctl+0x168/0x6f0 [] sock_do_ioctl+0x42/0x50 [] sock_ioctl+0x21b/0x2d0 [] do_vfs_ioctl+0x93/0x6a0 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x18/0xad [] 0xffffffffffffffff (gdb) list *mlx4_en_try_alloc_resources+0x118 0xffffffff8170e0a8 is in mlx4_en_try_alloc_resources (drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2145). 2140 if (!dst->tx_ring_num[t]) 2141 continue; 2142 2143 dst->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) * 2144 MAX_TX_RINGS, GFP_KERNEL); 2145 if (!dst->tx_ring[t]) 2146 goto err_free_tx; 2147 2148 dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * 2149 MAX_TX_RINGS, GFP_KERNEL); (gdb) list *mlx4_en_try_alloc_resources+0x13b 0xffffffff8170e0cb is in mlx4_en_try_alloc_resources (drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2150). 2145 if (!dst->tx_ring[t]) 2146 goto err_free_tx; 2147 2148 dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * 2149 MAX_TX_RINGS, GFP_KERNEL); 2150 if (!dst->tx_cq[t]) { 2151 kfree(dst->tx_ring[t]); 2152 goto err_free_tx; 2153 } 2154 } Fixes: ec25bc04ed8e ("net/mlx4_en: Add resilience in low memory systems") Cc: Eugenia Emantayev Cc: Saeed Mahameed Cc: Tariq Toukan Signed-off-by: Martin KaFai Lau Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 761f8b12399c..3abcead208d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2042,6 +2042,8 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv) if (priv->tx_cq[t] && priv->tx_cq[t][i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]); } + kfree(priv->tx_ring[t]); + kfree(priv->tx_cq[t]); } for (i = 0; i < priv->rx_ring_num; i++) { @@ -2214,7 +2216,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int t; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2248,11 +2249,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mlx4_en_free_resources(priv); mutex_unlock(&mdev->state_lock); - for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) { - kfree(priv->tx_ring[t]); - kfree(priv->tx_cq[t]); - } - free_netdev(dev); } -- cgit From 770f82253dbd7e6892a88018f2f6cd395f48d214 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 31 Jan 2017 22:35:33 -0800 Subject: mlx4: xdp_prog becomes inactive after ethtool '-L' or '-G' After calling mlx4_en_try_alloc_resources (e.g. by changing the number of rx-queues with ethtool -L), the existing xdp_prog becomes inactive. The bug is that the xdp_prog ptr has not been carried over from the old rx-queues to the new rx-queues Fixes: 47a38e155037 ("net/mlx4_en: add support for fast rx drop bpf program") Cc: Brenden Blanco Cc: Saeed Mahameed Cc: Tariq Toukan Signed-off-by: Martin KaFai Lau Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 27 +++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 ++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d5a9372ed84d..9aa422691954 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1099,7 +1099,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev, memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); new_prof.tx_ring_size = tx_size; new_prof.rx_ring_size = rx_size; - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; @@ -1774,7 +1774,7 @@ static int mlx4_en_set_channels(struct net_device *dev, new_prof.tx_ring_num[TX_XDP] = xdp_count; new_prof.rx_ring_num = channel->rx_count; - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3abcead208d2..3b4961a8e8e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2186,9 +2186,11 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst, int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp, - struct mlx4_en_port_profile *prof) + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog) { - int t; + struct bpf_prog *xdp_prog; + int i, t; mlx4_en_copy_priv(tmp, priv, prof); @@ -2202,6 +2204,23 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, } return -ENOMEM; } + + /* All rx_rings has the same xdp_prog. Pick the first one. */ + xdp_prog = rcu_dereference_protected( + priv->rx_ring[0]->xdp_prog, + lockdep_is_held(&priv->mdev->state_lock)); + + if (xdp_prog && carry_xdp_prog) { + xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num); + if (IS_ERR(xdp_prog)) { + mlx4_en_free_resources(tmp); + return PTR_ERR(xdp_prog); + } + for (i = 0; i < tmp->rx_ring_num; i++) + rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog, + xdp_prog); + } + return 0; } @@ -2751,7 +2770,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); } - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false); if (err) { if (prog) bpf_prog_sub(prog, priv->rx_ring_num - 1); @@ -3495,7 +3514,7 @@ int mlx4_en_reset_config(struct net_device *dev, memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); - err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index ba1c6cd0cc79..cec59bc264c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -679,7 +679,8 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp, - struct mlx4_en_port_profile *prof); + struct mlx4_en_port_profile *prof, + bool carry_xdp_prog); void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, struct mlx4_en_priv *tmp); -- cgit From cafe8df8b9bc9aa3dffa827c1a6757c6cd36f657 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 31 Jan 2017 18:46:43 -0800 Subject: net: phy: Fix lack of reference count on PHY driver There is currently no reference count being held on the PHY driver, which makes it possible to remove the PHY driver module while the PHY state machine is running and polling the PHY. This could cause crashes similar to this one to show up: [ 43.361162] BUG: unable to handle kernel NULL pointer dereference at 0000000000000140 [ 43.361162] IP: phy_state_machine+0x32/0x490 [ 43.361162] PGD 59dc067 [ 43.361162] PUD 0 [ 43.361162] [ 43.361162] Oops: 0000 [#1] SMP [ 43.361162] Modules linked in: dsa_loop [last unloaded: broadcom] [ 43.361162] CPU: 0 PID: 1299 Comm: kworker/0:3 Not tainted 4.10.0-rc5+ #415 [ 43.361162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 43.361162] Workqueue: events_power_efficient phy_state_machine [ 43.361162] task: ffff880006782b80 task.stack: ffffc90000184000 [ 43.361162] RIP: 0010:phy_state_machine+0x32/0x490 [ 43.361162] RSP: 0018:ffffc90000187e18 EFLAGS: 00000246 [ 43.361162] RAX: 0000000000000000 RBX: ffff8800059e53c0 RCX: ffff880006a15c60 [ 43.361162] RDX: ffff880006782b80 RSI: 0000000000000000 RDI: ffff8800059e5428 [ 43.361162] RBP: ffffc90000187e48 R08: ffff880006a15c40 R09: 0000000000000000 [ 43.361162] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800059e5428 [ 43.361162] R13: ffff8800059e5000 R14: 0000000000000000 R15: ffff880006a15c40 [ 43.361162] FS: 0000000000000000(0000) GS:ffff880006a00000(0000) knlGS:0000000000000000 [ 43.361162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 43.361162] CR2: 0000000000000140 CR3: 0000000005979000 CR4: 00000000000006f0 [ 43.361162] Call Trace: [ 43.361162] process_one_work+0x1b4/0x3e0 [ 43.361162] worker_thread+0x43/0x4d0 [ 43.361162] ? __schedule+0x17f/0x4e0 [ 43.361162] kthread+0xf7/0x130 [ 43.361162] ? process_one_work+0x3e0/0x3e0 [ 43.361162] ? kthread_create_on_node+0x40/0x40 [ 43.361162] ret_from_fork+0x29/0x40 [ 43.361162] Code: 56 41 55 41 54 4c 8d 67 68 53 4c 8d af 40 fc ff ff 48 89 fb 4c 89 e7 48 83 ec 08 e8 c9 9d 27 00 48 8b 83 60 ff ff ff 44 8b 73 98 <48> 8b 90 40 01 00 00 44 89 f0 48 85 d2 74 08 4c 89 ef ff d2 8b Keep references on the PHY driver module right before we are going to utilize it in phy_attach_direct(), and conversely when we don't use it anymore in phy_detach(). Signed-off-by: Mao Wenan [florian: rebase, rework commit message] Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 92b08383cafa..0d8f4d3847f6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -920,6 +920,11 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, return -EIO; } + if (!try_module_get(d->driver->owner)) { + dev_err(&dev->dev, "failed to get the device driver module\n"); + return -EIO; + } + get_device(d); /* Assume that if there is no driver, that it doesn't @@ -977,6 +982,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, error: phy_detach(phydev); put_device(d); + module_put(d->driver->owner); if (ndev_owner != bus->owner) module_put(bus->owner); return err; @@ -1059,6 +1065,7 @@ void phy_detach(struct phy_device *phydev) bus = phydev->mdio.bus; put_device(&phydev->mdio.dev); + module_put(phydev->mdio.dev.driver->owner); if (ndev_owner != bus->owner) module_put(bus->owner); } -- cgit From 94e1dab1c94715e18bb0bada503de3f3d7593076 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Tue, 24 Jan 2017 10:34:32 +0530 Subject: crypto: chcr - Fix panic on dma_unmap_sg Save DMA mapped sg list addresses to request context buffer. Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 49 +++++++++++++++++++----------------- drivers/crypto/chelsio/chcr_crypto.h | 3 +++ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 2ed1e24b44a8..d29c2b45d3ab 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -158,7 +158,7 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, case CRYPTO_ALG_TYPE_AEAD: ctx_req.req.aead_req = (struct aead_request *)req; ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req); - dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.aead_req->dst, + dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.ctx.reqctx->dst, ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE); if (ctx_req.ctx.reqctx->skb) { kfree_skb(ctx_req.ctx.reqctx->skb); @@ -1362,8 +1362,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; struct phys_sge_parm sg_param; - struct scatterlist *src, *dst; - struct scatterlist src_sg[2], dst_sg[2]; + struct scatterlist *src; unsigned int frags = 0, transhdr_len; unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0; unsigned int kctx_len = 0; @@ -1383,19 +1382,21 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) goto err; - src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); - dst = src; + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; + if (req->src != req->dst) { err = chcr_copy_assoc(req, aeadctx); if (err) return ERR_PTR(err); - dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); } if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) { null = 1; assoclen = 0; } - reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + (op_type ? -authsize : authsize)); if (reqctx->dst_nents <= 0) { pr_err("AUTHENC:Invalid Destination sg entries\n"); @@ -1460,7 +1461,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; sg_param.align = 0; - if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, &sg_param)) goto dstmap_fail; @@ -1711,8 +1712,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; struct phys_sge_parm sg_param; - struct scatterlist *src, *dst; - struct scatterlist src_sg[2], dst_sg[2]; + struct scatterlist *src; unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE; unsigned int dst_size = 0, kctx_len; unsigned int sub_type; @@ -1728,17 +1728,19 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) goto err; sub_type = get_aead_subtype(tfm); - src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); - dst = src; + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; + if (req->src != req->dst) { err = chcr_copy_assoc(req, aeadctx); if (err) { pr_err("AAD copy to destination buffer fails\n"); return ERR_PTR(err); } - dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); } - reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + (op_type ? -authsize : authsize)); if (reqctx->dst_nents <= 0) { pr_err("CCM:Invalid Destination sg entries\n"); @@ -1777,7 +1779,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; sg_param.align = 0; - if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, &sg_param)) goto dstmap_fail; @@ -1809,8 +1811,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; struct phys_sge_parm sg_param; - struct scatterlist *src, *dst; - struct scatterlist src_sg[2], dst_sg[2]; + struct scatterlist *src; unsigned int frags = 0, transhdr_len; unsigned int ivsize = AES_BLOCK_SIZE; unsigned int dst_size = 0, kctx_len; @@ -1832,13 +1833,14 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) goto err; - src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); - dst = src; + src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen); + reqctx->dst = src; if (req->src != req->dst) { err = chcr_copy_assoc(req, aeadctx); if (err) return ERR_PTR(err); - dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst, + req->assoclen); } if (!req->cryptlen) @@ -1848,7 +1850,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, crypt_len = AES_BLOCK_SIZE; else crypt_len = req->cryptlen; - reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen + (op_type ? -authsize : authsize)); if (reqctx->dst_nents <= 0) { pr_err("GCM:Invalid Destination sg entries\n"); @@ -1923,7 +1925,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; sg_param.align = 0; - if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst, &sg_param)) goto dstmap_fail; @@ -1937,7 +1939,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, write_sg_to_skb(skb, &frags, src, req->cryptlen); } else { aes_gcm_empty_pld_pad(req->dst, authsize - 1); - write_sg_to_skb(skb, &frags, dst, crypt_len); + write_sg_to_skb(skb, &frags, reqctx->dst, crypt_len); + } create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1, diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index d5af7d64a763..7ec0a8f12475 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -158,6 +158,9 @@ struct ablk_ctx { }; struct chcr_aead_reqctx { struct sk_buff *skb; + struct scatterlist *dst; + struct scatterlist srcffwd[2]; + struct scatterlist dstffwd[2]; short int dst_nents; u16 verify; u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; -- cgit From f5f7bebc91ab378dea5aad5277c4d283e46472d9 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Tue, 24 Jan 2017 10:34:33 +0530 Subject: crypto: chcr - Check device is allocated before use Ensure dev is allocated for crypto uld context before using the device for crypto operations. Cc: Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_core.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 918da8e6e2d8..1c65f07e1cc9 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -52,6 +52,7 @@ static struct cxgb4_uld_info chcr_uld_info = { int assign_chcr_device(struct chcr_dev **dev) { struct uld_ctx *u_ctx; + int ret = -ENXIO; /* * Which device to use if multiple devices are available TODO @@ -59,15 +60,14 @@ int assign_chcr_device(struct chcr_dev **dev) * must go to the same device to maintain the ordering. */ mutex_lock(&dev_mutex); /* TODO ? */ - u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry); - if (!u_ctx) { - mutex_unlock(&dev_mutex); - return -ENXIO; + list_for_each_entry(u_ctx, &uld_ctx_list, entry) + if (u_ctx && u_ctx->dev) { + *dev = u_ctx->dev; + ret = 0; + break; } - - *dev = u_ctx->dev; mutex_unlock(&dev_mutex); - return 0; + return ret; } static int chcr_dev_add(struct uld_ctx *u_ctx) @@ -202,10 +202,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) pr_err("ULD register fail: No chcr crypto support in cxgb4"); - return -1; - } return 0; } -- cgit From 500c0106e638e08c2c661c305ed57d6b67e10908 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 27 Jan 2017 15:28:45 -0600 Subject: crypto: ccp - Fix DMA operations when IOMMU is enabled An I/O page fault occurs when the IOMMU is enabled on a system that supports the v5 CCP. DMA operations use a Request ID value that does not match what is expected by the IOMMU, resulting in the I/O page fault. Setting the Request ID value to 0 corrects this issue. Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index e2ce8190ecc9..612898b4aaad 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -959,7 +959,7 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data) static void ccp5_config(struct ccp_device *ccp) { /* Public side */ - iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); + iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); } static void ccp5other_config(struct ccp_device *ccp) -- cgit From e5da5c5667381d2772374ee6a2967b3576c9483d Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 27 Jan 2017 17:09:04 -0600 Subject: crypto: ccp - Fix double add when creating new DMA command Eliminate a double-add by creating a new list to manage command descriptors when created; move the descriptor to the pending list when the command is submitted. Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.h | 1 + drivers/crypto/ccp/ccp-dmaengine.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 830f35e6005f..649e5610a5ce 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -238,6 +238,7 @@ struct ccp_dma_chan { struct ccp_device *ccp; spinlock_t lock; + struct list_head created; struct list_head pending; struct list_head active; struct list_head complete; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 6553912804f7..e5d9278f4019 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -63,6 +63,7 @@ static void ccp_free_chan_resources(struct dma_chan *dma_chan) ccp_free_desc_resources(chan->ccp, &chan->complete); ccp_free_desc_resources(chan->ccp, &chan->active); ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); spin_unlock_irqrestore(&chan->lock, flags); } @@ -273,6 +274,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) spin_lock_irqsave(&chan->lock, flags); cookie = dma_cookie_assign(tx_desc); + list_del(&desc->entry); list_add_tail(&desc->entry, &chan->pending); spin_unlock_irqrestore(&chan->lock, flags); @@ -426,7 +428,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, spin_lock_irqsave(&chan->lock, sflags); - list_add_tail(&desc->entry, &chan->pending); + list_add_tail(&desc->entry, &chan->created); spin_unlock_irqrestore(&chan->lock, sflags); @@ -610,6 +612,7 @@ static int ccp_terminate_all(struct dma_chan *dma_chan) /*TODO: Purge the complete list? */ ccp_free_desc_resources(chan->ccp, &chan->active); ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); spin_unlock_irqrestore(&chan->lock, flags); @@ -679,6 +682,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp) chan->ccp = ccp; spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->created); INIT_LIST_HEAD(&chan->pending); INIT_LIST_HEAD(&chan->active); INIT_LIST_HEAD(&chan->complete); -- cgit From c26819900036f5b91608051a0fc7c76f6b4ffc7b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Feb 2017 22:17:39 +0800 Subject: crypto: aesni - Fix failure when pcbc module is absent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When aesni is built as a module together with pcbc, the pcbc module must be present for aesni to load. However, the pcbc module may not be present for reasons such as its absence on initramfs. This patch allows the aesni to function even if the pcbc module is enabled but not present. Reported-by: Arkadiusz Miśkiewicz Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 6ef688a1ef3e..7ff1b0c86a8e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1085,9 +1085,9 @@ static void aesni_free_simds(void) aesni_simd_skciphers[i]; i++) simd_skcipher_free(aesni_simd_skciphers[i]); - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) && - aesni_simd_skciphers2[i].simd; i++) - simd_skcipher_free(aesni_simd_skciphers2[i].simd); + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) + if (aesni_simd_skciphers2[i].simd) + simd_skcipher_free(aesni_simd_skciphers2[i].simd); } static int __init aesni_init(void) @@ -1168,7 +1168,7 @@ static int __init aesni_init(void) simd = simd_skcipher_create_compat(algname, drvname, basename); err = PTR_ERR(simd); if (IS_ERR(simd)) - goto unregister_simds; + continue; aesni_simd_skciphers2[i].simd = simd; } -- cgit From 0b529f143e8baad441a5aac9ad55ec2434d8fb46 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Wed, 1 Feb 2017 21:10:28 +0530 Subject: crypto: algif_aead - Fix kernel panic on list_del MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel panics when userspace program try to access AEAD interface. Remove node from Linked List before freeing its memory. Cc: Signed-off-by: Harsh Jain Reviewed-by: Stephan Müller Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index f849311e9fd4..533265f110e0 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -661,9 +661,9 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags) unlock: list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) { af_alg_free_sg(&rsgl->sgl); + list_del(&rsgl->list); if (rsgl != &ctx->first_rsgl) sock_kfree_s(sk, rsgl, sizeof(*rsgl)); - list_del(&rsgl->list); } INIT_LIST_HEAD(&ctx->list); aead_wmem_wakeup(sk); -- cgit From 7c2cf1c4615cc2f576d0604406cdf0065f00b83b Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:06 +0530 Subject: crypto: chcr - Fix key length for RFC4106 Check keylen before copying salt to avoid wrap around of Integer. Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index d29c2b45d3ab..b4b78b37f8a6 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2192,8 +2192,8 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int ck_size; int ret = 0, key_ctx_size = 0; - if (get_aead_subtype(aead) == - CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) { + if (get_aead_subtype(aead) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 && + keylen > 3) { keylen -= 4; /* nonce/salt is present in the last 4 bytes */ memcpy(aeadctx->salt, key + keylen, 4); } -- cgit From d98e0929071e7ef63d35c1838b0ad0805ae366dd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 3 Feb 2017 08:53:51 -0600 Subject: Revert "PCI: pciehp: Add runtime PM support for PCIe hotplug ports" This reverts commit 68db9bc814362e7f24371c27d12a4f34477d9356. Yinghai reported that the following manual hotplug sequence: # echo 0 > /sys/bus/pci/slots/8/power # echo 1 > /sys/bus/pci/slots/8/power worked in v4.9, but fails in v4.10-rc1, and that reverting 68db9bc81436 ("PCI: pciehp: Add runtime PM support for PCIe hotplug ports") makes it work again. Fixes: 68db9bc81436 ("PCI: pciehp: Add runtime PM support for PCIe hotplug ports") Link: https://lkml.kernel.org/r/CAE9FiQVCMCa7iVyuwp9z6VrY0cE7V_xghuXip28Ft52=8QmTWw@mail.gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=193951 Reported-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/pciehp_ctrl.c | 6 ------ drivers/pci/pci.c | 12 ++++++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 10c9c0ba8ff2..ec0b4c11ccd9 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "../pci.h" #include "pciehp.h" @@ -99,7 +98,6 @@ static int board_added(struct slot *p_slot) pciehp_green_led_blink(p_slot); /* Check link training status */ - pm_runtime_get_sync(&ctrl->pcie->port->dev); retval = pciehp_check_link_status(ctrl); if (retval) { ctrl_err(ctrl, "Failed to check link status\n"); @@ -120,14 +118,12 @@ static int board_added(struct slot *p_slot) if (retval != -EEXIST) goto err_exit; } - pm_runtime_put(&ctrl->pcie->port->dev); pciehp_green_led_on(p_slot); pciehp_set_attention_status(p_slot, 0); return 0; err_exit: - pm_runtime_put(&ctrl->pcie->port->dev); set_slot_off(ctrl, p_slot); return retval; } @@ -141,9 +137,7 @@ static int remove_board(struct slot *p_slot) int retval; struct controller *ctrl = p_slot->ctrl; - pm_runtime_get_sync(&ctrl->pcie->port->dev); retval = pciehp_unconfigure_device(p_slot); - pm_runtime_put(&ctrl->pcie->port->dev); if (retval) return retval; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a881c0d3d2e8..7904d02ffdb9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2241,10 +2241,13 @@ bool pci_bridge_d3_possible(struct pci_dev *bridge) return false; /* - * Hotplug ports handled by firmware in System Management Mode + * Hotplug interrupts cannot be delivered if the link is down, + * so parents of a hotplug port must stay awake. In addition, + * hotplug ports handled by firmware in System Management Mode * may not be put into D3 by the OS (Thunderbolt on non-Macs). + * For simplicity, disallow in general for now. */ - if (bridge->is_hotplug_bridge && !pciehp_is_native(bridge)) + if (bridge->is_hotplug_bridge) return false; if (pci_bridge_d3_force) @@ -2276,10 +2279,7 @@ static int pci_dev_check_d3cold(struct pci_dev *dev, void *data) !pci_pme_capable(dev, PCI_D3cold)) || /* If it is a bridge it must be allowed to go to D3. */ - !pci_power_manageable(dev) || - - /* Hotplug interrupts cannot be delivered if the link is down. */ - dev->is_hotplug_bridge) + !pci_power_manageable(dev)) *d3cold_ok = false; -- cgit From d19a55ccad15a486ffe03030570744e5d5bd9f8e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 6 Jan 2017 15:33:14 -0500 Subject: dm mpath: cleanup -Wbool-operation warning in choose_pgpath() Reported-by: David Binderman Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 6400cffb986d..3570bcb7a4a4 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -427,7 +427,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned long flags; struct priority_group *pg; struct pgpath *pgpath; - bool bypassed = true; + unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { clear_bit(MPATHF_QUEUE_IO, &m->flags); @@ -466,7 +466,7 @@ check_current_pg: */ do { list_for_each_entry(pg, &m->priority_groups, list) { - if (pg->bypassed == bypassed) + if (pg->bypassed == !!bypassed) continue; pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { -- cgit From 4087a1fffe38106e10646606a27f10d40451862d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 25 Jan 2017 16:24:52 +0100 Subject: dm rq: cope with DM device destruction while in dm_old_request_fn() Fixes a crash in dm_table_find_target() due to a NULL struct dm_table being passed from dm_old_request_fn() that races with DM device destruction. Reported-by: artem@flashgrid.io Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-rq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 9d7275fb541a..6e702fc69a83 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -779,6 +779,10 @@ static void dm_old_request_fn(struct request_queue *q) int srcu_idx; struct dm_table *map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + dm_put_live_table(md, srcu_idx); + return; + } ti = dm_table_find_target(map, pos); dm_put_live_table(md, srcu_idx); } -- cgit From f5b0cba8f23915e92932f11eb063e37d70556a89 Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Tue, 31 Jan 2017 15:47:11 +0100 Subject: dm crypt: replace RCU read-side section with rwsem The lockdep splat below hints at a bug in RCU usage in dm-crypt that was introduced with commit c538f6ec9f56 ("dm crypt: add ability to use keys from the kernel key retention service"). The kernel keyring function user_key_payload() is in fact a wrapper for rcu_dereference_protected() which must not be called with only rcu_read_lock() section mark. Unfortunately the kernel keyring subsystem doesn't currently provide an interface that allows the use of an RCU read-side section. So for now we must drop RCU in favour of rwsem until a proper function is made available in the kernel keyring subsystem. =============================== [ INFO: suspicious RCU usage. ] 4.10.0-rc5 #2 Not tainted ------------------------------- ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by cryptsetup/6464: #0: (&md->type_lock){+.+.+.}, at: [] dm_lock_md_type+0x12/0x20 [dm_mod] #1: (rcu_read_lock){......}, at: [] crypt_set_key+0x1d8/0x4b0 [dm_crypt] stack backtrace: CPU: 1 PID: 6464 Comm: cryptsetup Not tainted 4.10.0-rc5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Call Trace: dump_stack+0x67/0x92 lockdep_rcu_suspicious+0xc5/0x100 crypt_set_key+0x351/0x4b0 [dm_crypt] ? crypt_set_key+0x1d8/0x4b0 [dm_crypt] crypt_ctr+0x341/0xa53 [dm_crypt] dm_table_add_target+0x147/0x330 [dm_mod] table_load+0x111/0x350 [dm_mod] ? retrieve_status+0x1c0/0x1c0 [dm_mod] ctl_ioctl+0x1f5/0x510 [dm_mod] dm_ctl_ioctl+0xe/0x20 [dm_mod] do_vfs_ioctl+0x8e/0x690 ? ____fput+0x9/0x10 ? task_work_run+0x7e/0xa0 ? trace_hardirqs_on_caller+0x122/0x1b0 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x18/0xad RIP: 0033:0x7f392c9a4ec7 RSP: 002b:00007ffef6383378 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffef63830a0 RCX: 00007f392c9a4ec7 RDX: 000000000124fcc0 RSI: 00000000c138fd09 RDI: 0000000000000005 RBP: 00007ffef6383090 R08: 00000000ffffffff R09: 00000000012482b0 R10: 2a28205d34383336 R11: 0000000000000246 R12: 00007f392d803a08 R13: 00007ffef63831e0 R14: 0000000000000000 R15: 00007f392d803a0b Fixes: c538f6ec9f56 ("dm crypt: add ability to use keys from the kernel key retention service") Reported-by: Milan Broz Signed-off-by: Ondrej Kozina Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 7c6c57216bf2..8a9f742d8ed7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1534,18 +1534,18 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string return PTR_ERR(key); } - rcu_read_lock(); + down_read(&key->sem); ukp = user_key_payload(key); if (!ukp) { - rcu_read_unlock(); + up_read(&key->sem); key_put(key); kzfree(new_key_string); return -EKEYREVOKED; } if (cc->key_size != ukp->datalen) { - rcu_read_unlock(); + up_read(&key->sem); key_put(key); kzfree(new_key_string); return -EINVAL; @@ -1553,7 +1553,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string memcpy(cc->key, ukp->data, cc->key_size); - rcu_read_unlock(); + up_read(&key->sem); key_put(key); /* clear the flag since following operations may invalidate previously valid key */ -- cgit From 013e8167899d389075160412a8c0c5e0581e1f13 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Thu, 2 Feb 2017 11:29:38 +0100 Subject: ipv6: sr: remove cleanup flag and fix HMAC computation In the latest version of the IPv6 Segment Routing IETF draft [1] the cleanup flag is removed and the flags field length is shrunk from 16 bits to 8 bits. As a consequence, the input of the HMAC computation is modified in a non-backward compatible way by covering the whole octet of flags instead of only the cleanup bit. As such, if an implementation compatible with the latest draft computes the HMAC of an SRH who has other flags set to 1, then the HMAC result would differ from the current implementation. This patch carries those modifications to prevent conflict with other implementations of IPv6 SR. [1] https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-05 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/uapi/linux/seg6.h | 9 +++------ net/ipv6/exthdrs.c | 31 +++---------------------------- net/ipv6/seg6_hmac.c | 8 ++++---- 3 files changed, 10 insertions(+), 38 deletions(-) diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h index c396a8052f73..052799e4d751 100644 --- a/include/uapi/linux/seg6.h +++ b/include/uapi/linux/seg6.h @@ -23,14 +23,12 @@ struct ipv6_sr_hdr { __u8 type; __u8 segments_left; __u8 first_segment; - __u8 flag_1; - __u8 flag_2; - __u8 reserved; + __u8 flags; + __u16 reserved; struct in6_addr segments[0]; }; -#define SR6_FLAG1_CLEANUP (1 << 7) #define SR6_FLAG1_PROTECTED (1 << 6) #define SR6_FLAG1_OAM (1 << 5) #define SR6_FLAG1_ALERT (1 << 4) @@ -42,8 +40,7 @@ struct ipv6_sr_hdr { #define SR6_TLV_PADDING 4 #define SR6_TLV_HMAC 5 -#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) -#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) +#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC) struct sr6_tlv { __u8 type; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e4198502fd98..275cac628a95 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -327,7 +327,6 @@ static int ipv6_srh_rcv(struct sk_buff *skb) struct ipv6_sr_hdr *hdr; struct inet6_dev *idev; struct in6_addr *addr; - bool cleanup = false; int accept_seg6; hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); @@ -351,11 +350,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb) #endif looped_back: - if (hdr->segments_left > 0) { - if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && - sr_has_cleanup(hdr)) - cleanup = true; - } else { + if (hdr->segments_left == 0) { if (hdr->nexthdr == NEXTHDR_IPV6) { int offset = (hdr->hdrlen + 1) << 3; @@ -418,21 +413,6 @@ looped_back: ipv6_hdr(skb)->daddr = *addr; - if (cleanup) { - int srhlen = (hdr->hdrlen + 1) << 3; - int nh = hdr->nexthdr; - - skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); - memmove(skb_network_header(skb) + srhlen, - skb_network_header(skb), - (unsigned char *)hdr - skb_network_header(skb)); - skb->network_header += srhlen; - ipv6_hdr(skb)->nexthdr = nh; - ipv6_hdr(skb)->payload_len = htons(skb->len - - sizeof(struct ipv6hdr)); - skb_push_rcsum(skb, sizeof(struct ipv6hdr)); - } - skb_dst_drop(skb); ip6_route_input(skb); @@ -453,13 +433,8 @@ looped_back: } ipv6_hdr(skb)->hop_limit--; - /* be sure that srh is still present before reinjecting */ - if (!cleanup) { - skb_pull(skb, sizeof(struct ipv6hdr)); - goto looped_back; - } - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; } dst_input(skb); diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 03a064803626..6ef3dfb6e811 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -174,7 +174,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, * hash function (RadioGatun) with up to 1216 bits */ - /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + /* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */ plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; /* this limit allows for 14 segments */ @@ -186,7 +186,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, * * 1. Source IPv6 address (128 bits) * 2. first_segment value (8 bits) - * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 3. Flags (8 bits) * 4. HMAC Key ID (32 bits) * 5. All segments in the segments list (n * 128 bits) */ @@ -202,8 +202,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, /* first_segment value */ *off++ = hdr->first_segment; - /* cleanup flag */ - *off++ = !!(sr_has_cleanup(hdr)) << 7; + /* flags */ + *off++ = hdr->flags; /* HMAC Key ID */ memcpy(off, &hmackeyid, 4); -- cgit From 3808d34838184fd29088d6b3a364ba2f1c018fb6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 2 Feb 2017 13:32:10 +0100 Subject: ethtool: do not vzalloc(0) on registers dump If ->get_regs_len() callback return 0, we allocate 0 bytes of memory, what print ugly warning in dmesg, which can be found further below. This happen on mac80211 devices where ieee80211_get_regs_len() just return 0 and driver only fills ethtool_regs structure and actually do not provide any dump. However I assume this can happen on other drivers i.e. when for some devices driver provide regs dump and for others do not. Hence preventing to to print warning in ethtool code seems to be reasonable. ethtool: vmalloc: allocation failure: 0 bytes, mode:0x24080c2(GFP_KERNEL|__GFP_HIGHMEM|__GFP_ZERO) Call Trace: [] dump_stack+0x63/0x8c [] warn_alloc+0x13f/0x170 [] __vmalloc_node_range+0x1e6/0x2c0 [] vzalloc+0x54/0x60 [] dev_ethtool+0xb4c/0x1b30 [] dev_ioctl+0x181/0x520 [] sock_do_ioctl+0x42/0x50 Mem-Info: active_anon:435809 inactive_anon:173951 isolated_anon:0 active_file:835822 inactive_file:196932 isolated_file:0 unevictable:0 dirty:8 writeback:0 unstable:0 slab_reclaimable:157732 slab_unreclaimable:10022 mapped:83042 shmem:306356 pagetables:9507 bounce:0 free:130041 free_pcp:1080 free_cma:0 Node 0 active_anon:1743236kB inactive_anon:695804kB active_file:3343288kB inactive_file:787728kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:332168kB dirty:32kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 1225424kB writeback_tmp:0kB unstable:0kB pages_scanned:0 all_unreclaimable? no Node 0 DMA free:15900kB min:136kB low:168kB high:200kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15984kB managed:15900kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 3187 7643 7643 Node 0 DMA32 free:419732kB min:28124kB low:35152kB high:42180kB active_anon:541180kB inactive_anon:248988kB active_file:1466388kB inactive_file:389632kB unevictable:0kB writepending:0kB present:3370280kB managed:3290932kB mlocked:0kB slab_reclaimable:217184kB slab_unreclaimable:4180kB kernel_stack:160kB pagetables:984kB bounce:0kB free_pcp:2236kB local_pcp:660kB free_cma:0kB lowmem_reserve[]: 0 0 4456 4456 Signed-off-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- net/core/ethtool.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 236a21e3c878..d92de0a1f0a4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1405,9 +1405,12 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = vzalloc(reglen); - if (reglen && !regbuf) - return -ENOMEM; + regbuf = NULL; + if (reglen) { + regbuf = vzalloc(reglen); + if (!regbuf) + return -ENOMEM; + } ops->get_regs(dev, ®s, regbuf); -- cgit From 56067812d5b0e737ac2063e94a50f76b810d6ca3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 09:54:05 +0000 Subject: kbuild: modversions: add infrastructure for emitting relative CRCs This add the kbuild infrastructure that will allow architectures to emit vmlinux symbol CRCs as 32-bit offsets to another location in the kernel where the actual value is stored. This works around problems with CRCs being mistaken for relocatable symbols on kernels that self relocate at runtime (i.e., powerpc with CONFIG_RELOCATABLE=y) For the kbuild side of things, this comes down to the following: - introducing a Kconfig symbol MODULE_REL_CRCS - adding a -R switch to genksyms to instruct it to emit the CRC symbols as references into the .rodata section - making modpost distinguish such references from absolute CRC symbols by the section index (SHN_ABS) - making kallsyms disregard non-absolute symbols with a __crc_ prefix Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- init/Kconfig | 4 ++++ scripts/Makefile.build | 2 ++ scripts/genksyms/genksyms.c | 19 ++++++++++++++----- scripts/kallsyms.c | 12 ++++++++++++ scripts/mod/modpost.c | 10 ++++++++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index e1a937348a3e..4dd8bd232a1d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1987,6 +1987,10 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +config MODULE_REL_CRCS + bool + depends on MODVERSIONS + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help diff --git a/scripts/Makefile.build b/scripts/Makefile.build index eadcd4d359d9..d883116ebaa4 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -164,6 +164,7 @@ cmd_gensymtypes_c = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ + $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS)) \ $(if $(KBUILD_PRESERVE),-p) \ -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) @@ -337,6 +338,7 @@ cmd_gensymtypes_S = \ $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ + $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS)) \ $(if $(KBUILD_PRESERVE),-p) \ -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 06121ce524a7..c9235d8340f1 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -44,7 +44,7 @@ char *cur_filename, *source_file; int in_source_file; static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types, - flag_preserve, flag_warnings; + flag_preserve, flag_warnings, flag_rel_crcs; static const char *mod_prefix = ""; static int errors; @@ -693,7 +693,10 @@ void export_symbol(const char *name) fputs(">\n", debugfile); /* Used as a linker script. */ - printf("%s__crc_%s = 0x%08lx ;\n", mod_prefix, name, crc); + printf(!flag_rel_crcs ? "%s__crc_%s = 0x%08lx;\n" : + "SECTIONS { .rodata : ALIGN(4) { " + "%s__crc_%s = .; LONG(0x%08lx); } }\n", + mod_prefix, name, crc); } } @@ -730,7 +733,7 @@ void error_with_pos(const char *fmt, ...) static void genksyms_usage(void) { - fputs("Usage:\n" "genksyms [-adDTwqhV] > /path/to/.tmp_obj.ver\n" "\n" + fputs("Usage:\n" "genksyms [-adDTwqhVR] > /path/to/.tmp_obj.ver\n" "\n" #ifdef __GNU_LIBRARY__ " -s, --symbol-prefix Select symbol prefix\n" " -d, --debug Increment the debug level (repeatable)\n" @@ -742,6 +745,7 @@ static void genksyms_usage(void) " -q, --quiet Disable warnings (default)\n" " -h, --help Print this message\n" " -V, --version Print the release version\n" + " -R, --relative-crc Emit section relative symbol CRCs\n" #else /* __GNU_LIBRARY__ */ " -s Select symbol prefix\n" " -d Increment the debug level (repeatable)\n" @@ -753,6 +757,7 @@ static void genksyms_usage(void) " -q Disable warnings (default)\n" " -h Print this message\n" " -V Print the release version\n" + " -R Emit section relative symbol CRCs\n" #endif /* __GNU_LIBRARY__ */ , stderr); } @@ -774,13 +779,14 @@ int main(int argc, char **argv) {"preserve", 0, 0, 'p'}, {"version", 0, 0, 'V'}, {"help", 0, 0, 'h'}, + {"relative-crc", 0, 0, 'R'}, {0, 0, 0, 0} }; - while ((o = getopt_long(argc, argv, "s:dwqVDr:T:ph", + while ((o = getopt_long(argc, argv, "s:dwqVDr:T:phR", &long_opts[0], NULL)) != EOF) #else /* __GNU_LIBRARY__ */ - while ((o = getopt(argc, argv, "s:dwqVDr:T:ph")) != EOF) + while ((o = getopt(argc, argv, "s:dwqVDr:T:phR")) != EOF) #endif /* __GNU_LIBRARY__ */ switch (o) { case 's': @@ -823,6 +829,9 @@ int main(int argc, char **argv) case 'h': genksyms_usage(); return 0; + case 'R': + flag_rel_crcs = 1; + break; default: genksyms_usage(); return 1; diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 299b92ca1ae0..5d554419170b 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -219,6 +219,10 @@ static int symbol_valid(struct sym_entry *s) "_SDA2_BASE_", /* ppc */ NULL }; + static char *special_prefixes[] = { + "__crc_", /* modversions */ + NULL }; + static char *special_suffixes[] = { "_veneer", /* arm */ "_from_arm", /* arm */ @@ -259,6 +263,14 @@ static int symbol_valid(struct sym_entry *s) if (strcmp(sym_name, special_symbols[i]) == 0) return 0; + for (i = 0; special_prefixes[i]; i++) { + int l = strlen(special_prefixes[i]); + + if (l <= strlen(sym_name) && + strncmp(sym_name, special_prefixes[i], l) == 0) + return 0; + } + for (i = 0; special_suffixes[i]; i++) { int l = strlen(sym_name) - strlen(special_suffixes[i]); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 29c89a6bad3d..4dedd0d3d3a7 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -621,6 +621,16 @@ static void handle_modversions(struct module *mod, struct elf_info *info, if (strncmp(symname, CRC_PFX, strlen(CRC_PFX)) == 0) { is_crc = true; crc = (unsigned int) sym->st_value; + if (sym->st_shndx != SHN_UNDEF && sym->st_shndx != SHN_ABS) { + unsigned int *crcp; + + /* symbol points to the CRC in the ELF object */ + crcp = (void *)info->hdr + sym->st_value + + info->sechdrs[sym->st_shndx].sh_offset - + (info->hdr->e_type != ET_REL ? + info->sechdrs[sym->st_shndx].sh_addr : 0); + crc = *crcp; + } sym_update_crc(symname + strlen(CRC_PFX), mod, crc, export); } -- cgit From 71810db27c1c853b335675bee335d893bc3d324b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 09:54:06 +0000 Subject: modversions: treat symbol CRCs as 32 bit quantities The modversion symbol CRCs are emitted as ELF symbols, which allows us to easily populate the kcrctab sections by relying on the linker to associate each kcrctab slot with the correct value. This has a couple of downsides: - Given that the CRCs are treated as memory addresses, we waste 4 bytes for each CRC on 64 bit architectures, - On architectures that support runtime relocation, a R__RELATIVE relocation entry is emitted for each CRC value, which identifies it as a quantity that requires fixing up based on the actual runtime load offset of the kernel. This results in corrupted CRCs unless we explicitly undo the fixup (and this is currently being handled in the core module code) - Such runtime relocation entries take up 24 bytes of __init space each, resulting in a x8 overhead in [uncompressed] kernel size for CRCs. Switching to explicit 32 bit values on 64 bit architectures fixes most of these issues, given that 32 bit values are not treated as quantities that require fixing up based on the actual runtime load offset. Note that on some ELF64 architectures [such as PPC64], these 32-bit values are still emitted as [absolute] runtime relocatable quantities, even if the value resolves to a build time constant. Since relative relocations are always resolved at build time, this patch enables MODULE_REL_CRCS on powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC references into relative references into .rodata where the actual CRC value is stored. So redefine all CRC fields and variables as u32, and redefine the __CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using inline assembler (which is necessary since 64-bit C code cannot use 32-bit types to hold memory addresses, even if they are ultimately resolved using values that do not exceed 0xffffffff). To avoid potential problems with legacy 32-bit architectures using legacy toolchains, the equivalent C definition of the kcrctab entry is retained for 32-bit architectures. Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y") Acked-by: Rusty Russell Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/module.h | 4 --- arch/powerpc/kernel/module_64.c | 8 ------ include/asm-generic/export.h | 11 ++++---- include/linux/export.h | 14 +++++++++++ include/linux/module.h | 14 +++++------ kernel/module.c | 53 ++++++++++++++++++--------------------- 7 files changed, 53 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a8ee573fe610..db8a1ef6bfaf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -484,6 +484,7 @@ config RELOCATABLE bool "Build a relocatable kernel" depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE)) select NONSTATIC_KERNEL + select MODULE_REL_CRCS if MODVERSIONS help This builds a kernel image that is capable of running at the location the kernel is loaded at. For ppc32, there is no any diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index cc12c61ef315..53885512b8d3 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -90,9 +90,5 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec } #endif -#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) -#define ARCH_RELOCATES_KCRCTAB -#define reloc_start PHYSICAL_START -#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index bb1807184bad..0b0f89685b67 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -286,14 +286,6 @@ static void dedotify_versions(struct modversion_info *vers, for (end = (void *)vers + size; vers < end; vers++) if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); -#ifdef ARCH_RELOCATES_KCRCTAB - /* The TOC symbol has no CRC computed. To avoid CRC - * check failing, we must force it to the expected - * value (see CRC check in module.c). - */ - if (!strcmp(vers->name, "TOC.")) - vers->crc = -(unsigned long)reloc_start; -#endif } } diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 63554e9f6e0c..719db1968d81 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -9,18 +9,15 @@ #ifndef KSYM_ALIGN #define KSYM_ALIGN 8 #endif -#ifndef KCRC_ALIGN -#define KCRC_ALIGN 8 -#endif #else #define __put .long #ifndef KSYM_ALIGN #define KSYM_ALIGN 4 #endif +#endif #ifndef KCRC_ALIGN #define KCRC_ALIGN 4 #endif -#endif #ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX #define KSYM(name) _##name @@ -52,7 +49,11 @@ KSYM(__kstrtab_\name): .section ___kcrctab\sec+\name,"a" .balign KCRC_ALIGN KSYM(__kcrctab_\name): - __put KSYM(__crc_\name) +#if defined(CONFIG_MODULE_REL_CRCS) + .long KSYM(__crc_\name) - . +#else + .long KSYM(__crc_\name) +#endif .weak KSYM(__crc_\name) .previous #endif diff --git a/include/linux/export.h b/include/linux/export.h index 2a0f61fbc731..7473fba6a60c 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -43,6 +43,13 @@ extern struct module __this_module; #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ +#if defined(CONFIG_MODULE_REL_CRCS) +#define __CRC_SYMBOL(sym, sec) \ + asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ + " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " - . \n" \ + " .previous \n"); +#elif !defined(CONFIG_64BIT) #define __CRC_SYMBOL(sym, sec) \ extern __visible void *__crc_##sym __attribute__((weak)); \ static const unsigned long __kcrctab_##sym \ @@ -50,6 +57,13 @@ extern struct module __this_module; __attribute__((section("___kcrctab" sec "+" #sym), used)) \ = (unsigned long) &__crc_##sym; #else +#define __CRC_SYMBOL(sym, sec) \ + asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ + " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .previous \n"); +#endif +#else #define __CRC_SYMBOL(sym, sec) #endif diff --git a/include/linux/module.h b/include/linux/module.h index 7c84273d60b9..cc7cba219b20 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -346,7 +346,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const unsigned long *crcs; + const s32 *crcs; unsigned int num_syms; /* Kernel parameters. */ @@ -359,18 +359,18 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const unsigned long *gpl_crcs; + const s32 *gpl_crcs; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ const struct kernel_symbol *unused_syms; - const unsigned long *unused_crcs; + const s32 *unused_crcs; unsigned int num_unused_syms; /* GPL-only, unused exported symbols. */ unsigned int num_unused_gpl_syms; const struct kernel_symbol *unused_gpl_syms; - const unsigned long *unused_gpl_crcs; + const s32 *unused_gpl_crcs; #endif #ifdef CONFIG_MODULE_SIG @@ -382,7 +382,7 @@ struct module { /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; - const unsigned long *gpl_future_crcs; + const s32 *gpl_future_crcs; unsigned int num_gpl_future_syms; /* Exception table */ @@ -523,7 +523,7 @@ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; - const unsigned long *crcs; + const s32 *crcs; enum { NOT_GPL_ONLY, GPL_ONLY, @@ -539,7 +539,7 @@ struct symsearch { */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, - const unsigned long **crc, + const s32 **crc, bool gplok, bool warn); diff --git a/kernel/module.c b/kernel/module.c index 38d4270925d4..3d8f126208e3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -389,16 +389,16 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const unsigned long __start___kcrctab[]; -extern const unsigned long __start___kcrctab_gpl[]; -extern const unsigned long __start___kcrctab_gpl_future[]; +extern const s32 __start___kcrctab[]; +extern const s32 __start___kcrctab_gpl[]; +extern const s32 __start___kcrctab_gpl_future[]; #ifdef CONFIG_UNUSED_SYMBOLS extern const struct kernel_symbol __start___ksymtab_unused[]; extern const struct kernel_symbol __stop___ksymtab_unused[]; extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; -extern const unsigned long __start___kcrctab_unused[]; -extern const unsigned long __start___kcrctab_unused_gpl[]; +extern const s32 __start___kcrctab_unused[]; +extern const s32 __start___kcrctab_unused_gpl[]; #endif #ifndef CONFIG_MODVERSIONS @@ -497,7 +497,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const unsigned long *crc; + const s32 *crc; const struct kernel_symbol *sym; }; @@ -563,7 +563,7 @@ static bool find_symbol_in_section(const struct symsearch *syms, * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, - const unsigned long **crc, + const s32 **crc, bool gplok, bool warn) { @@ -1249,23 +1249,17 @@ static int try_to_force_load(struct module *mod, const char *reason) } #ifdef CONFIG_MODVERSIONS -/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */ -static unsigned long maybe_relocated(unsigned long crc, - const struct module *crc_owner) + +static u32 resolve_rel_crc(const s32 *crc) { -#ifdef ARCH_RELOCATES_KCRCTAB - if (crc_owner == NULL) - return crc - (unsigned long)reloc_start; -#endif - return crc; + return *(u32 *)((void *)crc + *crc); } static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc, - const struct module *crc_owner) + const s32 *crc) { unsigned int i, num_versions; struct modversion_info *versions; @@ -1283,13 +1277,19 @@ static int check_version(Elf_Shdr *sechdrs, / sizeof(struct modversion_info); for (i = 0; i < num_versions; i++) { + u32 crcval; + if (strcmp(versions[i].name, symname) != 0) continue; - if (versions[i].crc == maybe_relocated(*crc, crc_owner)) + if (IS_ENABLED(CONFIG_MODULE_REL_CRCS)) + crcval = resolve_rel_crc(crc); + else + crcval = *crc; + if (versions[i].crc == crcval) return 1; - pr_debug("Found checksum %lX vs module %lX\n", - maybe_relocated(*crc, crc_owner), versions[i].crc); + pr_debug("Found checksum %X vs module %lX\n", + crcval, versions[i].crc); goto bad_version; } @@ -1307,7 +1307,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, unsigned int versindex, struct module *mod) { - const unsigned long *crc; + const s32 *crc; /* * Since this should be found in kernel (which can't be removed), no @@ -1321,8 +1321,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, } preempt_enable(); return check_version(sechdrs, versindex, - VMLINUX_SYMBOL_STR(module_layout), mod, crc, - NULL); + VMLINUX_SYMBOL_STR(module_layout), mod, crc); } /* First part is kernel version, which we ignore if module has crcs. */ @@ -1340,8 +1339,7 @@ static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc, - const struct module *crc_owner) + const s32 *crc) { return 1; } @@ -1368,7 +1366,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, { struct module *owner; const struct kernel_symbol *sym; - const unsigned long *crc; + const s32 *crc; int err; /* @@ -1383,8 +1381,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, if (!sym) goto unlock; - if (!check_version(info->sechdrs, info->index.vers, name, mod, crc, - owner)) { + if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) { sym = ERR_PTR(-EINVAL); goto getname; } -- cgit From 4b9eee96fcb361a5e16a8d2619825e8a048f81f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 09:54:07 +0000 Subject: module: unify absolute krctab definitions for 32-bit and 64-bit The previous patch introduced a separate inline asm version of the krcrctab declaration template for use with 64-bit architectures, which cannot refer to ELF symbols using 32-bit quantities. This declaration should be equivalent to the C one for 32-bit architectures, but just in case - unify them in a separate patch, which can simply be dropped if it turns out to break anything. Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/export.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/linux/export.h b/include/linux/export.h index 7473fba6a60c..1a1dfdb2a5c6 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -49,13 +49,6 @@ extern struct module __this_module; " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " - . \n" \ " .previous \n"); -#elif !defined(CONFIG_64BIT) -#define __CRC_SYMBOL(sym, sec) \ - extern __visible void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ - __used \ - __attribute__((section("___kcrctab" sec "+" #sym), used)) \ - = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) \ asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ -- cgit From 00c87e9a70a17b355b81c36adedf05e84f54e10d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 1 Feb 2017 14:19:53 +0100 Subject: KVM: x86: do not save guest-unsupported XSAVE state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saving unsupported state prevents migration when the new host does not support a XSAVE feature of the original host, even if the feature is not exposed to the guest. We've masked host features with guest-visible features before, with 4344ee981e21 ("KVM: x86: only copy XSAVE state for the supported features") and dropped it when implementing XSAVES. Do it again. Fixes: df1daba7d1cb ("KVM: x86: support XSAVES usage in the host") Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d153be8929a6..e52c9088660f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3182,6 +3182,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) memcpy(dest, xsave, XSAVE_HDR_OFFSET); /* Set XSTATE_BV */ + xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; /* -- cgit From 29905b52fad0854351f57bab867647e4982285bf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 18:05:26 +0000 Subject: log2: make order_base_2() behave correctly on const input value zero The function order_base_2() is defined (according to the comment block) as returning zero on input zero, but subsequently passes the input into roundup_pow_of_two(), which is explicitly undefined for input zero. This has gone unnoticed until now, but optimization passes in GCC 7 may produce constant folded function instances where a constant value of zero is passed into order_base_2(), resulting in link errors against the deliberately undefined '____ilog2_NaN'. So update order_base_2() to adhere to its own documented interface. [ See http://marc.info/?l=linux-kernel&m=147672952517795&w=2 and follow-up discussion for more background. The gcc "optimization pass" is really just broken, but now the GCC trunk problem seems to have escaped out of just specially built daily images, so we need to work around it in mainline. - Linus ] Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/log2.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index fd7ff3d91e6a..ef3d4f67118c 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -203,6 +203,17 @@ unsigned long __rounddown_pow_of_two(unsigned long n) * ... and so on. */ -#define order_base_2(n) ilog2(roundup_pow_of_two(n)) +static inline __attribute_const__ +int __order_base_2(unsigned long n) +{ + return n > 1 ? ilog2(n - 1) + 1 : 0; +} +#define order_base_2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) ? 0 : \ + ilog2((n) - 1) + 1) : \ + __order_base_2(n) \ +) #endif /* _LINUX_LOG2_H */ -- cgit From e471486c13b82b1338d49c798f78bb62b1ed0a9e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 2 Feb 2017 10:31:00 -0800 Subject: acpi, nfit: fix acpi_nfit_flush_probe() crash We queue an on-stack work item to 'nfit_wq' and wait for it to complete as part of a 'flush_probe' request. However, if the user cancels the wait we need to make sure the item is flushed from the queue otherwise we are leaving an out-of-scope stack address on the work list. BUG: unable to handle kernel paging request at ffffbcb3c72f7cd0 IP: [] __list_add+0x1b/0xb0 [..] RIP: 0010:[] [] __list_add+0x1b/0xb0 RSP: 0018:ffffbcb3c7ba7c00 EFLAGS: 00010046 [..] Call Trace: [] insert_work+0x3a/0xc0 [] ? seq_open+0x5a/0xa0 [] __queue_work+0x16a/0x460 [] queue_work_on+0x38/0x40 [] acpi_nfit_flush_probe+0x95/0xc0 [nfit] [] ? nfit_visible+0x40/0x40 [nfit] [] wait_probe_show+0x25/0x60 [] dev_attr_show+0x20/0x50 Fixes: 7ae0fa439faf ("nfit, libnvdimm: async region scrub workqueue") Cc: Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 2f82b8eba360..7361d00818e2 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2704,6 +2704,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); struct device *dev = acpi_desc->dev; struct acpi_nfit_flush_work flush; + int rc; /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ device_lock(dev); @@ -2716,7 +2717,10 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) INIT_WORK_ONSTACK(&flush.work, flush_probe); COMPLETION_INITIALIZER_ONSTACK(flush.cmp); queue_work(nfit_wq, &flush.work); - return wait_for_completion_interruptible(&flush.cmp); + + rc = wait_for_completion_interruptible(&flush.cmp); + cancel_work_sync(&flush.work); + return rc; } static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, -- cgit From 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Feb 2017 10:31:35 -0800 Subject: net: use a work queue to defer net_disable_timestamp() work Dmitry reported a warning [1] showing that we were calling net_disable_timestamp() -> static_key_slow_dec() from a non process context. Grabbing a mutex while holding a spinlock or rcu_read_lock() is not allowed. As Cong suggested, we now use a work queue. It is possible netstamp_clear() exits while netstamp_needed_deferred is not zero, but it is probably not worth trying to do better than that. netstamp_needed_deferred atomic tracks the exact number of deferred decrements. [1] [ INFO: suspicious RCU usage. ] 4.10.0-rc5+ #192 Not tainted ------------------------------- ./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by syz-executor14/23111: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1454 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919 #1: (rcu_read_lock){......}, at: [] nf_hook include/linux/netfilter.h:201 [inline] #1: (rcu_read_lock){......}, at: [] __ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160 stack backtrace: CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline] ___might_sleep+0x560/0x650 kernel/sched/core.c:7748 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559 RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005 RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000 R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:752 in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14 INFO: lockdep is turned off. CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sock_wfree+0xae/0x120 net/core/sock.c:1645 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put include/net/inet_frag.h:133 [inline] nf_ct_frag6_gather+0x1106/0x3840 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline] nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook include/linux/netfilter.h:212 [inline] __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline] rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872 vfs_writev+0x87/0xc0 fs/read_write.c:911 do_writev+0x110/0x2c0 fs/read_write.c:944 SYSC_writev fs/read_write.c:1017 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1014 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x445559 Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Suggested-by: Cong Wang Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7f218e095361..29101c98399f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1695,24 +1695,19 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call static_key_slow_dec() from irq context - * If net_disable_timestamp() is called from irq context, defer the - * static_key_slow_dec() calls. - */ static atomic_t netstamp_needed_deferred; -#endif - -void net_enable_timestamp(void) +static void netstamp_clear(struct work_struct *work) { -#ifdef HAVE_JUMP_LABEL int deferred = atomic_xchg(&netstamp_needed_deferred, 0); - if (deferred) { - while (--deferred) - static_key_slow_dec(&netstamp_needed); - return; - } + while (deferred--) + static_key_slow_dec(&netstamp_needed); +} +static DECLARE_WORK(netstamp_work, netstamp_clear); #endif + +void net_enable_timestamp(void) +{ static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1720,12 +1715,12 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - if (in_interrupt()) { - atomic_inc(&netstamp_needed_deferred); - return; - } -#endif + /* net_disable_timestamp() can be called from non process context */ + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_dec(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_disable_timestamp); -- cgit From 0d5415b489f68b58e1983a53793d25d53098ed4b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 3 Feb 2017 05:43:52 +0200 Subject: Revert "vring: Force use of DMA API for ARM-based systems with legacy devices" This reverts commit c7070619f3408d9a0dffbed9149e6f00479cf43b. This has been shown to regress on some ARM systems: by forcing on DMA API usage for ARM systems, we have inadvertently kicked open a hornets' nest in terms of cache-coherency. Namely that unless the virtio device is explicitly described as capable of coherent DMA by firmware, the DMA APIs on ARM and other DT-based platforms will assume it is non-coherent. This turns out to cause a big problem for the likes of QEMU and kvmtool, which generate virtio-mmio devices in their guest DTs but neglect to add the often-overlooked "dma-coherent" property; as a result, we end up with the guest making non-cacheable accesses to the vring, the host doing so cacheably, both talking past each other and things going horribly wrong. We are working on a safer work-around. Fixes: c7070619f340 ("vring: Force use of DMA API for ARM-based systems with legacy devices") Reported-by: Robin Murphy Cc: Signed-off-by: Will Deacon Signed-off-by: Michael S. Tsirkin Acked-by: Marc Zyngier --- drivers/virtio/virtio_ring.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 7e38ed79c3fc..409aeaa49246 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -159,13 +159,6 @@ static bool vring_use_dma_api(struct virtio_device *vdev) if (xen_domain()) return true; - /* - * On ARM-based machines, the DMA ops will do the right thing, - * so always use them with legacy devices. - */ - if (IS_ENABLED(CONFIG_ARM) || IS_ENABLED(CONFIG_ARM64)) - return !virtio_has_feature(vdev, VIRTIO_F_VERSION_1); - return false; } -- cgit From cda8bba0f99d25d2061c531113c14fa41effc3ae Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 30 Jan 2017 11:09:36 +0100 Subject: vhost: fix initialization for vq->is_le Currently, under certain circumstances vhost_init_is_le does just a part of the initialization job, and depends on vhost_reset_is_le being called too. For this reason vhost_vq_init_access used to call vhost_reset_is_le when vq->private_data is NULL. This is not only counter intuitive, but also real a problem because it breaks vhost_net. The bug was introduced to vhost_net with commit 2751c9882b94 ("vhost: cross-endian support for legacy devices"). The symptom is corruption of the vq's used.idx field (virtio) after VHOST_NET_SET_BACKEND was issued as a part of the vhost shutdown on a vq with pending descriptors. Let us make sure the outcome of vhost_init_is_le never depend on the state it is actually supposed to initialize, and fix virtio_net by removing the reset from vhost_vq_init_access. With the above, there is no reason for vhost_reset_is_le to do just half of the job. Let us make vhost_reset_is_le reinitialize is_le. Signed-off-by: Halil Pasic Reported-by: Michael A. Tebolt Reported-by: Dr. David Alan Gilbert Fixes: commit 2751c9882b94 ("vhost: cross-endian support for legacy devices") Cc: Signed-off-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Tested-by: Michael A. Tebolt --- drivers/vhost/vhost.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d6432603880c..8f99fe08de02 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -130,14 +130,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, static void vhost_init_is_le(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - vq->is_le = true; + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) + || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { - vq->is_le = virtio_legacy_is_little_endian(); + vhost_init_is_le(vq); } struct vhost_flush_struct { @@ -1714,10 +1714,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) int r; bool is_le = vq->is_le; - if (!vq->private_data) { - vhost_reset_is_le(vq); + if (!vq->private_data) return 0; - } vhost_init_is_le(vq); -- cgit From 79134d11d030b886106bf45a5638c1ccb1f0856c Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Fri, 3 Feb 2017 16:48:14 +0530 Subject: MAINTAINERS: update email address for Amit Shah I'm leaving my job at Red Hat, this email address will stop working next week. Update it to one that I will have access to later. Signed-off-by: Amit Shah Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5f10c28b2e15..e0f979294e23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13066,7 +13066,7 @@ F: drivers/input/serio/userio.c F: include/uapi/linux/userio.h VIRTIO CONSOLE DRIVER -M: Amit Shah +M: Amit Shah L: virtualization@lists.linux-foundation.org S: Maintained F: drivers/char/virtio_console.c -- cgit From d7b028f56a971a2e4d8d7887540a144eeefcd4ab Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 3 Feb 2017 13:13:09 -0800 Subject: zswap: disable changing params if init fails Add zswap_init_failed bool that prevents changing any of the module params, if init_zswap() fails, and set zswap_enabled to false. Change 'enabled' param to a callback, and check zswap_init_failed before allowing any change to 'enabled', 'zpool', or 'compressor' params. Any driver that is built-in to the kernel will not be unloaded if its init function returns error, and its module params remain accessible for users to change via sysfs. Since zswap uses param callbacks, which assume that zswap has been initialized, changing the zswap params after a failed initialization will result in WARNING due to the param callbacks expecting a pool to already exist. This prevents that by immediately exiting any of the param callbacks if initialization failed. This was reported here: https://marc.info/?l=linux-mm&m=147004228125528&w=4 And fixes this WARNING: [ 429.723476] WARNING: CPU: 0 PID: 5140 at mm/zswap.c:503 __zswap_pool_current+0x56/0x60 The warning is just noise, and not serious. However, when init fails, zswap frees all its percpu dstmem pages and its kmem cache. The kmem cache might be serious, if kmem_cache_alloc(NULL, gfp) has problems; but the percpu dstmem pages are definitely a problem, as they're used as temporary buffer for compressed pages before copying into place in the zpool. If the user does get zswap enabled after an init failure, then zswap will likely Oops on the first page it tries to compress (or worse, start corrupting memory). Fixes: 90b0fc26d5db ("zswap: change zpool/compressor at runtime") Link: http://lkml.kernel.org/r/20170124200259.16191-2-ddstreet@ieee.org Signed-off-by: Dan Streetman Reported-by: Marcin Miroslaw Cc: Seth Jennings Cc: Michal Hocko Cc: Sergey Senozhatsky Cc: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zswap.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index 067a0d62f318..cabf09e0128b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry; /* Enable/disable zswap (disabled by default) */ static bool zswap_enabled; -module_param_named(enabled, zswap_enabled, bool, 0644); +static int zswap_enabled_param_set(const char *, + const struct kernel_param *); +static struct kernel_param_ops zswap_enabled_param_ops = { + .set = zswap_enabled_param_set, + .get = param_get_bool, +}; +module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" @@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ static bool zswap_init_started; +/* fatal error during init */ +static bool zswap_init_failed; + /********************************* * helpers and fwd declarations **********************************/ @@ -624,6 +633,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *s = strstrip((char *)val); int ret; + if (zswap_init_failed) { + pr_err("can't set param, initialization failed\n"); + return -ENODEV; + } + /* no change required */ if (!strcmp(s, *(char **)kp->arg)) return 0; @@ -703,6 +717,17 @@ static int zswap_zpool_param_set(const char *val, return __zswap_param_set(val, kp, NULL, zswap_compressor); } +static int zswap_enabled_param_set(const char *val, + const struct kernel_param *kp) +{ + if (zswap_init_failed) { + pr_err("can't enable, initialization failed\n"); + return -ENODEV; + } + + return param_set_bool(val, kp); +} + /********************************* * writeback code **********************************/ @@ -1201,6 +1226,9 @@ hp_fail: dstmem_fail: zswap_entry_cache_destroy(); cache_fail: + /* if built-in, we aren't unloaded on failure; don't allow use */ + zswap_init_failed = true; + zswap_enabled = false; return -ENOMEM; } /* must be late so crypto has time to come up */ -- cgit From 4f40c6e5627ea73b4e7c615c59631f38cc880885 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Feb 2017 13:13:12 -0800 Subject: kasan: respect /proc/sys/kernel/traceoff_on_warning After much waiting I finally reproduced a KASAN issue, only to find my trace-buffer empty of useful information because it got spooled out :/ Make kasan_report honour the /proc/sys/kernel/traceoff_on_warning interface. Link: http://lkml.kernel.org/r/20170125164106.3514-1-aryabinin@virtuozzo.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Andrey Ryabinin Acked-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index b82b3e215157..f479365530b6 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -300,6 +301,8 @@ void kasan_report(unsigned long addr, size_t size, if (likely(!kasan_report_enabled())) return; + disable_trace_on_warning(); + info.access_addr = (void *)addr; info.access_size = size; info.is_write = is_write; -- cgit From 253fd0f02040a19c6fe80e4171659fa3482a422d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 3 Feb 2017 13:13:15 -0800 Subject: shmem: fix sleeping from atomic context Syzkaller fuzzer managed to trigger this: BUG: sleeping function called from invalid context at mm/shmem.c:852 in_atomic(): 1, irqs_disabled(): 0, pid: 529, name: khugepaged 3 locks held by khugepaged/529: #0: (shrinker_rwsem){++++..}, at: [] shrink_slab.part.59+0x121/0xd30 mm/vmscan.c:451 #1: (&type->s_umount_key#29){++++..}, at: [] trylock_super+0x20/0x100 fs/super.c:392 #2: (&(&sbinfo->shrinklist_lock)->rlock){+.+.-.}, at: [] spin_lock include/linux/spinlock.h:302 [inline] #2: (&(&sbinfo->shrinklist_lock)->rlock){+.+.-.}, at: [] shmem_unused_huge_shrink+0x28e/0x1490 mm/shmem.c:427 CPU: 2 PID: 529 Comm: khugepaged Not tainted 4.10.0-rc5+ #201 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: shmem_undo_range+0xb20/0x2710 mm/shmem.c:852 shmem_truncate_range+0x27/0xa0 mm/shmem.c:939 shmem_evict_inode+0x35f/0xca0 mm/shmem.c:1030 evict+0x46e/0x980 fs/inode.c:553 iput_final fs/inode.c:1515 [inline] iput+0x589/0xb20 fs/inode.c:1542 shmem_unused_huge_shrink+0xbad/0x1490 mm/shmem.c:446 shmem_unused_huge_scan+0x10c/0x170 mm/shmem.c:512 super_cache_scan+0x376/0x450 fs/super.c:106 do_shrink_slab mm/vmscan.c:378 [inline] shrink_slab.part.59+0x543/0xd30 mm/vmscan.c:481 shrink_slab mm/vmscan.c:2592 [inline] shrink_node+0x2c7/0x870 mm/vmscan.c:2592 shrink_zones mm/vmscan.c:2734 [inline] do_try_to_free_pages+0x369/0xc80 mm/vmscan.c:2776 try_to_free_pages+0x3c6/0x900 mm/vmscan.c:2982 __perform_reclaim mm/page_alloc.c:3301 [inline] __alloc_pages_direct_reclaim mm/page_alloc.c:3322 [inline] __alloc_pages_slowpath+0xa24/0x1c30 mm/page_alloc.c:3683 __alloc_pages_nodemask+0x544/0xae0 mm/page_alloc.c:3848 __alloc_pages include/linux/gfp.h:426 [inline] __alloc_pages_node include/linux/gfp.h:439 [inline] khugepaged_alloc_page+0xc2/0x1b0 mm/khugepaged.c:750 collapse_huge_page+0x182/0x1fe0 mm/khugepaged.c:955 khugepaged_scan_pmd+0xfdf/0x12a0 mm/khugepaged.c:1208 khugepaged_scan_mm_slot mm/khugepaged.c:1727 [inline] khugepaged_do_scan mm/khugepaged.c:1808 [inline] khugepaged+0xe9b/0x1590 mm/khugepaged.c:1853 kthread+0x326/0x3f0 kernel/kthread.c:227 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430 The iput() from atomic context was a bad idea: if after igrab() somebody else calls iput() and we left with the last inode reference, our iput() would lead to inode eviction and therefore sleeping. This patch should fix the situation. Link: http://lkml.kernel.org/r/20170131093141.GA15899@node.shutemov.name Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index bb53285a1d99..3a7587a0314d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -415,6 +415,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) { LIST_HEAD(list), *pos, *next; + LIST_HEAD(to_remove); struct inode *inode; struct shmem_inode_info *info; struct page *page; @@ -441,9 +442,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, /* Check if there's anything to gain */ if (round_up(inode->i_size, PAGE_SIZE) == round_up(inode->i_size, HPAGE_PMD_SIZE)) { - list_del_init(&info->shrinklist); + list_move(&info->shrinklist, &to_remove); removed++; - iput(inode); goto next; } @@ -454,6 +454,13 @@ next: } spin_unlock(&sbinfo->shrinklist_lock); + list_for_each_safe(pos, next, &to_remove) { + info = list_entry(pos, struct shmem_inode_info, shrinklist); + inode = &info->vfs_inode; + list_del_init(&info->shrinklist); + iput(inode); + } + list_for_each_safe(pos, next, &list) { int ret; -- cgit From 35f860f9ba6aac56cc38e8b18916d833a83f1157 Mon Sep 17 00:00:00 2001 From: David Lin Date: Fri, 3 Feb 2017 13:13:18 -0800 Subject: jump label: pass kbuild_cflags when checking for asm goto support Some versions of ARM GCC compiler such as Android toolchain throws in a '-fpic' flag by default. This causes the gcc-goto check script to fail although some config would have '-fno-pic' flag in the KBUILD_CFLAGS. This patch passes the KBUILD_CFLAGS to the check script so that the script does not rely on the default config from different compilers. Link: http://lkml.kernel.org/r/20170120234329.78868-1-dtwlin@google.com Signed-off-by: David Lin Acked-by: Steven Rostedt Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96b27a888285..d7b758a1ce14 100644 --- a/Makefile +++ b/Makefile @@ -797,7 +797,7 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) KBUILD_ARFLAGS := $(call ar-option,D) # check for 'asm goto' -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif -- cgit From deb88a2a19e85842d79ba96b05031739ec327ff4 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 3 Feb 2017 13:13:20 -0800 Subject: mm/memory_hotplug.c: check start_pfn in test_pages_in_a_zone() Patch series "fix a kernel oops when reading sysfs valid_zones", v2. A sysfs memory file is created for each 2GiB memory block on x86-64 when the system has 64GiB or more memory. [1] When the start address of a memory block is not backed by struct page, i.e. a memory range is not aligned by 2GiB, reading its 'valid_zones' attribute file leads to a kernel oops. This issue was observed on multiple x86-64 systems with more than 64GiB of memory. This patch-set fixes this issue. Patch 1 first fixes an issue in test_pages_in_a_zone(), which does not test the start section. Patch 2 then fixes the kernel oops by extending test_pages_in_a_zone() to return valid [start, end). Note for stable kernels: The memory block size change was made by commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems"), which was accepted to 3.9. However, this patch-set depends on (and fixes) the change to test_pages_in_a_zone() made by commit 5f0f2887f4de ("mm/memory_hotplug.c: check for missing sections in test_pages_in_a_zone()"), which was accepted to 4.4. So, I recommend that we backport it up to 4.4. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' This patch (of 2): test_pages_in_a_zone() does not check 'start_pfn' when it is aligned by section since 'sec_end_pfn' is set equal to 'pfn'. Since this function is called for testing the range of a sysfs memory file, 'start_pfn' is always aligned by section. Fix it by properly setting 'sec_end_pfn' to the next section pfn. Also make sure that this function returns 1 only when the range belongs to a zone. Link: http://lkml.kernel.org/r/20170127222149.30893-2-toshi.kani@hpe.com Signed-off-by: Toshi Kani Cc: Andrew Banman Cc: Reza Arbab Cc: Greg KH Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ca2723d47338..1218f73890b6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1483,7 +1483,7 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) } /* - * Confirm all pages in a range [start, end) is belongs to the same zone. + * Confirm all pages in a range [start, end) belong to the same zone. */ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) { @@ -1491,9 +1491,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) struct zone *zone = NULL; struct page *page; int i; - for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn); + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); pfn < end_pfn; - pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) { + pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { /* Make sure the memory section is present first */ if (!present_section_nr(pfn_to_section_nr(pfn))) continue; @@ -1512,7 +1512,11 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) zone = page_zone(page); } } - return 1; + + if (zone) + return 1; + else + return 0; } /* -- cgit From a96dfddbcc04336bbed50dc2b24823e45e09e80c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 3 Feb 2017 13:13:23 -0800 Subject: base/memory, hotplug: fix a kernel oops in show_valid_zones() Reading a sysfs "memoryN/valid_zones" file leads to the following oops when the first page of a range is not backed by struct page. show_valid_zones() assumes that 'start_pfn' is always valid for page_zone(). BUG: unable to handle kernel paging request at ffffea017a000000 IP: show_valid_zones+0x6f/0x160 This issue may happen on x86-64 systems with 64GiB or more memory since their memory block size is bumped up to 2GiB. [1] An example of such systems is desribed below. 0x3240000000 is only aligned by 1GiB and this memory block starts from 0x3200000000, which is not backed by struct page. BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable Since test_pages_in_a_zone() already checks holes, fix this issue by extending this function to return 'valid_start' and 'valid_end' for a given range. show_valid_zones() then proceeds with the valid range. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com Signed-off-by: Toshi Kani Cc: Greg Kroah-Hartman Cc: Zhang Zhen Cc: Reza Arbab Cc: David Rientjes Cc: Dan Williams Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 12 ++++++------ include/linux/memory_hotplug.h | 3 ++- mm/memory_hotplug.c | 20 +++++++++++++++----- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index dacb6a8418aa..fa26ffd25fa6 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -389,33 +389,33 @@ static ssize_t show_valid_zones(struct device *dev, { struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn, end_pfn; + unsigned long valid_start, valid_end, valid_pages; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - struct page *first_page; struct zone *zone; int zone_shift = 0; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; - first_page = pfn_to_page(start_pfn); /* The block contains more than one zone can not be offlined. */ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return sprintf(buf, "none\n"); - zone = page_zone(first_page); + zone = page_zone(pfn_to_page(valid_start)); + valid_pages = valid_end - valid_start; /* MMOP_ONLINE_KEEP */ sprintf(buf, "%s", zone->name); /* MMOP_ONLINE_KERNEL */ - zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); + zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); } /* MMOP_ONLINE_MOVABLE */ - zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); + zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c1784c0b4f35..134a2f69c21a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long, int); -extern int test_pages_in_a_zone(unsigned long, unsigned long); +extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1218f73890b6..b8c11e063ff0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1484,10 +1484,13 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) /* * Confirm all pages in a range [start, end) belong to the same zone. + * When true, return its valid [start, end). */ -int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) +int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end) { unsigned long pfn, sec_end_pfn; + unsigned long start, end; struct zone *zone = NULL; struct page *page; int i; @@ -1509,14 +1512,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; + if (!zone) + start = pfn + i; zone = page_zone(page); + end = pfn + MAX_ORDER_NR_PAGES; } } - if (zone) + if (zone) { + *valid_start = start; + *valid_end = end; return 1; - else + } else { return 0; + } } /* @@ -1843,6 +1852,7 @@ static int __ref __offline_pages(unsigned long start_pfn, long offlined_pages; int ret, drain, retry_max, node; unsigned long flags; + unsigned long valid_start, valid_end; struct zone *zone; struct memory_notify arg; @@ -1853,10 +1863,10 @@ static int __ref __offline_pages(unsigned long start_pfn, return -EINVAL; /* This makes hotplug much easier...and readable. we assume this for now. .*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return -EINVAL; - zone = page_zone(pfn_to_page(start_pfn)); + zone = page_zone(pfn_to_page(valid_start)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; -- cgit From d1908f52557b3230fbd63c0429f3b4b748bf2b6d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 3 Feb 2017 13:13:26 -0800 Subject: fs: break out of iomap_file_buffered_write on fatal signals Tetsuo has noticed that an OOM stress test which performs large write requests can cause the full memory reserves depletion. He has tracked this down to the following path __alloc_pages_nodemask+0x436/0x4d0 alloc_pages_current+0x97/0x1b0 __page_cache_alloc+0x15d/0x1a0 mm/filemap.c:728 pagecache_get_page+0x5a/0x2b0 mm/filemap.c:1331 grab_cache_page_write_begin+0x23/0x40 mm/filemap.c:2773 iomap_write_begin+0x50/0xd0 fs/iomap.c:118 iomap_write_actor+0xb5/0x1a0 fs/iomap.c:190 ? iomap_write_end+0x80/0x80 fs/iomap.c:150 iomap_apply+0xb3/0x130 fs/iomap.c:79 iomap_file_buffered_write+0x68/0xa0 fs/iomap.c:243 ? iomap_write_end+0x80/0x80 xfs_file_buffered_aio_write+0x132/0x390 [xfs] ? remove_wait_queue+0x59/0x60 xfs_file_write_iter+0x90/0x130 [xfs] __vfs_write+0xe5/0x140 vfs_write+0xc7/0x1f0 ? syscall_trace_enter+0x1d0/0x380 SyS_write+0x58/0xc0 do_syscall_64+0x6c/0x200 entry_SYSCALL64_slow_path+0x25/0x25 the oom victim has access to all memory reserves to make a forward progress to exit easier. But iomap_file_buffered_write and other callers of iomap_apply loop to complete the full request. We need to check for fatal signals and back off with a short write instead. As the iomap_apply delegates all the work down to the actor we have to hook into those. All callers that work with the page cache are calling iomap_write_begin so we will check for signals there. dax_iomap_actor has to handle the situation explicitly because it copies data to the userspace directly. Other callers like iomap_page_mkwrite work on a single page or iomap_fiemap_actor do not allocate memory based on the given len. Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path") Link: http://lkml.kernel.org/r/20170201092706.9966-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 5 +++++ fs/iomap.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 3af2da5e64ce..c45598b912e1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1031,6 +1031,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct blk_dax_ctl dax = { 0 }; ssize_t map_len; + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + dax.sector = dax_iomap_sector(iomap, pos); dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; map_len = dax_map_atomic(iomap->bdev, &dax); diff --git a/fs/iomap.c b/fs/iomap.c index 354a123f170e..a51cb4c07d4d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -114,6 +114,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, BUG_ON(pos + len > iomap->offset + iomap->length); + if (fatal_signal_pending(current)) + return -EINTR; + page = grab_cache_page_write_begin(inode->i_mapping, index, flags); if (!page) return -ENOMEM; -- cgit From 5abf186a30a89d5b9c18a6bf93a2c192c9fd52f6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 3 Feb 2017 13:13:29 -0800 Subject: mm, fs: check for fatal signals in do_generic_file_read() do_generic_file_read() can be told to perform a large request from userspace. If the system is under OOM and the reading task is the OOM victim then it has an access to memory reserves and finishing the full request can lead to the full memory depletion which is dangerous. Make sure we rather go with a short read and allow the killed task to terminate. Link: http://lkml.kernel.org/r/20170201092706.9966-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Christoph Hellwig Cc: Tetsuo Handa Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index b772a33ef640..3f9afded581b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1791,6 +1791,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, cond_resched(); find_page: + if (fatal_signal_pending(current)) { + error = -EINTR; + goto out; + } + page = find_get_page(mapping, index); if (!page) { page_cache_sync_readahead(mapping, -- cgit From 6e978b22efa1db9f6e71b24440b5f1d93e968ee3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 3 Feb 2017 14:18:39 -0800 Subject: cpufreq: intel_pstate: Disable energy efficiency optimization Some Kabylake desktop processors may not reach max turbo when running in HWP mode, even if running under sustained 100% utilization. This occurs when the HWP.EPP (Energy Performance Preference) is set to "balance_power" (0x80) -- the default on most systems. It occurs because the platform BIOS may erroneously enable an energy-efficiency setting -- MSR_IA32_POWER_CTL BIT-EE, which is not recommended to be enabled on this SKU. On the failing systems, this BIOS issue was not discovered when the desktop motherboard was tested with Windows, because the BIOS also neglects to provide the ACPI/CPPC table, that Windows requires to enable HWP, and so Windows runs in legacy P-state mode, where this setting has no effect. Linux' intel_pstate driver does not require ACPI/CPPC to enable HWP, and so it runs in HWP mode, exposing this incorrect BIOS configuration. There are several ways to address this problem. First, Linux can also run in legacy P-state mode on this system. As intel_pstate is how Linux enables HWP, booting with "intel_pstate=disable" will run in acpi-cpufreq/ondemand legacy p-state mode. Or second, the "performance" governor can be used with intel_pstate, which will modify HWP.EPP to 0. Or third, starting in 4.10, the /sys/devices/system/cpu/cpufreq/policy*/energy_performance_preference attribute in can be updated from "balance_power" to "performance". Or fourth, apply this patch, which fixes the erroneous setting of MSR_IA32_POWER_CTL BIT_EE on this model, allowing the default configuration to function as designed. Signed-off-by: Srinivas Pandruvada Reviewed-by: Len Brown Cc: 4.6+ # 4.6+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f91c25718d16..86e36544925f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1235,6 +1235,25 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); } +#define MSR_IA32_POWER_CTL_BIT_EE 19 + +/* Disable energy efficiency optimization */ +static void intel_pstate_disable_ee(int cpu) +{ + u64 power_ctl; + int ret; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl); + if (ret) + return; + + if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) { + pr_info("Disabling energy efficiency optimization\n"); + power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); + wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl); + } +} + static int atom_get_min_pstate(void) { u64 value; @@ -1845,6 +1864,11 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { {} }; +static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { + ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params), + {} +}; + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1875,6 +1899,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->cpu = cpunum; if (hwp_active) { + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); + if (id) + intel_pstate_disable_ee(cpunum); + intel_pstate_hwp_enable(cpu); pid_params.sample_rate_ms = 50; pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; -- cgit From a9306a63631493afc75893a4ac405d4e1cbae6aa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Feb 2017 00:44:36 +0100 Subject: PM / runtime: Avoid false-positive warnings from might_sleep_if() The might_sleep_if() assertions in __pm_runtime_idle(), __pm_runtime_suspend() and __pm_runtime_resume() may generate false-positive warnings in some situations. For example, that happens if a nested pm_runtime_get_sync()/pm_runtime_put() pair is executed with disabled interrupts within an outer pm_runtime_get_sync()/pm_runtime_put() section for the same device. [Generally, pm_runtime_get_sync() may sleep, so it should not be called with disabled interrupts, but in this particular case the previous pm_runtime_get_sync() guarantees that the device will not be suspended, so the inner pm_runtime_get_sync() will return immediately after incrementing the device's usage counter.] That started to happen in the i915 driver in 4.10-rc, leading to the following splat: BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1032 in_atomic(): 1, irqs_disabled(): 0, pid: 1500, name: Xorg 1 lock held by Xorg/1500: #0: (&dev->struct_mutex){+.+.+.}, at: [] i915_mutex_lock_interruptible+0x43/0x140 [i915] CPU: 0 PID: 1500 Comm: Xorg Not tainted Call Trace: dump_stack+0x85/0xc2 ___might_sleep+0x196/0x260 __might_sleep+0x53/0xb0 __pm_runtime_resume+0x7a/0x90 intel_runtime_pm_get+0x25/0x90 [i915] aliasing_gtt_bind_vma+0xaa/0xf0 [i915] i915_vma_bind+0xaf/0x1e0 [i915] i915_gem_execbuffer_relocate_entry+0x513/0x6f0 [i915] i915_gem_execbuffer_relocate_vma.isra.34+0x188/0x250 [i915] ? trace_hardirqs_on+0xd/0x10 ? i915_gem_execbuffer_reserve_vma.isra.31+0x152/0x1f0 [i915] ? i915_gem_execbuffer_reserve.isra.32+0x372/0x3a0 [i915] i915_gem_do_execbuffer.isra.38+0xa70/0x1a40 [i915] ? __might_fault+0x4e/0xb0 i915_gem_execbuffer2+0xc5/0x260 [i915] ? __might_fault+0x4e/0xb0 drm_ioctl+0x206/0x450 [drm] ? i915_gem_execbuffer+0x340/0x340 [i915] ? __fget+0x5/0x200 do_vfs_ioctl+0x91/0x6f0 ? __fget+0x111/0x200 ? __fget+0x5/0x200 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc6 even though the code triggering it is correct. Unfortunately, the might_sleep_if() assertions in question are too coarse-grained to cover such cases correctly, so make them a bit less sensitive in order to avoid the false-positives. Reported-and-tested-by: Sedat Dilek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 872eac4cb1df..a14fac6a01d3 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -966,13 +966,13 @@ int __pm_runtime_idle(struct device *dev, int rpmflags) unsigned long flags; int retval; - might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); - if (rpmflags & RPM_GET_PUT) { if (!atomic_dec_and_test(&dev->power.usage_count)) return 0; } + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_idle(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); @@ -998,13 +998,13 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags) unsigned long flags; int retval; - might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); - if (rpmflags & RPM_GET_PUT) { if (!atomic_dec_and_test(&dev->power.usage_count)) return 0; } + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_suspend(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); @@ -1029,7 +1029,8 @@ int __pm_runtime_resume(struct device *dev, int rpmflags) unsigned long flags; int retval; - might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); + might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe && + dev->power.runtime_status != RPM_ACTIVE); if (rpmflags & RPM_GET_PUT) atomic_inc(&dev->power.usage_count); -- cgit From bfb34527a32a1a576d9bfb7026d3ab0369a6cd60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 4 Feb 2017 14:47:31 -0800 Subject: libnvdimm, pfn: fix memmap reservation size versus 4K alignment When vmemmap_populate() allocates space for the memmap it does so in 2MB sized chunks. The libnvdimm-pfn driver incorrectly accounts for this when the alignment of the device is set to 4K. When this happens we trigger memory allocation failures in altmap_alloc_block_buf() and trigger warnings of the form: WARNING: CPU: 0 PID: 3376 at arch/x86/mm/init_64.c:656 arch_add_memory+0xe4/0xf0 [..] Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_null+0x1d/0x20 arch_add_memory+0xe4/0xf0 devm_memremap_pages+0x29b/0x4e0 Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute, default to HPAGE_SIZE") Cc: Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index a2ac9e641aa9..6c033c9a2f06 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -627,15 +627,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) size = resource_size(&nsio->res); npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) { - unsigned long memmap_size; - /* * vmemmap_populate_hugepages() allocates the memmap array in * HPAGE_SIZE chunks. */ - memmap_size = ALIGN(64 * npfns, HPAGE_SIZE); - offset = ALIGN(start + SZ_8K + memmap_size + dax_label_reserve, - nd_pfn->align) - start; + offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, + max(nd_pfn->align, HPAGE_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K + dax_label_reserve, nd_pfn->align) - start; -- cgit From 34b2cef20f19c87999fff3da4071e66937db9644 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2017 11:16:52 -0800 Subject: ipv4: keep skb->dst around in presence of IP options Andrey Konovalov got crashes in __ip_options_echo() when a NULL skb->dst is accessed. ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options are present. We could refine the test to the presence of ts_needtime or srr, but IP options are not often used, so let's be conservative. Thanks to syzkaller team for finding this bug. Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 53ae0c6315ad..900011709e3b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1238,7 +1238,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, -- cgit From d71b7896886345c53ef1d84bda2bc758554f5d61 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 00:03:26 -0800 Subject: netlabel: out of bound access in cipso_v4_validate() syzkaller found another out of bound access in ip_options_compile(), or more exactly in cipso_v4_validate() Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled") Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Paul Moore Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 4 ++++ net/ipv4/cipso_ipv4.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 3ebb168b9afc..a34b141f125f 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -309,6 +309,10 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, } for (opt_iter = 6; opt_iter < opt_len;) { + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto out; + } tag_len = opt[opt_iter + 1]; if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) { err_offset = opt_iter + 1; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 72d6f056d863..ae206163c273 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; -- cgit From 79a8b9aa388b0620cc1d525d7c0f0d9a8a85e08e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 5 Feb 2017 11:50:21 +0100 Subject: x86/CPU/AMD: Bring back Compute Unit ID Commit: a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology") restored the initial approach we had with the Fam15h topology of enumerating CU (Compute Unit) threads as cores. And this is still correct - they're beefier than HT threads but still have some shared functionality. Our current approach has a problem with the Mad Max Steam game, for example. Yves Dionne reported a certain "choppiness" while playing on v4.9.5. That problem stems most likely from the fact that the CU threads share resources within one CU and when we schedule to a thread of a different compute unit, this incurs latency due to migrating the working set to a different CU through the caches. When the thread siblings mask mirrors that aspect of the CUs and threads, the scheduler pays attention to it and tries to schedule within one CU first. Which takes care of the latency, of course. Reported-by: Yves Dionne Signed-off-by: Borislav Petkov Cc: # 4.9 Cc: Brice Goglin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20170205105022.8705-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/amd.c | 9 ++++++++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/smpboot.c | 12 +++++++++--- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1be64da0384e..e6cfe7ba2d65 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -104,6 +104,7 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; + __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1d3167269a67..20dc44d1e6be 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,8 +309,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 eax, ebx, ecx, edx; - node_id = cpuid_ecx(0x8000001e) & 7; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + + node_id = ecx & 0xff; + smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + + if (c->x86 == 0x15) + c->cu_id = ebx & 0xff; /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..ede03e849a8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->cu_id = 0xff; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 46732dc3b73c..99b920d0e516 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->cpu_core_id == o->cpu_core_id) - return topology_sane(c, o, "smt"); + per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { + if (c->cpu_core_id == o->cpu_core_id) + return topology_sane(c, o, "smt"); + + if ((c->cu_id != 0xff) && + (o->cu_id != 0xff) && + (c->cu_id == o->cu_id)) + return topology_sane(c, o, "smt"); + } } else if (c->phys_proc_id == o->phys_proc_id && c->cpu_core_id == o->cpu_core_id) { -- cgit From 08b259631b5a1d912af4832847b5642f377d9101 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Sun, 5 Feb 2017 11:50:22 +0100 Subject: x86/CPU/AMD: Fix Zen SMT topology After: a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology") our SMT scheduling topology for Fam17h systems is broken, because the ThreadId is included in the ApicId when SMT is enabled. So, without further decoding cpu_core_id is unique for each thread rather than the same for threads on the same core. This didn't affect systems with SMT disabled. Make cpu_core_id be what it is defined to be. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: # 4.9 Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170205105022.8705-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 20dc44d1e6be..2b4cf04239b6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -319,6 +319,13 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (c->x86 == 0x15) c->cu_id = ebx & 0xff; + if (c->x86 >= 0x17) { + c->cpu_core_id = ebx & 0xff; + + if (smp_num_siblings > 1) + c->x86_max_cores /= smp_num_siblings; + } + /* * We may have multiple LLCs if L3 caches exist, so check if we * have an L3 cache by looking at the L3 cache CPUID leaf. -- cgit From 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2017 23:18:55 -0800 Subject: ip6_gre: fix ip6gre_err() invalid reads Andrey Konovalov reported out of bound accesses in ip6gre_err() If GRE flags contains GRE_KEY, the following expression *(((__be32 *)p) + (grehlen / 4) - 1) accesses data ~40 bytes after the expected point, since grehlen includes the size of IPv6 headers. Let's use a "struct gre_base_hdr *greh" pointer to make this code more readable. p[1] becomes greh->protocol. grhlen is the GRE header length. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 558631860d91..630b73be5999 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -367,35 +367,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (!t) return; -- cgit From d5adbfcd5f7bcc6fa58a41c5c5ada0e5c826ce2c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Feb 2017 15:10:58 -0800 Subject: Linux 4.10-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7b758a1ce14..8e223e081c9d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From e479ab651f071dbd1518ce8fb121c7f42f2bb97d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 4 Feb 2017 13:59:22 +0200 Subject: mac80211: Fix FILS AEAD protection in Association Request frame Incorrect num_elem parameter value (1 vs. 5) was used in the aes_siv_encrypt() call. This resulted in only the first one of the five AAD vectors to SIV getting included in calculation. This does not protect all the contents correctly and would not interoperate with a standard compliant implementation. Fix this by using the correct number. A matching fix is needed in the AP side (hostapd) to get FILS authentication working properly. Fixes: 39404feee691 ("mac80211: FILS AEAD protection for station mode association frames") Reported-by: Ard Biesheuvel Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/fils_aead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c index ecfdd97758a3..e795aaa2aa1f 100644 --- a/net/mac80211/fils_aead.c +++ b/net/mac80211/fils_aead.c @@ -272,7 +272,7 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, crypt_len = skb->data + skb->len - encr; skb_put(skb, AES_BLOCK_SIZE); return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, - encr, crypt_len, 1, addr, len, encr); + encr, crypt_len, 5, addr, len, encr); } int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, -- cgit From 01fba20b5976e445676febbdf6dc78d71c6d7b62 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 4 Feb 2017 18:08:42 +0200 Subject: mac80211: Allocate a sync skcipher explicitly for FILS AEAD The skcipher could have been of the async variant which may return from skcipher_encrypt() with -EINPROGRESS after having queued the request. The FILS AEAD implementation here does not have code for dealing with that possibility, so allocate a sync cipher explicitly to avoid potential issues with hardware accelerators. This is based on the patch sent out by Ard. Fixes: 39404feee691 ("mac80211: FILS AEAD protection for station mode association frames") Reported-by: Ard Biesheuvel Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/fils_aead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c index e795aaa2aa1f..5c3af5eb4052 100644 --- a/net/mac80211/fils_aead.c +++ b/net/mac80211/fils_aead.c @@ -124,7 +124,7 @@ static int aes_siv_encrypt(const u8 *key, size_t key_len, /* CTR */ - tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm2)) { kfree(tmp); return PTR_ERR(tfm2); @@ -183,7 +183,7 @@ static int aes_siv_decrypt(const u8 *key, size_t key_len, /* CTR */ - tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm2)) return PTR_ERR(tfm2); /* K2 for CTR */ -- cgit From da7061c82e4a1bc6a5e134ef362c86261906c860 Mon Sep 17 00:00:00 2001 From: Thorsten Horstmann Date: Fri, 3 Feb 2017 14:38:29 +0100 Subject: mac80211: Fix adding of mesh vendor IEs The function ieee80211_ie_split_vendor doesn't return 0 on errors. Instead it returns any offset < ielen when WLAN_EID_VENDOR_SPECIFIC is found. The return value in mesh_add_vendor_ies must therefore be checked against ifmsh->ie_len and not 0. Otherwise all ifmsh->ie starting with WLAN_EID_VENDOR_SPECIFIC will be rejected. Fixes: 082ebb0c258d ("mac80211: fix mesh beacon format") Signed-off-by: Thorsten Horstmann Signed-off-by: Mathias Kretschmer Signed-off-by: Simon Wunderlich [sven@narfation.org: Add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 42120d965263..50e1b7f78bd4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -339,7 +339,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, /* fast-forward to vendor IEs */ offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); - if (offset) { + if (offset < ifmsh->ie_len) { len = ifmsh->ie_len - offset; data = ifmsh->ie + offset; if (skb_tailroom(skb) < len) -- cgit From fd551bac4795854adaa87bad7e5136083719802b Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Thu, 26 Jan 2017 08:56:13 +0900 Subject: nl80211: Fix mesh HT operation check A previous change to fix checks for NL80211_MESHCONF_HT_OPMODE missed setting the flag when replacing FILL_IN_MESH_PARAM_IF_SET with checking codes. This results in dropping the received HT operation value when called by nl80211_update_mesh_config(). Fix this by setting the flag properly. Fixes: 9757235f451c ("nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value") Signed-off-by: Masashi Honma [rewrite commit message to use Fixes: line] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5c1b267e22be..aee396b9f190 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5916,6 +5916,7 @@ do { \ break; } cfg->ht_opmode = ht_opmode; + mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, 1, 65535, mask, -- cgit From 37a7ea4a9b81f6a864c10a7cb0b96458df5310a3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 6 Feb 2017 15:09:48 +0100 Subject: ALSA: seq: Don't handle loop timeout at snd_seq_pool_done() snd_seq_pool_done() syncs with closing of all opened threads, but it aborts the wait loop with a timeout, and proceeds to the release resource even if not all threads have been closed. The timeout was 5 seconds, and if you run a crazy stuff, it can exceed easily, and may result in the access of the invalid memory address -- this is what syzkaller detected in a bug report. As a fix, let the code graduate from naiveness, simply remove the loop timeout. BugLink: http://lkml.kernel.org/r/CACT4Y+YdhDV2H5LLzDTJDVF-qiYHUHhtRaW4rbb4gUhTCQB81w@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_memory.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index c850345c43b5..dfa5156f3585 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -419,7 +419,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool) { unsigned long flags; struct snd_seq_event_cell *ptr; - int max_count = 5 * HZ; if (snd_BUG_ON(!pool)) return -EINVAL; @@ -432,14 +431,8 @@ int snd_seq_pool_done(struct snd_seq_pool *pool) if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); - while (atomic_read(&pool->counter) > 0) { - if (max_count == 0) { - pr_warn("ALSA: snd_seq_pool_done timeout: %d cells remain\n", atomic_read(&pool->counter)); - break; - } + while (atomic_read(&pool->counter) > 0) schedule_timeout_uninterruptible(1); - max_count--; - } /* release all resources */ spin_lock_irqsave(&pool->lock, flags); -- cgit From ebf6c9cb23d7e56eec8575a88071dec97ad5c6e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Feb 2017 20:23:22 -0800 Subject: ipv6: tcp: add a missing tcp_v6_restore_cb() Dmitry reported use-after-free in ip6_datagram_recv_specific_ctl() A similar bug was fixed in commit 8ce48623f0cf ("ipv6: tcp: restore IP6CB for pktoptions skbs"), but I missed another spot. tcp_v6_syn_recv_sock() can indeed set np->pktoptions from ireq->pktopts Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb8929681dc7..eaad72c3d746 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -991,6 +991,16 @@ drop: return 0; /* don't send reset */ } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, @@ -1182,8 +1192,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) + if (newnp->pktoptions) { + tcp_v6_restore_cb(newnp->pktoptions); skb_set_owner_r(newnp->pktoptions, newsk); + } } } @@ -1198,16 +1210,6 @@ out: return NULL; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * -- cgit From 08b3b33f3e4dc0016ad878c1a48f094a74956277 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 3 Feb 2017 09:29:07 -0600 Subject: MAINTAINERS: socfpga: update email for Dinh Nguyen My opensource.altera.com email will be going away soon. Signed-off-by: Dinh Nguyen Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index acd7c3d5410a..35c71bb41cfe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1773,7 +1773,7 @@ F: drivers/soc/renesas/ F: include/linux/soc/renesas/ ARM/SOCFPGA ARCHITECTURE -M: Dinh Nguyen +M: Dinh Nguyen S: Maintained F: arch/arm/mach-socfpga/ F: arch/arm/boot/dts/socfpga* @@ -1783,7 +1783,7 @@ W: http://www.rocketboards.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git ARM/SOCFPGA CLOCK FRAMEWORK SUPPORT -M: Dinh Nguyen +M: Dinh Nguyen S: Maintained F: drivers/clk/socfpga/ -- cgit From eeeefd41843218c55a8782a6920f044d9bf6207a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:10:35 +0100 Subject: block: don't try Write Same from __blkdev_issue_zeroout Write Same can return an error asynchronously if it turns out the underlying SCSI device does not support Write Same, which makes a proper fallback to other methods in __blkdev_issue_zeroout impossible. Thus only issue a Write Same from blkdev_issue_zeroout an don't try it at all from __blkdev_issue_zeroout as a non-invasive workaround. Signed-off-by: Christoph Hellwig Reported-by: Junichi Nomura Fixes: e73c23ff ("block: add async variant of blkdev_issue_zeroout") Tested-by: Junichi Nomura Signed-off-by: Jens Axboe --- block/blk-lib.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index f8c82a9b4012..ed1e78e24db0 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -306,11 +306,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, if (ret == 0 || (ret && ret != -EOPNOTSUPP)) goto out; - ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, - ZERO_PAGE(0), biop); - if (ret == 0 || (ret && ret != -EOPNOTSUPP)) - goto out; - ret = 0; while (nr_sects != 0) { bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES), @@ -369,6 +364,10 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, return 0; } + if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, + ZERO_PAGE(0))) + return 0; + blk_start_plug(&plug); ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, &bio, discard); -- cgit From 5aff1d245e8cc1ab5c4517d916edaed9e3f7f973 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 09:58:24 +0100 Subject: ARM: defconfigs: make NF_CT_PROTO_SCTP and NF_CT_PROTO_UDPLITE built-in The symbols can no longer be used as loadable modules, leading to a harmless Kconfig warning: arch/arm/configs/imote2_defconfig:60:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE arch/arm/configs/imote2_defconfig:59:warning: symbol value 'm' invalid for NF_CT_PROTO_SCTP arch/arm/configs/ezx_defconfig:68:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE arch/arm/configs/ezx_defconfig:67:warning: symbol value 'm' invalid for NF_CT_PROTO_SCTP Let's make them built-in. Signed-off-by: Arnd Bergmann --- arch/arm/configs/ezx_defconfig | 4 ++-- arch/arm/configs/imote2_defconfig | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index ea316c4b890e..d3f1768840e2 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -64,8 +64,8 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig index 18e59feaa307..7f479cdb3479 100644 --- a/arch/arm/configs/imote2_defconfig +++ b/arch/arm/configs/imote2_defconfig @@ -56,8 +56,8 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_H323=m -- cgit From a088d1d73a4bcfd7bc482f8d08375b9b665dc3e5 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Fri, 3 Feb 2017 08:11:03 +0100 Subject: ipv6: Fix IPv6 packet loss in scenarios involving roaming + snooping switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When for instance a mobile Linux device roams from one access point to another with both APs sharing the same broadcast domain and a multicast snooping switch in between: 1) (c) <~~~> (AP1) <--[SSW]--> (AP2) 2) (AP1) <--[SSW]--> (AP2) <~~~> (c) Then currently IPv6 multicast packets will get lost for (c) until an MLD Querier sends its next query message. The packet loss occurs because upon roaming the Linux host so far stayed silent regarding MLD and the snooping switch will therefore be unaware of the multicast topology change for a while. This patch fixes this by always resending MLD reports when an interface change happens, for instance from NO-CARRIER to CARRIER state. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f60e88e56255..81f7b4ea4281 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3386,9 +3386,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) - /* device is already configured. */ + if (idev->if_flags & IF_READY) { + /* device is already configured - + * but resend MLD reports, we might + * have roamed and need to update + * multicast snooping switches + */ + ipv6_mc_up(idev); break; + } idev->if_flags |= IF_READY; } -- cgit From b3f2d07f4649adcf6905953a10d217b5683e4077 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 17:35:46 +0100 Subject: hns: avoid stack overflow with CONFIG_KASAN The use of ACCESS_ONCE() looks like a micro-optimization to force gcc to use an indexed load for the register address, but it has an absolutely detrimental effect on builds with gcc-5 and CONFIG_KASAN=y, leading to a very likely kernel stack overflow aside from very complex object code: hisilicon/hns/hns_dsaf_gmac.c: In function 'hns_gmac_update_stats': hisilicon/hns/hns_dsaf_gmac.c:419:1: error: the frame size of 2912 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_ppe.c: In function 'hns_ppe_reset_common': hisilicon/hns/hns_dsaf_ppe.c:390:1: error: the frame size of 1184 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_ppe.c: In function 'hns_ppe_get_regs': hisilicon/hns/hns_dsaf_ppe.c:621:1: error: the frame size of 3632 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_rcb.c: In function 'hns_rcb_get_common_regs': hisilicon/hns/hns_dsaf_rcb.c:970:1: error: the frame size of 2784 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_gmac.c: In function 'hns_gmac_get_regs': hisilicon/hns/hns_dsaf_gmac.c:641:1: error: the frame size of 5728 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_rcb.c: In function 'hns_rcb_get_ring_regs': hisilicon/hns/hns_dsaf_rcb.c:1021:1: error: the frame size of 2208 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_comm_init': hisilicon/hns/hns_dsaf_main.c:1209:1: error: the frame size of 1904 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_xgmac.c: In function 'hns_xgmac_get_regs': hisilicon/hns/hns_dsaf_xgmac.c:748:1: error: the frame size of 4704 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_update_stats': hisilicon/hns/hns_dsaf_main.c:2420:1: error: the frame size of 1088 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_regs': hisilicon/hns/hns_dsaf_main.c:2753:1: error: the frame size of 10768 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] This does not seem to happen any more with gcc-7, but removing the ACCESS_ONCE seems safe anyway and it avoids a serious issue for some people. I have verified that with gcc-5.3.1, the object code we get is better in the new version both with and without CONFIG_KASAN, as we no longer allocate a 1344 byte stack frame for hns_dsaf_get_regs() but otherwise have practically identical object code. With gcc-7.0.0, removing ACCESS_ONCE has no effect, the object code is already good either way. This patch is probably not urgent to get into 4.11 as only KASAN=y builds with certain compilers are affected, but I still think it makes sense to backport into older kernels. Cc: stable@vger.kernel.org Fixes: 511e6bc ("net: add Hisilicon Network Subsystem DSAF support") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 87226685f742..8fa18fc17cd2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -1014,9 +1014,7 @@ static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(base); - - writel(value, reg_addr + reg); + writel(value, base + reg); } #define dsaf_write_dev(a, reg, value) \ @@ -1024,9 +1022,7 @@ static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value) static inline u32 dsaf_read_reg(u8 __iomem *base, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(base); - - return readl(reg_addr + reg); + return readl(base + reg); } static inline void dsaf_write_syscon(struct regmap *base, u32 reg, u32 value) -- cgit From ccf7abb93af09ad0868ae9033d1ca8108bdaec82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Feb 2017 14:59:38 -0800 Subject: tcp: avoid infinite loop in tcp_splice_read() Splicing from TCP socket is vulnerable when a packet with URG flag is received and stored into receive queue. __tcp_splice_read() returns 0, and sk_wait_data() immediately returns since there is the problematic skb in queue. This is a nice way to burn cpu (aka infinite loop) and trigger soft lockups. Again, this gem was found by syzkaller tool. Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Willy Tarreau Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4a044964da66..0efb4c7f6704 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -770,6 +770,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); -- cgit From e1edab87faf6ca30cd137e0795bc73aa9a9a22ec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 3 Feb 2017 18:20:48 -0500 Subject: tun: read vnet_hdr_sz once When IFF_VNET_HDR is enabled, a virtio_net header must precede data. Data length is verified to be greater than or equal to expected header length tun->vnet_hdr_sz before copying. Read this value once and cache locally, as it can be updated between the test and use (TOCTOU). Signed-off-by: Willem de Bruijn Reported-by: Dmitry Vyukov CC: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2cd10b26b650..bfabe180053e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1170,9 +1170,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun->flags & IFF_VNET_HDR) { - if (len < tun->vnet_hdr_sz) + int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + + if (len < vnet_hdr_sz) return -EINVAL; - len -= tun->vnet_hdr_sz; + len -= vnet_hdr_sz; if (!copy_from_iter_full(&gso, sizeof(gso), from)) return -EFAULT; @@ -1183,7 +1185,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun16_to_cpu(tun, gso.hdr_len) > len) return -EINVAL; - iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso)); + iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { @@ -1335,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, vlan_hlen = VLAN_HLEN; if (tun->flags & IFF_VNET_HDR) - vnet_hdr_sz = tun->vnet_hdr_sz; + vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); total = skb->len + vlan_hlen + vnet_hdr_sz; -- cgit From 837585a5375c38d40361cfe64e6fd11e1addb936 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 3 Feb 2017 18:20:49 -0500 Subject: macvtap: read vnet_hdr_size once When IFF_VNET_HDR is enabled, a virtio_net header must precede data. Data length is verified to be greater than or equal to expected header length tun->vnet_hdr_sz before copying. Macvtap functions read the value once, but unless READ_ONCE is used, the compiler may ignore this and read multiple times. Enforce a single read and locally cached value to avoid updates between test and use. Signed-off-by: Willem de Bruijn Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 402618565838..c27011bbe30c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -681,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, size_t linear; if (q->flags & IFF_VNET_HDR) { - vnet_hdr_len = q->vnet_hdr_sz; + vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); err = -EINVAL; if (len < vnet_hdr_len) @@ -820,7 +820,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, if (q->flags & IFF_VNET_HDR) { struct virtio_net_hdr vnet_hdr; - vnet_hdr_len = q->vnet_hdr_sz; + vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); if (iov_iter_count(iter) < vnet_hdr_len) return -EINVAL; -- cgit From f3d83317a69e7d658e7c83e24f8b31ac533c39e3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 7 Feb 2017 09:32:30 +0100 Subject: Revert "ALSA: line6: Only determine control port properties if needed" This reverts commit f6a0dd107ad0c8b59d1c9735eea4b8cb9f460949. The commit caused a regression on LINE6 Transport that has no control caps. Although reverting the commit may result back in a spurious error message for some device again, it's the simplest regression fix, hence it's taken as is at first. The further code fix will follow later. Fixes: f6a0dd107ad0 ("ALSA: line6: Only determine control port properties if needed") Reported-by: Igor Zinovev Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 90009c0b3a92..ab3c280a23d1 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -754,8 +754,9 @@ int line6_probe(struct usb_interface *interface, goto error; } + line6_get_interval(line6); + if (properties->capabilities & LINE6_CAP_CONTROL) { - line6_get_interval(line6); ret = line6_init_cap_control(line6); if (ret < 0) goto error; -- cgit From 5593523f968bc86d42a035c6df47d5e0979b5ace Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:03 +0000 Subject: pegasus: Use heap buffers for all register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") References: https://bugs.debian.org/852556 Reported-by: Lisandro Damián Nicanor Pérez Meyer Tested-by: Lisandro Damián Nicanor Pérez Meyer Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 24e803fe9a53..36674484c6fb 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -126,40 +126,61 @@ static void async_ctrl_callback(struct urb *urb) static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) { + u8 *buf; int ret; + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_rcvctrlpipe(pegasus->usb, 0), PEGASUS_REQ_GET_REGS, PEGASUS_REQT_READ, 0, - indx, data, size, 1000); + indx, buf, size, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + else if (ret <= size) + memcpy(data, buf, ret); + kfree(buf); return ret; } -static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) +static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, + const void *data) { + u8 *buf; int ret; + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, - indx, data, size, 100); + indx, buf, size, 100); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { + u8 *buf; int ret; + buf = kmemdup(&data, 1, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, - indx, &data, 1, 1000); + indx, buf, 1, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } -- cgit From 7926aff5c57b577ab0f43364ff0c59d968f6a414 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:32 +0000 Subject: rtl8150: Use heap buffers for all register access Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/usb/rtl8150.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 95b7bd0d7abc..c81c79110cef 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -155,16 +155,36 @@ static const char driver_name [] = "rtl8150"; */ static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) { - return usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), - RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), + RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, + indx, 0, buf, size, 500); + if (ret > 0 && ret <= size) + memcpy(data, buf, ret); + kfree(buf); + return ret; } -static int set_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) +static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data) { - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), + RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, + indx, 0, buf, size, 500); + kfree(buf); + return ret; } static void async_set_reg_cb(struct urb *urb) -- cgit From d41149145f98fe26dcd0bfd1d6cc095e6e041418 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:56 +0000 Subject: catc: Combine failure cleanup code in catc_probe() Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/usb/catc.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 3daa41bdd4ea..985909eab72c 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id struct net_device *netdev; struct catc *catc; u8 broadcast[ETH_ALEN]; - int i, pktsz; + int i, pktsz, ret; if (usb_set_interface(usbdev, intf->altsetting->desc.bInterfaceNumber, 1)) { @@ -811,12 +811,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id if ((!catc->ctrl_urb) || (!catc->tx_urb) || (!catc->rx_urb) || (!catc->irq_urb)) { dev_err(&intf->dev, "No free urbs available.\n"); - usb_free_urb(catc->ctrl_urb); - usb_free_urb(catc->tx_urb); - usb_free_urb(catc->rx_urb); - usb_free_urb(catc->irq_urb); - free_netdev(netdev); - return -ENOMEM; + ret = -ENOMEM; + goto fail_free; } /* The F5U011 has the same vendor/product as the netmate but a device version of 0x130 */ @@ -913,16 +909,21 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id usb_set_intfdata(intf, catc); SET_NETDEV_DEV(netdev, &intf->dev); - if (register_netdev(netdev) != 0) { - usb_set_intfdata(intf, NULL); - usb_free_urb(catc->ctrl_urb); - usb_free_urb(catc->tx_urb); - usb_free_urb(catc->rx_urb); - usb_free_urb(catc->irq_urb); - free_netdev(netdev); - return -EIO; - } + ret = register_netdev(netdev); + if (ret) + goto fail_clear_intfdata; + return 0; + +fail_clear_intfdata: + usb_set_intfdata(intf, NULL); +fail_free: + usb_free_urb(catc->ctrl_urb); + usb_free_urb(catc->tx_urb); + usb_free_urb(catc->rx_urb); + usb_free_urb(catc->irq_urb); + free_netdev(netdev); + return ret; } static void catc_disconnect(struct usb_interface *intf) -- cgit From 2d6a0e9de03ee658a9adc3bfb2f0ca55dff1e478 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:57:04 +0000 Subject: catc: Use heap buffer for memory size test Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/usb/catc.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 985909eab72c..0acc9b640419 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id struct net_device *netdev; struct catc *catc; u8 broadcast[ETH_ALEN]; - int i, pktsz, ret; + int pktsz, ret; if (usb_set_interface(usbdev, intf->altsetting->desc.bInterfaceNumber, 1)) { @@ -840,15 +840,24 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id catc->irq_buf, 2, catc_irq_done, catc, 1); if (!catc->is_f5u011) { + u32 *buf; + int i; + dev_dbg(dev, "Checking memory size\n"); - i = 0x12345678; - catc_write_mem(catc, 0x7a80, &i, 4); - i = 0x87654321; - catc_write_mem(catc, 0xfa80, &i, 4); - catc_read_mem(catc, 0x7a80, &i, 4); + buf = kmalloc(4, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto fail_free; + } + + *buf = 0x12345678; + catc_write_mem(catc, 0x7a80, buf, 4); + *buf = 0x87654321; + catc_write_mem(catc, 0xfa80, buf, 4); + catc_read_mem(catc, 0x7a80, buf, 4); - switch (i) { + switch (*buf) { case 0x12345678: catc_set_reg(catc, TxBufCount, 8); catc_set_reg(catc, RxBufCount, 32); @@ -863,6 +872,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id dev_dbg(dev, "32k Memory\n"); break; } + + kfree(buf); dev_dbg(dev, "Getting MAC from SEEROM.\n"); -- cgit From 69629464e0b587f3711739b3aa2bcdaf2e075276 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Feb 2017 09:25:24 -0800 Subject: udp: properly cope with csum errors Dmitry reported that UDP sockets being destroyed would trigger the WARN_ON(atomic_read(&sk->sk_rmem_alloc)); in inet_sock_destruct() It turns out we do not properly destroy skb(s) that have wrong UDP checksum. Thanks again to syzkaller team. Fixes : 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue") Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Hannes Frederic Sowa Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/datagram.c | 8 ++++++-- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f0e867f58722..c4f5e6fca17c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2006,7 +2006,9 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags); + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/core/datagram.c b/net/core/datagram.c index 662bea587165..ea633342ab0d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -332,7 +332,9 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) EXPORT_SYMBOL(__skb_free_datagram_locked); int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags) + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb)) { int err = 0; @@ -342,6 +344,8 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, if (skb == skb_peek(&sk->sk_receive_queue)) { __skb_unlink(skb, &sk->sk_receive_queue); atomic_dec(&skb->users); + if (destructor) + destructor(sk, skb); err = 0; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -375,7 +379,7 @@ EXPORT_SYMBOL(__sk_queue_drop_skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = __sk_queue_drop_skb(sk, skb, flags); + int err = __sk_queue_drop_skb(sk, skb, flags, NULL); kfree_skb(skb); sk_mem_reclaim_partial(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1307a7c2e544..8aab7d78d25b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1501,7 +1501,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4d5c4eee4b3f..8990856f5101 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -441,7 +441,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); -- cgit From bd4ce941c8d5b862b2f83364be5dbe8fc8ab48f8 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 6 Feb 2017 10:14:31 -0800 Subject: mlx4: Invoke softirqs after napi_reschedule mlx4 may schedule napi from a workqueue. Afterwards, softirqs are not run in a deterministic time frame and the following message may be logged: NOHZ: local_softirq_pending 08 The problem is the same as what was described in commit ec13ee80145c ("virtio_net: invoke softirqs after __napi_schedule") and this patch applies the same fix to mlx4. Fixes: 07841f9d94c1 ("net/mlx4_en: Schedule napi when RX buffers allocation fails") Cc: Eric Dumazet Signed-off-by: Benjamin Poirier Acked-by: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index eac527e25ec9..cc003fdf0ed9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -514,8 +514,11 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { - if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) + if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { + local_bh_disable(); napi_reschedule(&priv->rx_cq[ring]->napi); + local_bh_enable(); + } } } -- cgit From 2dcab598484185dea7ec22219c76dcdd59e3cb90 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 6 Feb 2017 18:10:31 -0200 Subject: sctp: avoid BUG_ON on sctp_wait_for_sndbuf Alexander Popov reported that an application may trigger a BUG_ON in sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is waiting on it to queue more data and meanwhile another thread peels off the association being used by the first thread. This patch replaces the BUG_ON call with a proper error handling. It will return -EPIPE to the original sendmsg call, similarly to what would have been done if the association wasn't found in the first place. Acked-by: Alexander Popov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 37eeab7899fc..e214d2e7e9a3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7426,7 +7426,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); - BUG_ON(sk != asoc->base.sk); + if (sk != asoc->base.sk) + goto do_error; lock_sock(sk); *timeo_p = current_timeo; -- cgit From a524c218bc94c705886a0e0fedeee45d1931da32 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 7 Feb 2017 09:44:58 -0800 Subject: ARC: [arcompact] brown paper bag bug in unaligned access delay slot fixup Reported-by: Jo-Philipp Wich Fixes: 9aed02feae57bf7 ("ARC: [arcompact] handle unaligned access delay slot") Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds --- arch/arc/kernel/unaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 91ebe382147f..5f69c3bd59bb 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -243,7 +243,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, /* clear any remanants of delay slot */ if (delay_mode(regs)) { - regs->ret = regs->bta ~1U; + regs->ret = regs->bta & ~1U; regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; -- cgit From 930a42ded3fede7ca3acafc9153f4f2d0f56a92c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 7 Feb 2017 17:26:57 +1100 Subject: vfio/spapr_tce: Set window when adding additional groups to container If a container already has a group attached, attaching a new group should just program already created IOMMU tables to the hardware via the iommu_table_group_ops::set_window() callback. However commit 6f01cc692a16 ("vfio/spapr: Add a helper to create default DMA window") did not just simplify the code but also removed the set_window() calls in the case of attaching groups to a container which already has tables so it broke VFIO PCI hotplug. This reverts set_window() bits in tce_iommu_take_ownership_ddw(). Fixes: 6f01cc692a16 ("vfio/spapr: Add a helper to create default DMA window") Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 7690e5bf3cf1..59b3f62a2d64 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1245,6 +1245,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { + long i, ret = 0; + if (!table_group->ops->create_table || !table_group->ops->set_window || !table_group->ops->release_ownership) { WARN_ON_ONCE(1); @@ -1253,7 +1255,27 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, table_group->ops->take_ownership(table_group); + /* Set all windows to the new group */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; + + ret = table_group->ops->set_window(table_group, i, tbl); + if (ret) + goto release_exit; + } + return 0; + +release_exit: + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); + + return ret; } static int tce_iommu_attach_group(void *iommu_data, -- cgit From 912964eacb111551db73429719eb5fadcab0ff8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Feb 2017 20:56:08 +0800 Subject: sctp: check af before verify address in sctp_addr_id2transport Commit 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc") invoked sctp_verify_addr to verify the addr. But it didn't check af variable beforehand, once users pass an address with family = 0 through sockopt, sctp_get_af_specific will return NULL and NULL pointer dereference will be caused by af->sockaddr_len. This patch is to fix it by returning NULL if af variable is NULL. Fixes: 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e214d2e7e9a3..1b5d669e3029 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -239,7 +239,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, union sctp_addr *laddr = (union sctp_addr *)addr; struct sctp_transport *transport; - if (sctp_verify_addr(sk, laddr, af->sockaddr_len)) + if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len)) return NULL; addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, -- cgit From b6789123bccba8b5feb9901ed2e8c3c39181979d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 7 Feb 2017 11:11:16 -0800 Subject: mm: fix KPF_SWAPCACHE in /proc/kpageflags Commit 6326fec1122c ("mm: Use owner_priv bit for PageSwapCache, valid when PageSwapBacked") aliased PG_swapcache to PG_owner_priv_1 (and depending on PageSwapBacked being true). As a result, the KPF_SWAPCACHE bit in '/proc/kpageflags' should now be synthesized, instead of being shown on unrelated pages which just happen to have PG_owner_priv_1 set. Signed-off-by: Hugh Dickins Cc: Andrew Morton Cc: Nicholas Piggin Cc: Wu Fengguang Signed-off-by: Linus Torvalds --- fs/proc/page.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index a2066e6dee90..2726536489b1 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -173,7 +173,8 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); - u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); + if (PageSwapCache(page)) + u |= 1 << KPF_SWAPCACHE; u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); -- cgit From 413d37326700aaf708730b940b04192c36e13ef4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Feb 2017 09:59:21 -0800 Subject: Input: synaptics-rmi4 - select 'SERIO' when needed With CONFIG_SERIO=m, we get a build error for the rmi4-f03 driver, added in linux-4.10: warning: (HID_RMI) selects RMI4_F03 which has unmet direct dependencies (!UML && INPUT && RMI4_CORE && (SERIO=y || RMI4_CORE=SERIO)) drivers/input/built-in.o: In function `rmi_f03_attention': rmi_f03.c:(.text+0xcfe0): undefined reference to `serio_interrupt' rmi_f03.c:(.text+0xd055): undefined reference to `serio_interrupt' drivers/input/built-in.o: In function `rmi_f03_remove': rmi_f03.c:(.text+0xd115): undefined reference to `serio_unregister_port' drivers/input/built-in.o: In function `rmi_f03_probe': rmi_f03.c:(.text+0xd209): undefined reference to `__serio_register_port' An earlier patch tried to fix this, but missed the HID_RMI driver that does a 'select' on the F03 backend. This adds a hidden Kconfig symbol that enforces 'serio' to be enabled when RMI4-F03 is, which covers all cases. Fixes: d7ddad0acc4a ("Input: synaptics-rmi4 - fix F03 build error when serio is module") Fixes: c5e8848fc98e ("Input: synaptics-rmi4 - add support for F03") Signed-off-by: Arnd Bergmann Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index 8993983e3fe4..bb7762bf2879 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -42,13 +42,19 @@ config RMI4_SMB config RMI4_F03 bool "RMI4 Function 03 (PS2 Guest)" depends on RMI4_CORE - depends on SERIO=y || RMI4_CORE=SERIO help Say Y here if you want to add support for RMI4 function 03. Function 03 provides PS2 guest support for RMI4 devices. This includes support for TrackPoints on TouchPads. +config RMI4_F03_SERIO + tristate + depends on RMI4_CORE + depends on RMI4_F03 + default RMI4_CORE + select SERIO + config RMI4_2D_SENSOR bool depends on RMI4_CORE -- cgit From bfeda41d06d85ad9d52f2413cfc2b77be5022f75 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 7 Feb 2017 15:33:20 -0800 Subject: stacktrace, lockdep: Fix address, newline ugliness Since KERN_CONT became meaningful again, lockdep stack traces have had annoying extra newlines, like this: [ 5.561122] -> #1 (B){+.+...}: [ 5.561528] [ 5.561532] [] lock_acquire+0xc3/0x210 [ 5.562178] [ 5.562181] [] mutex_lock_nested+0x74/0x6d0 [ 5.562861] [ 5.562880] [] init_btrfs_fs+0x21/0x196 [btrfs] [ 5.563717] [ 5.563721] [] do_one_initcall+0x52/0x1b0 [ 5.564554] [ 5.564559] [] do_init_module+0x5f/0x209 [ 5.565357] [ 5.565361] [] load_module+0x218d/0x2b80 [ 5.566020] [ 5.566021] [] SyS_finit_module+0xeb/0x120 [ 5.566694] [ 5.566696] [] entry_SYSCALL_64_fastpath+0x1f/0xc2 That's happening because each printk() call now gets printed on its own line, and we do a separate call to print the spaces before the symbol. Fix it by doing the printk() directly instead of using the print_ip_sym() helper. Additionally, the symbol address isn't very helpful, so let's get rid of that, too. The final result looks like this: [ 5.194518] -> #1 (B){+.+...}: [ 5.195002] lock_acquire+0xc3/0x210 [ 5.195439] mutex_lock_nested+0x74/0x6d0 [ 5.196491] do_one_initcall+0x52/0x1b0 [ 5.196939] do_init_module+0x5f/0x209 [ 5.197355] load_module+0x218d/0x2b80 [ 5.197792] SyS_finit_module+0xeb/0x120 [ 5.198251] entry_SYSCALL_64_fastpath+0x1f/0xc2 Suggested-by: Linus Torvalds Signed-off-by: Omar Sandoval Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Link: http://lkml.kernel.org/r/43b4e114724b2bdb0308fa86cb33aa07d3d67fad.1486510315.git.osandov@fb.com Signed-off-by: Ingo Molnar --- kernel/stacktrace.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index b6e4c16377c7..9c15a9124e83 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -18,10 +18,8 @@ void print_stack_trace(struct stack_trace *trace, int spaces) if (WARN_ON(!trace->entries)) return; - for (i = 0; i < trace->nr_entries; i++) { - printk("%*c", 1 + spaces, ' '); - print_ip_sym(trace->entries[i]); - } + for (i = 0; i < trace->nr_entries; i++) + printk("%*c%pS\n", 1 + spaces, ' ', (void *)trace->entries[i]); } EXPORT_SYMBOL_GPL(print_stack_trace); @@ -29,7 +27,6 @@ int snprint_stack_trace(char *buf, size_t size, struct stack_trace *trace, int spaces) { int i; - unsigned long ip; int generated; int total = 0; @@ -37,9 +34,8 @@ int snprint_stack_trace(char *buf, size_t size, return 0; for (i = 0; i < trace->nr_entries; i++) { - ip = trace->entries[i]; - generated = snprintf(buf, size, "%*c[<%p>] %pS\n", - 1 + spaces, ' ', (void *) ip, (void *) ip); + generated = snprintf(buf, size, "%*c%pS\n", 1 + spaces, ' ', + (void *)trace->entries[i]); total += generated; -- cgit From 0c461cb727d146c9ef2d3e86214f498b78b7d125 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 31 Jan 2017 11:54:04 -0500 Subject: selinux: fix off-by-one in setprocattr SELinux tries to support setting/clearing of /proc/pid/attr attributes from the shell by ignoring terminating newlines and treating an attribute value that begins with a NUL or newline as an attempt to clear the attribute. However, the test for clearing attributes has always been wrong; it has an off-by-one error, and this could further lead to reading past the end of the allocated buffer since commit bb646cdb12e75d82258c2f2e7746d5952d3e321a ("proc_pid_attr_write(): switch to memdup_user()"). Fix the off-by-one error. Even with this fix, setting and clearing /proc/pid/attr attributes from the shell is not straightforward since the interface does not support multiple write() calls (so shells that write the value and newline separately will set and then immediately clear the attribute, requiring use of echo -n to set the attribute), whereas trying to use echo -n "" to clear the attribute causes the shell to skip the write() call altogether since POSIX says that a zero-length write causes no side effects. Thus, one must use echo -n to set and echo without -n to clear, as in the following example: $ echo -n unconfined_u:object_r:user_home_t:s0 > /proc/$$/attr/fscreate $ cat /proc/$$/attr/fscreate unconfined_u:object_r:user_home_t:s0 $ echo "" > /proc/$$/attr/fscreate $ cat /proc/$$/attr/fscreate Note the use of /proc/$$ rather than /proc/self, as otherwise the cat command will read its own attribute value, not that of the shell. There are no users of this facility to my knowledge; possibly we should just get rid of it. UPDATE: Upon further investigation it appears that a local process with the process:setfscreate permission can cause a kernel panic as a result of this bug. This patch fixes CVE-2017-2618. Signed-off-by: Stephen Smalley [PM: added the update about CVE-2017-2618 to the commit description] Cc: stable@vger.kernel.org # 3.5: d6ea83ec6864e Signed-off-by: Paul Moore Signed-off-by: James Morris --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c7c6619431d5..d98550abe16d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5887,7 +5887,7 @@ static int selinux_setprocattr(struct task_struct *p, return error; /* Obtain a SID for the context, if one was specified. */ - if (size && str[1] && str[1] != '\n') { + if (size && str[0] && str[0] != '\n') { if (str[size-1] == '\n') { str[size-1] = 0; size--; -- cgit From 5351fbb1bf1413f6024892093528280769ca852f Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 26 Jan 2017 17:32:11 +0300 Subject: drm/i915: fix use-after-free in page_flip_completed() page_flip_completed() dereferences 'work' variable after executing queue_work(). This is not safe as the 'work' item might be already freed by queued work: BUG: KASAN: use-after-free in page_flip_completed+0x3ff/0x490 at addr ffff8803dc010f90 Call Trace: __asan_report_load8_noabort+0x59/0x80 page_flip_completed+0x3ff/0x490 intel_finish_page_flip_mmio+0xe3/0x130 intel_pipe_handle_vblank+0x2d/0x40 gen8_irq_handler+0x4a7/0xed0 __handle_irq_event_percpu+0xf6/0x860 handle_irq_event_percpu+0x6b/0x160 handle_irq_event+0xc7/0x1b0 handle_edge_irq+0x1f4/0xa50 handle_irq+0x41/0x70 do_IRQ+0x9a/0x200 common_interrupt+0x89/0x89 Freed: kfree+0x113/0x4d0 intel_unpin_work_fn+0x29a/0x3b0 process_one_work+0x79e/0x1b70 worker_thread+0x611/0x1460 kthread+0x241/0x3a0 ret_from_fork+0x27/0x40 Move queue_work() after trace_i915_flip_complete() to fix this. Fixes: e5510fac98a7 ("drm/i915: add tracepoints for flip requests & completions") Signed-off-by: Andrey Ryabinin Cc: # v2.6.36+ Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170126143211.24013-1-aryabinin@virtuozzo.com (cherry picked from commit 05c41f926fcc7ef838c80a6a99d84f67b4e0b824) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f1e4a21d4664..891c86aef99d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4262,10 +4262,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc) drm_crtc_vblank_put(&intel_crtc->base); wake_up_all(&dev_priv->pending_flip_queue); - queue_work(dev_priv->wq, &work->unpin_work); - trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); + + queue_work(dev_priv->wq, &work->unpin_work); } static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) -- cgit From e3818697e1d9140d0b990fecf4429d40c41ca0b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 11:19:32 +0000 Subject: drm/i915: Flush untouched framebuffers before display on !llc On a non-llc system, the objects are created with .cache_level = CACHE_NONE and so the transition to uncached for scanout is a no-op. However, if the object was never written to, it will still be in the CPU domain (having been zeroed out by shmemfs). Those cachelines need to be flushed prior to display. Reported-and-tested-by: Vito Caputo Fixes: a6a7cc4b7db6 ("drm/i915: Always flush the dirty CPU cache when pinning the scanout") Signed-off-by: Chris Wilson Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170109111932.6342-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit 69aeafeae9b30d797c439a30d1a4ccc8dc5b0eb0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4b23a7814713..b3f1134fb9c8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3478,7 +3478,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty) { + if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj, true); intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); } -- cgit From 7152187159193056f30ad5726741bb25028672bf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Feb 2017 10:47:11 +0100 Subject: drm/i915: fix i915 running as dom0 under Xen Commit 920cf4194954ec ("drm/i915: Introduce an internal allocator for disposable private objects") introduced a regression for the kernel running as Xen dom0: when switching to graphics mode a GPU HANG occurred. Reason seems to be a missing adaption similar to that done in commit 7453c549f5f648 ("swiotlb: Export swiotlb_max_segment to users") to i915_gem_object_get_pages_internal(). So limit the maximum page order to be used according to the maximum swiotlb segment size instead to the complete swiotlb size. Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects") Signed-off-by: Juergen Gross Link: http://patchwork.freedesktop.org/patch/msgid/20170202094711.939-1-jgross@suse.com Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10-rc1+ Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson (cherry picked from commit 5584f1b1d73e9cc95092734c316e467c6c4468f9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_internal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 4b3ff3e5b911..d09c74973cb3 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -66,8 +66,16 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) max_order = MAX_ORDER; #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */ - max_order = min(max_order, ilog2(IO_TLB_SEGPAGES)); + if (swiotlb_nr_tbl()) { + unsigned int max_segment; + + max_segment = swiotlb_max_segment(); + if (max_segment) { + max_segment = max_t(unsigned int, max_segment, + PAGE_SIZE) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); + } + } #endif gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; -- cgit From 853277481178fdf14d1a4e9e6ac7174d6046176f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 1 Feb 2017 15:46:09 +0200 Subject: drm/i915: don't warn about Skylake CPU - KabyPoint PCH combo Apparently there are machines out there with Skylake CPU and KabyPoint PCH. Judging from our driver code, there doesn't seem to be any code paths that would do anything different between SunrisePoint and KabyPoint PCHs, so it would seem okay to accept the combo without warnings. Fixes: 22dea0be50b2 ("drm/i915: Introduce Kabypoint PCH for Kabylake H/DT.") References: https://lists.freedesktop.org/archives/intel-gfx/2017-February/118611.html Reported-by: Rainer Koenig Cc: Rainer Koenig Cc: Rodrigo Vivi Cc: # v4.8+ Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485956769-26015-1-git-send-email-jani.nikula@intel.com (cherry picked from commit 3aac4acb89710fe782c9e78e7b1febf76e112c6c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b2c4a0b8a627..8d6309865fa5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -213,7 +213,8 @@ static void intel_detect_pch(struct drm_device *dev) } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_KBP; DRM_DEBUG_KMS("Found KabyPoint PCH\n"); - WARN_ON(!IS_KABYLAKE(dev_priv)); + WARN_ON(!IS_SKYLAKE(dev_priv) && + !IS_KABYLAKE(dev_priv)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && -- cgit From 5cad24d835772f9f709971a8d6fcf12afe53b2a7 Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Graux Date: Tue, 7 Feb 2017 12:12:41 +0100 Subject: mmc: mmci: avoid clearing ST Micro busy end interrupt mistakenly This fixes a race condition that may occur whenever ST micro busy end interrupt is raised just after being unmasked but before leaving mmci interrupt context. A dead-lock has been found if connecting mmci ST Micro variant whose amba id is 0x10480180 to some new eMMC that supports internal caches. Whenever mmci driver enables cache control by programming eMMC's EXT_CSD register, block driver may request to flush the eMMC internal caches causing mmci driver to send a MMC_SWITCH command to the card with FLUSH_CACHE operation. And because busy end interrupt may be mistakenly cleared while not yet processed, this mmc request may never complete. As a result, mmcqd task may be stuck forever. Here is an instance caught by lockup detector which shows that mmcqd task was hung while waiting for mmc_flush_cache command to complete: .. [ 240.251595] INFO: task mmcqd/1:52 blocked for more than 120 seconds. [ 240.257973] Not tainted 4.1.13-00510-g9d91424 #2 [ 240.263109] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 240.270955] mmcqd/1 D c047504c 0 52 2 0x00000000 [ 240.277359] [] (__schedule) from [] (schedule+0x40/0x98) [ 240.284418] [] (schedule) from [] (schedule_timeout+0x148/0x188) [ 240.292191] [] (schedule_timeout) from [] (wait_for_common+0xa4/0x170) [ 240.300491] [] (wait_for_common) from [] (mmc_wait_for_req_done+0x4c/0x13c) [ 240.309224] [] (mmc_wait_for_req_done) from [] (mmc_wait_for_cmd+0x64/0x84) [ 240.317953] [] (mmc_wait_for_cmd) from [] (__mmc_switch+0xa4/0x2a8) [ 240.325964] [] (__mmc_switch) from [] (mmc_switch+0x28/0x30) [ 240.333389] [] (mmc_switch) from [] (mmc_flush_cache+0x54/0x80) [ 240.341073] [] (mmc_flush_cache) from [] (mmc_blk_issue_rq+0x114/0x4e8) [ 240.349459] [] (mmc_blk_issue_rq) from [] (mmc_queue_thread+0xc0/0x180) [ 240.357844] [] (mmc_queue_thread) from [] (kthread+0xdc/0xf4) [ 240.365339] [] (kthread) from [] (ret_from_fork+0x14/0x2c) .. .. [ 240.664311] INFO: task partprobe:564 blocked for more than 120 seconds. [ 240.670943] Not tainted 4.1.13-00510-g9d91424 #2 [ 240.676078] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 240.683922] partprobe D c047504c 0 564 486 0x00000000 [ 240.690318] [] (__schedule) from [] (schedule+0x40/0x98) [ 240.697396] [] (schedule) from [] (schedule_timeout+0x148/0x188) [ 240.705149] [] (schedule_timeout) from [] (wait_for_common+0xa4/0x170) [ 240.713446] [] (wait_for_common) from [] (submit_bio_wait+0x58/0x64) [ 240.721571] [] (submit_bio_wait) from [] (blkdev_issue_flush+0x60/0x88) [ 240.729957] [] (blkdev_issue_flush) from [] (blkdev_fsync+0x34/0x44) [ 240.738083] [] (blkdev_fsync) from [] (do_fsync+0x3c/0x64) [ 240.745319] [] (do_fsync) from [] (ret_fast_syscall+0x0/0x3c) .. Here is the detailed sequence showing when this issue may happen: 1) At probe time, mmci device is initialized and card busy detection based on DAT[0] monitoring is enabled. 2) Later during run time, since card reported to support internal caches, a MMCI_SWITCH command is sent to eMMC device with FLUSH_CACHE operation. On receiving this command, eMMC may enter busy state (for a relatively short time in the case of the dead-lock). 3) Then mmci interrupt is raised and mmci_irq() is called: MMCISTATUS register is read and is equal to 0x01000440. So the following status bits are set: - MCI_CMDRESPEND (= 6) - MCI_DATABLOCKEND (= 10) - MCI_ST_CARDBUSY (= 24) Since MMCIMASK0 register is 0x3FF, status variable is set to 0x00000040 and BIT MCI_CMDRESPEND is cleared by writing MMCICLEAR register. Then mmci_cmd_irq() is called. Considering the following conditions: - host->busy_status is 0, - this is a "busy response", - reading again MMCISTATUS register gives 0x1000400, MMCIMASK0 is updated to unmask MCI_ST_BUSYEND bit. Thus, MMCIMASK0 is set to 0x010003FF and host->busy_status is set to wait for busy end completion. Back again in status loop of mmci_irq(), we quickly go through mmci_data_irq() as there are no data in that case. And we finally go through following test at the end of while(status) loop: /* * Don't poll for busy completion in irq context. */ if (host->variant->busy_detect && host->busy_status) status &= ~host->variant->busy_detect_flag; Because status variable is not yet null (is equal to 0x40), we do not leave interrupt context yet but we loop again into while(status) loop. So we run across following steps: a) MMCISTATUS register is read again and this time is equal to 0x01000400. So that following bits are set: - MCI_DATABLOCKEND (= 10) - MCI_ST_CARDBUSY (= 24) Since MMCIMASK0 register is equal to 0x010003FF: b) status variable is set to 0x01000000. c) MCI_ST_CARDBUSY bit is cleared by writing MMCICLEAR register. Then, mmci_cmd_irq() is called one more time. Since host->busy_status is set and that MCI_ST_CARDBUSY is set in status variable, we just return from this function. Back again in mmci_irq(), status variable is set to 0 and we finally leave the while(status) loop. As a result we leave interrupt context, waiting for busy end interrupt event. Now, consider that busy end completion is raised IN BETWEEN steps 3.a) and 3.c). In such a case, we may mistakenly clear busy end interrupt at step 3.c) while it has not yet been processed. This will result in mmc command to wait forever for a busy end completion that will never happen. To fix the problem, this patch implements the following changes: Considering that the mmci seems to be triggering the IRQ on both edges while monitoring DAT0 for busy completion and that same status bit is used to monitor start and end of busy detection, special care must be taken to make sure that both start and end interrupts are always cleared one after the other. 1) Clearing of card busy bit is moved in mmc_cmd_irq() function where unmasking of busy end bit is effectively handled. 2) Just before unmasking busy end event, busy start event is cleared by writing card busy bit in MMCICLEAR register. 3) Finally, once we are no more busy with a command, busy end event is cleared writing again card busy bit in MMCICLEAR register. This patch has been tested with the ST Accordo5 machine, not yet supported upstream but relies on the mmci driver. Signed-off-by: Sarang Mairal Signed-off-by: Jean-Nicolas Graux Reviewed-by: Linus Walleij Tested-by: Ulf Hansson Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 01a804792f30..b5972440c1bf 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1023,7 +1023,12 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd, if (!host->busy_status && busy_resp && !(status & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT)) && (readl(base + MMCISTATUS) & host->variant->busy_detect_flag)) { - /* Unmask the busy IRQ */ + + /* Clear the busy start IRQ */ + writel(host->variant->busy_detect_mask, + host->base + MMCICLEAR); + + /* Unmask the busy end IRQ */ writel(readl(base + MMCIMASK0) | host->variant->busy_detect_mask, base + MMCIMASK0); @@ -1038,10 +1043,14 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd, /* * At this point we are not busy with a command, we have - * not received a new busy request, mask the busy IRQ and - * fall through to process the IRQ. + * not received a new busy request, clear and mask the busy + * end IRQ and fall through to process the IRQ. */ if (host->busy_status) { + + writel(host->variant->busy_detect_mask, + host->base + MMCICLEAR); + writel(readl(base + MMCIMASK0) & ~host->variant->busy_detect_mask, base + MMCIMASK0); @@ -1283,12 +1292,21 @@ static irqreturn_t mmci_irq(int irq, void *dev_id) } /* - * We intentionally clear the MCI_ST_CARDBUSY IRQ here (if it's - * enabled) since the HW seems to be triggering the IRQ on both - * edges while monitoring DAT0 for busy completion. + * We intentionally clear the MCI_ST_CARDBUSY IRQ (if it's + * enabled) in mmci_cmd_irq() function where ST Micro busy + * detection variant is handled. Considering the HW seems to be + * triggering the IRQ on both edges while monitoring DAT0 for + * busy completion and that same status bit is used to monitor + * start and end of busy detection, special care must be taken + * to make sure that both start and end interrupts are always + * cleared one after the other. */ status &= readl(host->base + MMCIMASK0); - writel(status, host->base + MMCICLEAR); + if (host->variant->busy_detect) + writel(status & ~host->variant->busy_detect_mask, + host->base + MMCICLEAR); + else + writel(status, host->base + MMCICLEAR); dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status); -- cgit From 789ea12500e5ce3911d0a6a822277c3133451927 Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Fri, 3 Feb 2017 12:32:09 +0800 Subject: drm/i915/bxt: Add MST support when do DPLL calculation Add the missing INTEL_OUTPUT_DP_MST case in bxt_get_dpll() to correctly initialize the crtc_state and port plls when link training a DP MST monitor on BXT/APL devices. Fixes: a277ca7dc01d ("drm/i915: Split bxt_ddi_pll_select()") Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99572 Reviewed-by: Cooper Chiou Reviewed-by: Gary C Wang Reviewed-by: Ciobanu, Nathan D Reviewed-by: Herbert, Marc Reviewed-by: Bride, Jim Reviewed-by: Navare, Manasi D Cc: Jani Nikula Cc: # v4.9+ Signed-off-by: Lee, Shawn C Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486096329-6255-1-git-send-email-shawn.c.lee@intel.com (cherry picked from commit 0aab2c721d81590012a5021a516f00666646741f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 58a756f2f224..a2f0e070d38d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1730,7 +1730,8 @@ bxt_get_dpll(struct intel_crtc *crtc, return NULL; if ((encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP) && + encoder->type == INTEL_OUTPUT_EDP || + encoder->type == INTEL_OUTPUT_DP_MST) && !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) return NULL; -- cgit From 83bf6d55c132d5c4f773e5a04149c05f4aa0c2ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 12:57:17 +0000 Subject: drm/i915: Remove overzealous fence warn on runtime suspend The goal of the WARN was to catch when we are still actively using the fence as we go into the runtime suspend. However, the reg->pin_count is too coarse as it does not distinguish between exclusive ownership of the fence register from activity. I've not improved on the WARN, nor have we captured this WARN in an exact igt, but it is showing up regularly in the wild: [ 1915.935332] WARNING: CPU: 1 PID: 10861 at drivers/gpu/drm/i915/i915_gem.c:2022 i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.935383] WARN_ON(reg->pin_count)[ 1915.935399] Modules linked in: snd_hda_intel i915 drm_kms_helper vgem netconsole scsi_transport_iscsi fuse vfat fat x86_pkg_temp_thermal coretemp intel_cstate intel_uncore snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mei_me mei serio_raw intel_rapl_perf intel_pch_thermal soundcore wmi acpi_pad i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops drm r8169 mii video [last unloaded: drm_kms_helper] [ 1915.935785] CPU: 1 PID: 10861 Comm: kworker/1:0 Tainted: G U W 4.9.0-rc5+ #170 [ 1915.935799] Hardware name: LENOVO 80MX/Lenovo E31-80, BIOS DCCN34WW(V2.03) 12/01/2015 [ 1915.935822] Workqueue: pm pm_runtime_work [ 1915.935845] ffffc900044fbbf0 ffffffffac3220bc ffffc900044fbc40 0000000000000000 [ 1915.935890] ffffc900044fbc30 ffffffffac059bcb 000007e6044fbc60 ffff8801626e3198 [ 1915.935937] ffff8801626e0000 0000000000000002 ffffffffc05e5d4e 0000000000000000 [ 1915.935985] Call Trace: [ 1915.936013] [] dump_stack+0x4f/0x73 [ 1915.936038] [] __warn+0xcb/0xf0 [ 1915.936060] [] warn_slowpath_fmt+0x5f/0x80 [ 1915.936158] [] i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.936251] [] intel_runtime_suspend+0x64/0x280 [i915] [ 1915.936277] [] ? dequeue_entity+0x241/0xbc0 [ 1915.936298] [] pci_pm_runtime_suspend+0x55/0x180 [ 1915.936317] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936339] [] __rpm_callback+0x32/0x70 [ 1915.936356] [] rpm_callback+0x24/0x80 [ 1915.936375] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936392] [] rpm_suspend+0x12d/0x680 [ 1915.936415] [] ? _raw_spin_unlock_irq+0x17/0x30 [ 1915.936435] [] ? finish_task_switch+0x88/0x220 [ 1915.936455] [] pm_runtime_work+0x6f/0xb0 [ 1915.936477] [] process_one_work+0x1f3/0x4d0 [ 1915.936501] [] worker_thread+0x48/0x4e0 [ 1915.936523] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936542] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936559] [] kthread+0xd9/0xf0 [ 1915.936580] [] ? kthread_park+0x60/0x60 [ 1915.936600] [] ret_from_fork+0x22/0x30 In the case the register is pinned, it should be present and we will need to invalidate them to be restored upon resume as we cannot expect the owner of the pin to call get_fence prior to use after resume. Fixes: 7c108fd8feac ("drm/i915: Move fence cancellation to runtime suspend") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98804 Reported-by: Lionel Landwerlin Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Imre Deak Cc: Jani Nikula Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170203125717.8431-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit e0ec3ec698851a6c97a12d696407b3ff77700c23) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8d6309865fa5..728ca3ea74d2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2428,6 +2428,7 @@ static int intel_runtime_resume(struct device *kdev) * we can do is to hope that things will still work (and disable RPM). */ i915_gem_init_swizzling(dev_priv); + i915_gem_restore_fences(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b3f1134fb9c8..24b5b046754b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2010,8 +2010,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - if (WARN_ON(reg->pin_count)) - continue; + /* Ideally we want to assert that the fence register is not + * live at this point (i.e. that no piece of code will be + * trying to write through fence + GTT, as that both violates + * our tracking of activity and associated locking/barriers, + * but also is illegal given that the hw is powered down). + * + * Previously we used reg->pin_count as a "liveness" indicator. + * That is not sufficient, and we need a more fine-grained + * tool if we want to have a sanity check here. + */ if (!reg->vma) continue; -- cgit From 6e7eb1783be7f19eb071c96ddda0bbf22279ff46 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 7 Feb 2017 20:55:59 +0100 Subject: drm/i915: Always convert incoming exec offsets to non-canonical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're using non-canonical addresses in drm_mm, and we're making sure that userspace is using canonical addressing - both in case of softpin (verifying incoming offset) and when relocating (converting to canonical when updating offset returned to userspace). Unfortunately when considering the need for relocations, we're comparing offset from userspace (in canonical form) with drm_mm node (in non-canonical form), and as a result, we end up always relocating if our offsets are in the "problematic" range. Let's always convert the offsets to avoid the performance impact of relocations. Fixes: a5f0edf63bdf ("drm/i915: Avoid writing relocs with addresses in non-canonical form") Cc: Chris Wilson Cc: Michel Thierry Reported-by: Michał Pyrzowski Signed-off-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170207195559.18798-1-michal.winiarski@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 038c95a313e4ca954ee5ab8a0c7559a646b0f462) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 097d9d8c2315..b8b877c91b0a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1181,14 +1181,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].offset != gen8_canonical_addr(exec[i].offset & PAGE_MASK)) return -EINVAL; - - /* From drm_mm perspective address space is continuous, - * so from this point we're always using non-canonical - * form internally. - */ - exec[i].offset = gen8_noncanonical_addr(exec[i].offset); } + /* From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. + */ + exec[i].offset = gen8_noncanonical_addr(exec[i].offset); + if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; -- cgit From 4842e98f26dd80be3623c4714a244ba52ea096a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Feb 2017 12:35:39 +0100 Subject: ALSA: seq: Fix race at creating a queue When a sequencer queue is created in snd_seq_queue_alloc(),it adds the new queue element to the public list before referencing it. Thus the queue might be deleted before the call of snd_seq_queue_use(), and it results in the use-after-free error, as spotted by syzkaller. The fix is to reference the queue object at the right time. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_queue.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 0bec02e89d51..450c5187eecb 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -181,6 +181,8 @@ void __exit snd_seq_queues_delete(void) } } +static void queue_use(struct snd_seq_queue *queue, int client, int use); + /* allocate a new queue - * return queue index value or negative value for error */ @@ -192,11 +194,11 @@ int snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) if (q == NULL) return -ENOMEM; q->info_flags = info_flags; + queue_use(q, client, 1); if (queue_list_add(q) < 0) { queue_delete(q); return -ENOMEM; } - snd_seq_queue_use(q->queue, client, 1); /* use this queue */ return q->queue; } @@ -502,19 +504,9 @@ int snd_seq_queue_timer_set_tempo(int queueid, int client, return result; } - -/* use or unuse this queue - - * if it is the first client, starts the timer. - * if it is not longer used by any clients, stop the timer. - */ -int snd_seq_queue_use(int queueid, int client, int use) +/* use or unuse this queue */ +static void queue_use(struct snd_seq_queue *queue, int client, int use) { - struct snd_seq_queue *queue; - - queue = queueptr(queueid); - if (queue == NULL) - return -EINVAL; - mutex_lock(&queue->timer_mutex); if (use) { if (!test_and_set_bit(client, queue->clients_bitmap)) queue->clients++; @@ -529,6 +521,21 @@ int snd_seq_queue_use(int queueid, int client, int use) } else { snd_seq_timer_close(queue); } +} + +/* use or unuse this queue - + * if it is the first client, starts the timer. + * if it is not longer used by any clients, stop the timer. + */ +int snd_seq_queue_use(int queueid, int client, int use) +{ + struct snd_seq_queue *queue; + + queue = queueptr(queueid); + if (queue == NULL) + return -EINVAL; + mutex_lock(&queue->timer_mutex); + queue_use(queue, client, use); mutex_unlock(&queue->timer_mutex); queuefree(queue); return 0; -- cgit From d7df2443cd5f67fc6ee7c05a88e4996e8177f91b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 3 Feb 2017 17:10:28 +1100 Subject: powerpc/mm: Fix spurrious segfaults on radix with autonuma When autonuma (Automatic NUMA balancing) marks a PTE inaccessible it clears all the protection bits but leave the PTE valid. With the Radix MMU, an attempt at executing from such a PTE will take a fault with bit 35 of SRR1 set "SRR1_ISI_N_OR_G". It is thus incorrect to treat all such faults as errors. We should pass them to handle_mm_fault() for autonuma to deal with. The case of pages that are really not executable is handled by the existing test for VM_EXEC further down. That leaves us with catching the kernel attempts at executing user pages. We can catch that earlier, even before we do find_vma. It is never valid on powerpc for the kernel to take an exec fault to begin with. So fold that test with the existing test for the kernel faulting on kernel addresses to bail out early. Fixes: 1d18ad026844 ("powerpc/mm: Detect instruction fetch denied and report") Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6fd30ac7d14a..62a50d6d1053 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -253,8 +253,11 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(debugger_fault_handler(regs))) goto bail; - /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) { + /* + * The kernel should never take an execute fault nor should it + * take a page fault to a kernel address. + */ + if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) { rc = SIGSEGV; goto bail; } @@ -390,20 +393,6 @@ good_area: #endif /* CONFIG_8xx */ if (is_exec) { - /* - * An execution fault + no execute ? - * - * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we - * deliberately create NX mappings, and use the fault to do the - * cache flush. This is usually handled in hash_page_do_lazy_icache() - * but we could end up here if that races with a concurrent PTE - * update. In that case we need to fall through here to the VMA - * check below. - */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && - (regs->msr & SRR1_ISI_N_OR_G)) - goto bad_area; - /* * Allow execution from readable areas if the MMU does not * provide separate controls over reading and executing. -- cgit From 391e2a6de9781e4906dd7e0b1cc097050bf43e11 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 23 Oct 2016 14:28:15 -0700 Subject: target: Don't BUG_ON during NodeACL dynamic -> explicit conversion After the v4.2+ RCU conversion to se_node_acl->lun_entry_hlist, a BUG_ON() was added in core_enable_device_list_for_node() to detect when the located orig->se_lun_acl contains an existing se_lun_acl pointer reference. However, this scenario can happen when a dynamically generated NodeACL is being converted to an explicit NodeACL, when the explicit NodeACL contains a different LUN mapping than the default provided by the WWN endpoint. So instead of triggering BUG_ON(), go ahead and fail instead following the original pre RCU conversion logic. Reported-by: Benjamin ESTRABAUD Cc: Benjamin ESTRABAUD Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 1ebd13ef7bd3..26929c44d703 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -352,7 +352,15 @@ int core_enable_device_list_for_node( kfree(new); return -EINVAL; } - BUG_ON(orig->se_lun_acl != NULL); + if (orig->se_lun_acl != NULL) { + pr_warn_ratelimited("Detected existing explicit" + " se_lun_acl->se_lun_group reference for %s" + " mapped_lun: %llu, failing\n", + nacl->initiatorname, mapped_lun); + mutex_unlock(&nacl->lun_entry_mutex); + kfree(new); + return -EINVAL; + } rcu_assign_pointer(new->se_lun, lun); rcu_assign_pointer(new->se_lun_acl, lun_acl); -- cgit From 0583c261e6325f392c1f7a1b9112e31298e1a4bd Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 31 Oct 2016 00:54:40 -0700 Subject: target: Use correct SCSI status during EXTENDED_COPY exception This patch adds the missing target_complete_cmd() SCSI status parameter change in target_xcopy_do_work(), that was originally missing in commit 926317de33. It correctly propigates up the correct SCSI status during EXTENDED_COPY exception cases, instead of always using the hardcoded SAM_STAT_CHECK_CONDITION from original code. This is required for ESX host environments that expect to hit SAM_STAT_RESERVATION_CONFLICT for certain scenarios, and SAM_STAT_CHECK_CONDITION results in non-retriable status for these cases. Reported-by: Nixon Vincent Tested-by: Nixon Vincent Cc: Nixon Vincent Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_xcopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index d828b3b5000b..cac5a20a4de0 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -864,7 +864,7 @@ out: " CHECK_CONDITION -> sending response\n", rc); ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; } - target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); + target_complete_cmd(ec_cmd, ec_cmd->scsi_status); } sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) -- cgit From c54eeffbe9338fa982dc853d816fda9202a13b5a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 6 Dec 2016 22:45:46 -0800 Subject: target: Fix early transport_generic_handle_tmr abort scenario This patch fixes a bug where incoming task management requests can be explicitly aborted during an active LUN_RESET, but who's struct work_struct are canceled in-flight before execution. This occurs when core_tmr_drain_tmr_list() invokes cancel_work_sync() for the incoming se_tmr_req->task_cmd->work, resulting in cmd->work for target_tmr_work() never getting invoked and the aborted TMR waiting indefinately within transport_wait_for_tasks(). To address this case, perform a CMD_T_ABORTED check early in transport_generic_handle_tmr(), and invoke the normal path via transport_cmd_check_stop_to_fabric() to complete any TMR kthreads blocked waiting for CMD_T_STOP in transport_wait_for_tasks(). Also, move the TRANSPORT_ISTATE_PROCESSING assignment earlier into transport_generic_handle_tmr() so the existing check in core_tmr_drain_tmr_list() avoids attempting abort the incoming se_tmr_req->task_cmd->work if it has already been queued into se_device->tmr_wq. Reported-by: Rob Millner Tested-by: Rob Millner Cc: Rob Millner Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 1cadc9eefa21..8b698432aea5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3110,7 +3110,6 @@ static void target_tmr_work(struct work_struct *work) spin_unlock_irqrestore(&cmd->t_state_lock, flags); goto check_stop; } - cmd->t_state = TRANSPORT_ISTATE_PROCESSING; spin_unlock_irqrestore(&cmd->t_state_lock, flags); cmd->se_tfo->queue_tm_rsp(cmd); @@ -3123,11 +3122,25 @@ int transport_generic_handle_tmr( struct se_cmd *cmd) { unsigned long flags; + bool aborted = false; spin_lock_irqsave(&cmd->t_state_lock, flags); - cmd->transport_state |= CMD_T_ACTIVE; + if (cmd->transport_state & CMD_T_ABORTED) { + aborted = true; + } else { + cmd->t_state = TRANSPORT_ISTATE_PROCESSING; + cmd->transport_state |= CMD_T_ACTIVE; + } spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (aborted) { + pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d" + "ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function, + cmd->se_tmr_req->ref_task_tag, cmd->tag); + transport_cmd_check_stop_to_fabric(cmd); + return 0; + } + INIT_WORK(&cmd->work, target_tmr_work); queue_work(cmd->se_dev->tmr_wq, &cmd->work); return 0; -- cgit From 01d4d673558985d9a118e1e05026633c3e2ade9b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 7 Dec 2016 12:55:54 -0800 Subject: target: Fix multi-session dynamic se_node_acl double free OOPs This patch addresses a long-standing bug with multi-session (eg: iscsi-target + iser-target) se_node_acl dynamic free withini transport_deregister_session(). This bug is caused when a storage endpoint is configured with demo-mode (generate_node_acls = 1 + cache_dynamic_acls = 1) initiators, and initiator login creates a new dynamic node acl and attaches two sessions to it. After that, demo-mode for the storage instance is disabled via configfs (generate_node_acls = 0 + cache_dynamic_acls = 0) and the existing dynamic acl is never converted to an explicit ACL. The end result is dynamic acl resources are released twice when the sessions are shutdown in transport_deregister_session(). If the storage instance is not changed to disable demo-mode, or the dynamic acl is converted to an explict ACL, or there is only a single session associated with the dynamic ACL, the bug is not triggered. To address this big, move the release of dynamic se_node_acl memory into target_complete_nacl() so it's only freed once when se_node_acl->acl_kref reaches zero. (Drop unnecessary list_del_init usage - HCH) Reported-by: Rob Millner Tested-by: Rob Millner Cc: Rob Millner Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 69 +++++++++++++++++++++------------- include/target/target_core_base.h | 1 + 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 8b698432aea5..437591bc7c08 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -457,8 +457,20 @@ static void target_complete_nacl(struct kref *kref) { struct se_node_acl *nacl = container_of(kref, struct se_node_acl, acl_kref); + struct se_portal_group *se_tpg = nacl->se_tpg; - complete(&nacl->acl_free_comp); + if (!nacl->dynamic_stop) { + complete(&nacl->acl_free_comp); + return; + } + + mutex_lock(&se_tpg->acl_node_mutex); + list_del(&nacl->acl_list); + mutex_unlock(&se_tpg->acl_node_mutex); + + core_tpg_wait_for_nacl_pr_ref(nacl); + core_free_device_list_for_node(nacl, se_tpg); + kfree(nacl); } void target_put_nacl(struct se_node_acl *nacl) @@ -499,12 +511,39 @@ EXPORT_SYMBOL(transport_deregister_session_configfs); void transport_free_session(struct se_session *se_sess) { struct se_node_acl *se_nacl = se_sess->se_node_acl; + /* * Drop the se_node_acl->nacl_kref obtained from within * core_tpg_get_initiator_node_acl(). */ if (se_nacl) { + struct se_portal_group *se_tpg = se_nacl->se_tpg; + const struct target_core_fabric_ops *se_tfo = se_tpg->se_tpg_tfo; + unsigned long flags; + se_sess->se_node_acl = NULL; + + /* + * Also determine if we need to drop the extra ->cmd_kref if + * it had been previously dynamically generated, and + * the endpoint is not caching dynamic ACLs. + */ + mutex_lock(&se_tpg->acl_node_mutex); + if (se_nacl->dynamic_node_acl && + !se_tfo->tpg_check_demo_mode_cache(se_tpg)) { + spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags); + if (list_empty(&se_nacl->acl_sess_list)) + se_nacl->dynamic_stop = true; + spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags); + + if (se_nacl->dynamic_stop) + list_del(&se_nacl->acl_list); + } + mutex_unlock(&se_tpg->acl_node_mutex); + + if (se_nacl->dynamic_stop) + target_put_nacl(se_nacl); + target_put_nacl(se_nacl); } if (se_sess->sess_cmd_map) { @@ -518,16 +557,12 @@ EXPORT_SYMBOL(transport_free_session); void transport_deregister_session(struct se_session *se_sess) { struct se_portal_group *se_tpg = se_sess->se_tpg; - const struct target_core_fabric_ops *se_tfo; - struct se_node_acl *se_nacl; unsigned long flags; - bool drop_nacl = false; if (!se_tpg) { transport_free_session(se_sess); return; } - se_tfo = se_tpg->se_tpg_tfo; spin_lock_irqsave(&se_tpg->session_lock, flags); list_del(&se_sess->sess_list); @@ -535,33 +570,15 @@ void transport_deregister_session(struct se_session *se_sess) se_sess->fabric_sess_ptr = NULL; spin_unlock_irqrestore(&se_tpg->session_lock, flags); - /* - * Determine if we need to do extra work for this initiator node's - * struct se_node_acl if it had been previously dynamically generated. - */ - se_nacl = se_sess->se_node_acl; - - mutex_lock(&se_tpg->acl_node_mutex); - if (se_nacl && se_nacl->dynamic_node_acl) { - if (!se_tfo->tpg_check_demo_mode_cache(se_tpg)) { - list_del(&se_nacl->acl_list); - drop_nacl = true; - } - } - mutex_unlock(&se_tpg->acl_node_mutex); - - if (drop_nacl) { - core_tpg_wait_for_nacl_pr_ref(se_nacl); - core_free_device_list_for_node(se_nacl, se_tpg); - se_sess->se_node_acl = NULL; - kfree(se_nacl); - } pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", se_tpg->se_tpg_tfo->get_fabric_name()); /* * If last kref is dropping now for an explicit NodeACL, awake sleeping * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group * removal context from within transport_free_session() code. + * + * For dynamic ACL, target_put_nacl() uses target_complete_nacl() + * to release all remaining generate_node_acl=1 created ACL resources. */ transport_free_session(se_sess); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 43edf82e54ff..da854fb4530f 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -538,6 +538,7 @@ struct se_node_acl { char initiatorname[TRANSPORT_IQN_LEN]; /* Used to signal demo mode created ACL, disabled by default */ bool dynamic_node_acl; + bool dynamic_stop; u32 queue_depth; u32 acl_index; enum target_prot_type saved_prot_type; -- cgit From 9b2792c3da1e80f2d460167d319302a24c9ca2b7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 6 Feb 2017 14:28:09 -0800 Subject: target: Fix COMPARE_AND_WRITE ref leak for non GOOD status This patch addresses a long standing bug where the commit phase of COMPARE_AND_WRITE would result in a se_cmd->cmd_kref reference leak if se_cmd->scsi_status returned non SAM_STAT_GOOD. This would manifest first as a lost SCSI response, and eventual hung task during fabric driver logout or re-login, as existing shutdown logic waited for the COMPARE_AND_WRITE se_cmd->cmd_kref to reach zero. To address this bug, compare_and_write_post() has been changed to drop the incorrect !cmd->scsi_status conditional that was preventing *post_ret = 1 for being set during non SAM_STAT_GOOD status. This patch has been tested with SAM_STAT_CHECK_CONDITION status from normal target_complete_cmd() callback path, as well as the incoming __target_execute_cmd() submission failure path when se_cmd->execute_cmd() returns non zero status. Reported-by: Donald White Cc: Donald White Tested-by: Gary Guo Cc: Gary Guo Reviewed-by: Christoph Hellwig Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 4879e70e2eef..df7b6e95c019 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -451,6 +451,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success, int *post_ret) { struct se_device *dev = cmd->se_dev; + sense_reason_t ret = TCM_NO_SENSE; /* * Only set SCF_COMPARE_AND_WRITE_POST to force a response fall-through @@ -458,9 +459,12 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success, * sent to the backend driver. */ spin_lock_irq(&cmd->t_state_lock); - if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) { + if (cmd->transport_state & CMD_T_SENT) { cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; *post_ret = 1; + + if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION) + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } spin_unlock_irq(&cmd->t_state_lock); @@ -470,7 +474,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success, */ up(&dev->caw_sem); - return TCM_NO_SENSE; + return ret; } static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success, -- cgit From 2a362249187a8d0f6d942d6e1d763d150a296f47 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 6 Feb 2017 19:39:09 -0500 Subject: btrfs: fix btrfs_compat_ioctl failures on non-compat ioctls Commit 4c63c2454ef incorrectly assumed that returning -ENOIOCTLCMD would cause the native ioctl to be called. The ->compat_ioctl callback is expected to handle all ioctls, not just compat variants. As a result, when using 32-bit userspace on 64-bit kernels, everything except those three ioctls would return -ENOTTY. Fixes: 4c63c2454ef ("btrfs: bugfix: handle FS_IOC32_{GETFLAGS,SETFLAGS,GETVERSION} in btrfs_ioctl") Cc: stable@vger.kernel.org Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0a6902555e65..0c77cad89e87 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5665,6 +5665,10 @@ long btrfs_ioctl(struct file *file, unsigned int #ifdef CONFIG_COMPAT long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + /* + * These all access 32-bit values anyway so no further + * handling is necessary. + */ switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; @@ -5675,8 +5679,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; - default: - return -ENOIOCTLCMD; } return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -- cgit From a11a7f71cac209c7c9cca66eb506e1ebb033a3b3 Mon Sep 17 00:00:00 2001 From: Marcus Huewe Date: Mon, 6 Feb 2017 18:34:56 +0100 Subject: ipv6: addrconf: fix generation of new temporary addresses Under some circumstances it is possible that no new temporary addresses will be generated. For instance, addrconf_prefix_rcv_add_addr() indirectly calls ipv6_create_tempaddr(), which creates a tentative temporary address and starts dad. Next, addrconf_prefix_rcv_add_addr() indirectly calls addrconf_verify_rtnl(). Now, assume that the previously created temporary address has the least preferred lifetime among all existing addresses and is still tentative (that is, dad is still running). Hence, the next run of addrconf_verify_rtnl() is performed when the preferred lifetime of the temporary address ends. If dad succeeds before the next run, the temporary address becomes deprecated during the next run, but no new temporary address is generated. In order to fix this, schedule the next addrconf_verify_rtnl() run slightly before the temporary address becomes deprecated, if dad succeeded. Signed-off-by: Marcus Huewe Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 81f7b4ea4281..a7bcc0ab5e99 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4015,6 +4015,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) if (bump_id) rt_genid_bump_ipv6(dev_net(dev)); + + /* Make sure that a new temporary address will be created + * before this temporary address becomes deprecated. + */ + if (ifp->flags & IFA_F_TEMPORARY) + addrconf_verify_rtnl(); } static void addrconf_dad_run(struct inet6_dev *idev) -- cgit From 628f07d33c1f2e7bf31e0a4a988bb07914bd5e73 Mon Sep 17 00:00:00 2001 From: Eyal Itkin Date: Tue, 7 Feb 2017 16:43:05 +0300 Subject: IB/rxe: Fix resid update Update the response's resid field when larger than MTU, instead of only updating the local resid variable. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Eyal Itkin Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3435efff8799..5bcf07328972 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -479,7 +479,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, goto err2; } - resid = mtu; + qp->resp.resid = mtu; } else { if (pktlen != resid) { state = RESPST_ERR_LENGTH; -- cgit From 647bf3d8a8e5777319da92af672289b2a6c4dc66 Mon Sep 17 00:00:00 2001 From: Eyal Itkin Date: Tue, 7 Feb 2017 16:45:19 +0300 Subject: IB/rxe: Fix mem_check_range integer overflow Update the range check to avoid integer-overflow in edge case. Resolves CVE 2016-8636. Signed-off-by: Eyal Itkin Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d0faca294006..86a6585b847d 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -59,9 +59,11 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) case RXE_MEM_TYPE_MR: case RXE_MEM_TYPE_FMR: - return ((iova < mem->iova) || - ((iova + length) > (mem->iova + mem->length))) ? - -EFAULT : 0; + if (iova < mem->iova || + length > mem->length || + iova > mem->iova + mem->length - length) + return -EFAULT; + return 0; default: return -EFAULT; -- cgit From 646ebd4166ca00bdf682a36bd2e1c9a74d848ac6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 8 Feb 2017 17:04:09 +0200 Subject: RDMA: Don't reference kernel private header from UAPI header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove references to private kernel header and defines from exported ib_user_verb.h file. The code snippet below is used to reproduce the issue: #include #include int main(void) { printf("IB_USER_VERBS_ABI_VERSION = %d\n", IB_USER_VERBS_ABI_VERSION); return 0; } It fails during compilation phase with an error: ➜ /tmp gcc main.c main.c:2:31: fatal error: rdma/ib_user_verb.h: No such file or directory #include ^ compilation terminated. Fixes: 189aba99e700 ("IB/uverbs: Extend modify_qp and support packet pacing") CC: Bodong Wang CC: Matan Barak CC: Christoph Hellwig Tested-by: Slava Shwartsman Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index dfdfe4e92d31..f4f87cff6dc6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -37,7 +37,6 @@ #define IB_USER_VERBS_H #include -#include /* * Increment this value if any changes that break userspace ABI @@ -548,11 +547,17 @@ enum { }; enum { - IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN + /* + * This value is equal to IB_QP_DEST_QPN. + */ + IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, }; enum { - IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT + /* + * This value is equal to IB_QP_RATE_LIMIT. + */ + IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, }; struct ib_uverbs_ex_create_qp { -- cgit From 2bd137de531367fb573d90150d1872cb2a2095f7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 8 Feb 2017 09:29:00 -0800 Subject: lwtunnel: valid encap attr check should return 0 when lwtunnel is disabled An error was reported upgrading to 4.9.8: root@Typhoon:~# ip route add default table 210 nexthop dev eth0 via 10.68.64.1 weight 1 nexthop dev eth0 via 10.68.64.2 weight 1 RTNETLINK answers: Operation not supported The problem occurs when CONFIG_LWTUNNEL is not enabled and a multipath route is submitted. The point of lwtunnel_valid_encap_type_attr is catch modules that need to be loaded before any references are taken with rntl held. With CONFIG_LWTUNNEL disabled, there will be no modules to load so the lwtunnel_valid_encap_type_attr stub should just return 0. Fixes: 9ed59592e3e3 ("lwtunnel: fix autoload of lwt modules") Reported-by: pupilla@libero.it Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 73dd87647460..0388b9c5f5e2 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -178,7 +178,10 @@ static inline int lwtunnel_valid_encap_type(u16 encap_type) } static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) { - return -EOPNOTSUPP; + /* return 0 since we are not walking attr looking for + * RTA_ENCAP_TYPE attribute on nexthops. + */ + return 0; } static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, -- cgit From d7426c69a1942b2b9b709bf66b944ff09f561484 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 8 Feb 2017 10:02:13 -0800 Subject: sit: fix a double free on error path Dmitry reported a double free in sit_init_net(): kernel BUG at mm/percpu.c:689! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 15692 Comm: syz-executor1 Not tainted 4.10.0-rc6-next-20170206 #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801c9cc27c0 task.stack: ffff88017d1d8000 RIP: 0010:pcpu_free_area+0x68b/0x810 mm/percpu.c:689 RSP: 0018:ffff88017d1df488 EFLAGS: 00010046 RAX: 0000000000010000 RBX: 00000000000007c0 RCX: ffffc90002829000 RDX: 0000000000010000 RSI: ffffffff81940efb RDI: ffff8801db841d94 RBP: ffff88017d1df590 R08: dffffc0000000000 R09: 1ffffffff0bb3bdd R10: dffffc0000000000 R11: 00000000000135dd R12: ffff8801db841d80 R13: 0000000000038e40 R14: 00000000000007c0 R15: 00000000000007c0 FS: 00007f6ea608f700(0000) GS:ffff8801dbe00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000002000aff8 CR3: 00000001c8d44000 CR4: 00000000001426f0 DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: free_percpu+0x212/0x520 mm/percpu.c:1264 ipip6_dev_free+0x43/0x60 net/ipv6/sit.c:1335 sit_init_net+0x3cb/0xa10 net/ipv6/sit.c:1831 ops_init+0x10a/0x530 net/core/net_namespace.c:115 setup_net+0x2ed/0x690 net/core/net_namespace.c:291 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] SyS_unshare+0x64e/0xfc0 kernel/fork.c:2231 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because when tunnel->dst_cache init fails, we free dev->tstats once in ipip6_tunnel_init() and twice in sit_init_net(). This looks redundant but its ndo_uinit() does not seem enough to clean up everything here. So avoid this by setting dev->tstats to NULL after the first free, at least for -net. Reported-by: Dmitry Vyukov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fad992ad4bc8..99853c6e33a8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1380,6 +1380,7 @@ static int ipip6_tunnel_init(struct net_device *dev) err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (err) { free_percpu(dev->tstats); + dev->tstats = NULL; return err; } -- cgit From b22bc27868e8c11fe3f00937a341b44f80b50364 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Mon, 6 Feb 2017 10:04:28 -0600 Subject: ibmvscsis: Add SGL limit This patch adds internal LIO sgl limit since the driver already sets a max transfer limit on transport layer of 1MB to the client. Cc: stable@vger.kernel.org Tested-by: Steven Royer Signed-off-by: Bryant G. Ly Signed-off-by: Nicholas Bellinger --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 99b747cedbeb..0f807798c624 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3816,6 +3816,7 @@ static struct configfs_attribute *ibmvscsis_tpg_attrs[] = { static const struct target_core_fabric_ops ibmvscsis_ops = { .module = THIS_MODULE, .name = "ibmvscsis", + .max_data_sg_nents = MAX_TXU / PAGE_SIZE, .get_fabric_name = ibmvscsis_get_fabric_name, .tpg_get_wwn = ibmvscsis_get_fabric_wwn, .tpg_get_tag = ibmvscsis_get_tag, -- cgit From 217e6fa24ce28ec87fca8da93c9016cb78028612 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Feb 2017 15:57:20 -0500 Subject: net: introduce device min_header_len The stack must not pass packets to device drivers that are shorter than the minimum link layer header length. Previously, packet sockets would drop packets smaller than or equal to dev->hard_header_len, but this has false positives. Zero length payload is used over Ethernet. Other link layer protocols support variable length headers. Support for validation of these protocols removed the min length check for all protocols. Introduce an explicit dev->min_header_len parameter and drop all packets below this value. Initially, set it to non-zero only for Ethernet and loopback. Other protocols can follow in a patch to net-next. Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") Reported-by: Sowmini Varadhan Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- drivers/net/loopback.c | 1 + include/linux/netdevice.h | 4 ++++ net/ethernet/eth.c | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 1e05b7c2d157..0844f8496413 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -164,6 +164,7 @@ static void loopback_setup(struct net_device *dev) { dev->mtu = 64 * 1024; dev->hard_header_len = ETH_HLEN; /* 14 */ + dev->min_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 70ad0291d517..27914672602d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1511,6 +1511,7 @@ enum netdev_priv_flags { * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. + * @min_header_len: Minimum hardware header length * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -1728,6 +1729,7 @@ struct net_device { unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; + unsigned short min_header_len; unsigned short needed_headroom; unsigned short needed_tailroom; @@ -2694,6 +2696,8 @@ static inline bool dev_validate_header(const struct net_device *dev, { if (likely(len >= dev->hard_header_len)) return true; + if (len < dev->min_header_len) + return false; if (capable(CAP_SYS_RAWIO)) { memset(ll_header + len, 0, dev->hard_header_len - len); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 8c5a479681ca..516c87e75de7 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -356,6 +356,7 @@ void ether_setup(struct net_device *dev) dev->header_ops = ð_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; + dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; -- cgit From 57031eb794906eea4e1c7b31dc1e2429c0af0c66 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Feb 2017 15:57:21 -0500 Subject: packet: round up linear to header len Link layer protocols may unconditionally pull headers, as Ethernet does in eth_type_trans. Ensure that the entire link layer header always lies in the skb linear segment. tpacket_snd has such a check. Extend this to packet_snd. Variable length link layer headers complicate the computation somewhat. Here skb->len may be smaller than dev->hard_header_len. Round up the linear length to be at least as long as the smallest of the two. Reported-by: Dmitry Vyukov Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3d555c79a7b5..d56ee46b11fc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2755,7 +2755,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); - int hlen, tlen; + int hlen, tlen, linear; int extra_len = 0; /* @@ -2816,8 +2816,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, - __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len), + linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); + linear = max(linear, min_t(int, len, dev->hard_header_len)); + skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; -- cgit From 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 7 Feb 2017 12:59:46 -0800 Subject: ping: fix a null pointer dereference Andrey reported a kernel crash: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 3880 Comm: syz-executor1 Not tainted 4.10.0-rc6+ #124 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff880060048040 task.stack: ffff880069be8000 RIP: 0010:ping_v4_push_pending_frames net/ipv4/ping.c:647 [inline] RIP: 0010:ping_v4_sendmsg+0x1acd/0x23f0 net/ipv4/ping.c:837 RSP: 0018:ffff880069bef8b8 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff880069befb90 RCX: 0000000000000000 RDX: 0000000000000018 RSI: ffff880069befa30 RDI: 00000000000000c2 RBP: ffff880069befbb8 R08: 0000000000000008 R09: 0000000000000000 R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069befab0 R13: ffff88006c624a80 R14: ffff880069befa70 R15: 0000000000000000 FS: 00007f6f7c716700(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004a6f28 CR3: 000000003a134000 CR4: 00000000000006e0 Call Trace: inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because we miss a check for NULL pointer for skb_peek() when the queue is empty. Other places already have the same check. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 86cca610f4c2..68d77b1f1495 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); -- cgit From 382e1eea2d983cd2343482c6a638f497bb44a636 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Feb 2017 23:10:13 -0800 Subject: net: dsa: Do not destroy invalid network devices dsa_slave_create() can fail, and dsa_user_port_unapply() will properly check for the network device not being NULL before attempting to destroy it. We were not setting the slave network device as NULL if dsa_slave_create() failed, so we would later on be calling dsa_slave_destroy() on a now free'd and unitialized network device, causing crashes in dsa_slave_destroy(). Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index da3862124545..0f99297b2fb3 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -273,6 +273,7 @@ static int dsa_user_port_apply(struct device_node *port, u32 index, if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", index, err); + ds->ports[index].netdev = NULL; return err; } -- cgit From 075ad765ef7541b2860de8408c165a92b78aefa3 Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Wed, 8 Feb 2017 18:09:00 +0530 Subject: net: thunderx: Fix PHY autoneg for SGMII QLM mode This patch fixes the case where there is no phydev attached to a LMAC in DT due to non-existance of a PHY driver or due to usage of non-stanadard PHY which doesn't support autoneg. Changes dependeds on firmware to send correct info w.r.t PHY and autoneg capability. This patch also covers a case where a 10G/40G interface is used as a 1G with convertors with Cortina PHY in between. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 108 +++++++++++++++++++--- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 5 + 2 files changed, 101 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2f85b64f01fa..1e4695270da6 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -31,6 +31,7 @@ struct lmac { u8 lmac_type; u8 lane_to_sds; bool use_training; + bool autoneg; bool link_up; int lmacid; /* ID within BGX */ int lmacid_bd; /* ID on board */ @@ -461,7 +462,17 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac) /* power down, reset autoneg, autoneg enable */ cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL); cfg &= ~PCS_MRX_CTL_PWR_DN; - cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN); + cfg |= PCS_MRX_CTL_RST_AN; + if (lmac->phydev) { + cfg |= PCS_MRX_CTL_AN_EN; + } else { + /* In scenarios where PHY driver is not present or it's a + * non-standard PHY, FW sets AN_EN to inform Linux driver + * to do auto-neg and link polling or not. + */ + if (cfg & PCS_MRX_CTL_AN_EN) + lmac->autoneg = true; + } bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg); if (lmac->lmac_type == BGX_MODE_QSGMII) { @@ -472,7 +483,7 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac) return 0; } - if (lmac->lmac_type == BGX_MODE_SGMII) { + if ((lmac->lmac_type == BGX_MODE_SGMII) && lmac->phydev) { if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, PCS_MRX_STATUS_AN_CPT, false)) { dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); @@ -678,12 +689,71 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } +static void bgx_poll_for_sgmii_link(struct lmac *lmac) +{ + u64 pcs_link, an_result; + u8 speed; + + pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid, + BGX_GMP_PCS_MRX_STATUS); + + /*Link state bit is sticky, read it again*/ + if (!(pcs_link & PCS_MRX_STATUS_LINK)) + pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid, + BGX_GMP_PCS_MRX_STATUS); + + if (bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_GMP_PCS_MRX_STATUS, + PCS_MRX_STATUS_AN_CPT, false)) { + lmac->link_up = false; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; + goto next_poll; + } + + lmac->link_up = ((pcs_link & PCS_MRX_STATUS_LINK) != 0) ? true : false; + an_result = bgx_reg_read(lmac->bgx, lmac->lmacid, + BGX_GMP_PCS_ANX_AN_RESULTS); + + speed = (an_result >> 3) & 0x3; + lmac->last_duplex = (an_result >> 1) & 0x1; + switch (speed) { + case 0: + lmac->last_speed = 10; + break; + case 1: + lmac->last_speed = 100; + break; + case 2: + lmac->last_speed = 1000; + break; + default: + lmac->link_up = false; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; + break; + } + +next_poll: + + if (lmac->last_link != lmac->link_up) { + if (lmac->link_up) + bgx_sgmii_change_link_state(lmac); + lmac->last_link = lmac->link_up; + } + + queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 3); +} + static void bgx_poll_for_link(struct work_struct *work) { struct lmac *lmac; u64 spu_link, smu_link; lmac = container_of(work, struct lmac, dwork.work); + if (lmac->is_sgmii) { + bgx_poll_for_sgmii_link(lmac); + return; + } /* Receive link is latching low. Force it high and verify it */ bgx_reg_modify(lmac->bgx, lmac->lmacid, @@ -775,9 +845,21 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) (lmac->lmac_type != BGX_MODE_XLAUI) && (lmac->lmac_type != BGX_MODE_40G_KR) && (lmac->lmac_type != BGX_MODE_10G_KR)) { - if (!lmac->phydev) - return -ENODEV; - + if (!lmac->phydev) { + if (lmac->autoneg) { + bgx_reg_write(bgx, lmacid, + BGX_GMP_PCS_LINKX_TIMER, + PCS_LINKX_TIMER_COUNT); + goto poll; + } else { + /* Default to below link speed and duplex */ + lmac->link_up = true; + lmac->last_speed = 1000; + lmac->last_duplex = 1; + bgx_sgmii_change_link_state(lmac); + return 0; + } + } lmac->phydev->dev_flags = 0; if (phy_connect_direct(&lmac->netdev, lmac->phydev, @@ -786,15 +868,17 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) return -ENODEV; phy_start_aneg(lmac->phydev); - } else { - lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); - if (!lmac->check_link) - return -ENOMEM; - INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); - queue_delayed_work(lmac->check_link, &lmac->dwork, 0); + return 0; } +poll: + lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!lmac->check_link) + return -ENOMEM; + INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); + queue_delayed_work(lmac->check_link, &lmac->dwork, 0); + return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index c18ebfeb2039..a60f189429bb 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -153,10 +153,15 @@ #define PCS_MRX_CTL_LOOPBACK1 BIT_ULL(14) #define PCS_MRX_CTL_RESET BIT_ULL(15) #define BGX_GMP_PCS_MRX_STATUS 0x30008 +#define PCS_MRX_STATUS_LINK BIT_ULL(2) #define PCS_MRX_STATUS_AN_CPT BIT_ULL(5) +#define BGX_GMP_PCS_ANX_ADV 0x30010 #define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020 +#define BGX_GMP_PCS_LINKX_TIMER 0x30040 +#define PCS_LINKX_TIMER_COUNT 0x1E84 #define BGX_GMP_PCS_SGM_AN_ADV 0x30068 #define BGX_GMP_PCS_MISCX_CTL 0x30078 +#define PCS_MISC_CTL_MODE BIT_ULL(8) #define PCS_MISC_CTL_DISP_EN BIT_ULL(13) #define PCS_MISC_CTL_GMX_ENO BIT_ULL(11) #define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full -- cgit From 49d29a077af8d2ee3b291ccd8d053541bebe09d7 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Wed, 1 Feb 2017 10:35:08 +0100 Subject: drm: vc4: adapt to new behaviour of drm_crtc.c When drm_crtc_init_with_planes() was orignally added (in drm_crtc.c, e13161af80c185ecd8dc4641d0f5df58f9e3e0af drm: Add drm_crtc_init_with_planes() (v2)), it only checked for "primary" being non-null. If that was the case, it modified primary->possible_crtcs. Then, when support for cursor planes was added (fc1d3e44ef7c1db93384150fdbf8948dcf949f15 drm: Allow drivers to register cursor planes with crtc), the same behaviour was implemented for cursor planes. vc4_plane_init() since its inception has passed 0xff as "possible_crtcs" parameter to drm_universal_plane_init(). With a change in drm_crtc.c (7abc7d47510c75dd984380ebf819616e574c9604 drm: don't override possible_crtcs for primary/cursor planes) passing 0xff results in primary's possible_crtcs set to 0xff (cursor was updated manually by vc4_crtc.c). Consequently, it would be allowed to use the primary plane from CRTC 1 (for example) on CRTC 0, which would result in the overlay and cursors being buried. Signed-off-by: Andrzej Pietrasiewicz Reviewed-by: Eric Anholt Link: http://patchwork.freedesktop.org/patch/msgid/1485941708-27892-1-git-send-email-andrzej.p@samsung.com Fixes: 7abc7d47510c ("drm: don't override possible_crtcs for primary/cursor planes") --- drivers/gpu/drm/vc4/vc4_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 881bf489478b..686cdd3c86f2 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -858,7 +858,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, } } plane = &vc4_plane->base; - ret = drm_universal_plane_init(dev, plane, 0xff, + ret = drm_universal_plane_init(dev, plane, 0, &vc4_plane_funcs, formats, num_formats, type, NULL); -- cgit From ed5bd7dc88edf4a4a9c67130742b1b59aa017a5f Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 8 Feb 2017 14:30:50 -0800 Subject: kernel/ucount.c: mark user_header with kmemleak_ignore() The user_header gets caught by kmemleak with the following splat as missing a free: unreferenced object 0xffff99667a733d80 (size 96): comm "swapper/0", pid 1, jiffies 4294892317 (age 62191.468s) hex dump (first 32 bytes): a0 b6 92 b4 ff ff ff ff 00 00 00 00 01 00 00 00 ................ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: kmemleak_alloc+0x4a/0xa0 __kmalloc+0x144/0x260 __register_sysctl_table+0x54/0x5e0 register_sysctl+0x1b/0x20 user_namespace_sysctl_init+0x17/0x34 do_one_initcall+0x52/0x1a0 kernel_init_freeable+0x173/0x200 kernel_init+0xe/0x100 ret_from_fork+0x2c/0x40 The BUG_ON()s are intended to crash so no need to clean up after ourselves on error there. This is also a kernel/ subsys_init() we don't need a respective exit call here as this is never modular, so just white list it. Link: http://lkml.kernel.org/r/20170203211404.31458-1-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Cc: Eric W. Biederman Cc: Kees Cook Cc: Nikolay Borisov Cc: Serge Hallyn Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ucount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 4bbd38ec3788..95c6336fc2b3 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -227,11 +227,10 @@ static __init int user_namespace_sysctl_init(void) * properly. */ user_header = register_sysctl("user", empty); + kmemleak_ignore(user_header); BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif return 0; } subsys_initcall(user_namespace_sysctl_init); - - -- cgit From 0911d0041c22922228ca52a977d7b0b0159fee4b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 14:30:53 -0800 Subject: mm: avoid returning VM_FAULT_RETRY from ->page_mkwrite handlers Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return code (GFS2 or Lustre can definitely do this). However VM_FAULT_RETRY from ->page_mkwrite is completely unhandled by the mm code and results in locking and writeably mapping the page which definitely is not what the caller wanted. Fix Lustre and block_page_mkwrite_ret() used by other filesystems (notably GFS2) to return VM_FAULT_NOPAGE instead which results in bailing out from the fault code, the CPU then retries the access, and we fault again effectively doing what the handler wanted. Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Al Viro Reviewed-by: Jinshan Xiong Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/lustre/lustre/llite/llite_mmap.c | 4 +--- include/linux/buffer_head.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index ee01f20d8b11..9afa6bec3e6f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -390,15 +390,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = VM_FAULT_LOCKED; break; case -ENODATA: + case -EAGAIN: case -EFAULT: result = VM_FAULT_NOPAGE; break; case -ENOMEM: result = VM_FAULT_OOM; break; - case -EAGAIN: - result = VM_FAULT_RETRY; - break; default: result = VM_FAULT_SIGBUS; break; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d67ab83823ad..79591c3660cc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -243,12 +243,10 @@ static inline int block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; - if (err == -EFAULT) + if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; - if (err == -EAGAIN) - return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- cgit From 4d59b6ccf000862beed6fc0765d3209f98a8d8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Feb 2017 14:30:56 -0800 Subject: cpumask: use nr_cpumask_bits for parsing functions Commit 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions") converted both cpumask printing and parsing functions to use nr_cpu_ids instead of nr_cpumask_bits. While this was okay for the printing functions as it just picked one of the two output formats that we were alternating between depending on a kernel config, doing the same for parsing wasn't okay. nr_cpumask_bits can be either nr_cpu_ids or NR_CPUS. We can always use nr_cpu_ids but that is a variable while NR_CPUS is a constant, so it can be more efficient to use NR_CPUS when we can get away with it. Converting the printing functions to nr_cpu_ids makes sense because it affects how the masks get presented to userspace and doesn't break anything; however, using nr_cpu_ids for parsing functions can incorrectly leave the higher bits uninitialized while reading in these masks from userland. As all testing and comparison functions use nr_cpumask_bits which can be larger than nr_cpu_ids, the parsed cpumasks can erroneously yield false negative results. This made the taskstats interface incorrectly return -EINVAL even when the inputs were correct. Fix it by restoring the parse functions to use nr_cpumask_bits instead of nr_cpu_ids. Link: http://lkml.kernel.org/r/20170206182442.GB31078@htj.duckdns.org Fixes: 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions") Signed-off-by: Tejun Heo Reported-by: Martin Steigerwald Debugged-by: Ben Hutchings Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c717f5ea88cb..b3d2c1a89ac4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -560,7 +560,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -575,7 +575,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), - nr_cpu_ids); + nr_cpumask_bits); } /** @@ -590,7 +590,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) char *nl = strchr(buf, '\n'); unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf); - return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -602,7 +602,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** -- cgit From a810007afe239d59c1115fcaa06eb5b480f876e9 Mon Sep 17 00:00:00 2001 From: Sean Rees Date: Wed, 8 Feb 2017 14:30:59 -0800 Subject: mm/slub.c: fix random_seq offset destruction Commit 210e7a43fa90 ("mm: SLUB freelist randomization") broke USB hub initialisation as described in https://bugzilla.kernel.org/show_bug.cgi?id=177551. Bail out early from init_cache_random_seq if s->random_seq is already initialised. This prevents destroying the previously computed random_seq offsets later in the function. If the offsets are destroyed, then shuffle_freelist will truncate page->freelist to just the first object (orphaning the rest). Fixes: 210e7a43fa90 ("mm: SLUB freelist randomization") Link: http://lkml.kernel.org/r/20170207140707.20824-1-sean@erifax.org Signed-off-by: Sean Rees Reported-by: Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Thomas Garnier Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 7aa6f433f4de..7ec0a965c6a3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1422,6 +1422,10 @@ static int init_cache_random_seq(struct kmem_cache *s) int err; unsigned long i, count = oo_objects(s->oo); + /* Bailout if already initialised */ + if (s->random_seq) + return 0; + err = cache_random_seq_create(s, count, GFP_KERNEL); if (err) { pr_err("SLUB: Unable to initialize free list for %s\n", -- cgit From 3b802c9455f973fa786eafb4d5bd4634a7dd5130 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 7 Feb 2017 16:23:31 -0800 Subject: Revert "hwrng: core - zeroize buffers with random data" This reverts commit 2cc751545854d7bd7eedf4d7e377bb52e176cd07. With this commit in place I get on a Cavium ThunderX (arm64) system: $ if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v > rng-bad.txt 1+0 records in 1+0 records out 256 bytes (256 B) copied, 9.1171e-05 s, 2.8 MB/s $ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v >> rng-bad.txt 1+0 records in 1+0 records out 256 bytes (256 B) copied, 9.6141e-05 s, 2.7 MB/s $ cat rng-bad.txt 000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000050 00 00 00 00 37 20 46 ae d0 fc 1c 55 25 6e b0 b8 000060 7c 7e d7 d4 00 0f 6f b2 91 1e 30 a8 fa 3e 52 0e 000070 06 2d 53 30 be a1 20 0f aa 56 6e 0e 44 6e f4 35 000080 b7 6a fe d2 52 70 7e 58 56 02 41 ea d1 9c 6a 6a 000090 d1 bd d8 4c da 35 45 ef 89 55 fc 59 d5 cd 57 ba 0000a0 4e 3e 02 1c 12 76 43 37 23 e1 9f 7a 9f 9e 99 24 0000b0 47 b2 de e3 79 85 f6 55 7e ad 76 13 4f a0 b5 41 0000c0 c6 92 42 01 d9 12 de 8f b4 7b 6e ae d7 24 fc 65 0000d0 4d af 0a aa 36 d9 17 8d 0e 8b 7a 3b b6 5f 96 47 0000e0 46 f7 d8 ce 0b e8 3e c6 13 a6 2c b6 d6 cc 17 26 0000f0 e3 c3 17 8e 9e 45 56 1e 41 ef 29 1a a8 65 c8 3a 000100 000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000050 00 00 00 00 f4 90 65 aa 8b f2 5e 31 01 53 b4 d4 000060 06 c0 23 a2 99 3d 01 e4 b0 c1 b1 55 0f 80 63 cf 000070 33 24 d8 3a 1d 5e cd 2c ba c0 d0 18 6f bc 97 46 000080 1e 19 51 b1 90 15 af 80 5e d1 08 0d eb b0 6c ab 000090 6a b4 fe 62 37 c5 e1 ee 93 c3 58 78 91 2a d5 23 0000a0 63 50 eb 1f 3b 84 35 18 cf b2 a4 b8 46 69 9e cf 0000b0 0c 95 af 03 51 45 a8 42 f1 64 c9 55 fc 69 76 63 0000c0 98 9d 82 fa 76 85 24 da 80 07 29 fe 4e 76 0c 61 0000d0 ff 23 94 4f c8 5c ce 0b 50 e8 31 bc 9d ce f4 ca 0000e0 be ca 28 da e6 fa cc 64 1c ec a8 41 db fe 42 bd 0000f0 a0 e2 4b 32 b4 52 ba 03 70 8e c1 8e d0 50 3a c6 000100 To my untrained mental entropy detector, the first several bytes of each read from /dev/hwrng seem to not be very random (i.e. all zero). When I revert the patch (apply this patch), I get back to what we have in v4.9, which looks like (much more random appearing): $ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v > rng-good.txt 1+0 records in 1+0 records out 256 bytes (256 B) copied, 0.000252233 s, 1.0 MB/s $ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v >> rng-good.txt 1+0 records in 1+0 records out 256 bytes (256 B) copied, 0.000113571 s, 2.3 MB/s $ cat rng-good.txt 000000 75 d1 2d 19 68 1f d2 26 a1 49 22 61 66 e8 09 e5 000010 e0 4e 10 d0 1a 2c 45 5d 59 04 79 8e e2 b7 2c 2e 000020 e8 ad da 34 d5 56 51 3d 58 29 c7 7a 8e ed 22 67 000030 f9 25 b9 fb c6 b7 9c 35 1f 84 21 35 c1 1d 48 34 000040 45 7c f6 f1 57 63 1a 88 38 e8 81 f0 a9 63 ad 0e 000050 be 5d 3e 74 2e 4e cb 36 c2 01 a8 14 e1 38 e1 bb 000060 23 79 09 56 77 19 ff 98 e8 44 f3 27 eb 6e 0a cb 000070 c9 36 e3 2a 96 13 07 a0 90 3f 3b bd 1d 04 1d 67 000080 be 33 14 f8 02 c2 a4 02 ab 8b 5b 74 86 17 f0 5e 000090 a1 d7 aa ef a6 21 7b 93 d1 85 86 eb 4e 8c d0 4c 0000a0 56 ac e4 45 27 44 84 9f 71 db 36 b9 f7 47 d7 b3 0000b0 f2 9c 62 41 a3 46 2b 5b e3 80 63 a4 35 b5 3c f4 0000c0 bc 1e 3a ad e4 59 4a 98 6c e8 8d ff 1b 16 f8 52 0000d0 05 5c 2f 52 2a 0f 45 5b 51 fb 93 97 a4 49 4f 06 0000e0 f3 a0 d1 1e ba 3d ed a7 60 8f bb 84 2c 21 94 2d 0000f0 b3 66 a6 61 1e 58 30 24 85 f8 c8 18 c3 77 00 22 000100 000000 73 ca cc a1 d9 bb 21 8d c3 5c f3 ab 43 6d a7 a4 000010 4a fd c5 f4 9c ba 4a 0f b1 2e 19 15 4e 84 26 e0 000020 67 c9 f2 52 4d 65 1f 81 b7 8b 6d 2b 56 7b 99 75 000030 2e cd d0 db 08 0c 4b df f3 83 c6 83 00 2e 2b b8 000040 0f af 61 1d f2 02 35 74 b5 a4 6f 28 f3 a1 09 12 000050 f2 53 b5 d2 da 45 01 e5 12 d6 46 f8 0b db ed 51 000060 7b f4 0d 54 e0 63 ea 22 e2 1d d0 d6 d0 e7 7e e0 000070 93 91 fb 87 95 43 41 28 de 3d 8b a3 a8 8f c4 9e 000080 30 95 12 7a b2 27 28 ff 37 04 2e 09 7c dd 7c 12 000090 e1 50 60 fb 6d 5f a8 65 14 40 89 e3 4c d2 87 8f 0000a0 34 76 7e 66 7a 8e 6b a3 fc cf 38 52 2e f9 26 f0 0000b0 98 63 15 06 34 99 b2 88 4f aa d8 14 88 71 f1 81 0000c0 be 51 11 2b f4 7e a0 1e 12 b2 44 2e f6 8d 84 ea 0000d0 63 82 2b 66 b3 9a fd 08 73 5a c2 cc ab 5a af b1 0000e0 88 e3 a6 80 4b fc db ed 71 e0 ae c0 0a a4 8c 35 0000f0 eb 89 f9 8a 4b 52 59 6f 09 7c 01 3f 56 e7 c7 bf 000100 Signed-off-by: David Daney Acked-by: Herbert Xu Signed-off-by: Linus Torvalds --- drivers/char/hw_random/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 6ce5ce8be2f2..87fba424817e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -92,7 +92,6 @@ static void add_early_randomness(struct hwrng *rng) mutex_unlock(&reading_mutex); if (bytes_read > 0) add_device_randomness(rng_buffer, bytes_read); - memset(rng_buffer, 0, size); } static inline void cleanup_rng(struct kref *kref) @@ -288,7 +287,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: - memset(rng_buffer, 0, rng_buffer_size()); return ret ? : err; out_unlock_reading: @@ -427,7 +425,6 @@ static int hwrng_fillfn(void *unused) /* Outside lock, sure, but y'know: randomness. */ add_hwgenerator_randomness((void *)rng_fillbuf, rc, rc * current_quality * 8 >> 10); - memset(rng_fillbuf, 0, rng_buffer_size()); } hwrng_fill = NULL; return 0; -- cgit From d966564fcdc19e13eb6ba1fbe6b8101070339c3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Feb 2017 18:08:29 -0800 Subject: Revert "x86/ioapic: Restore IO-APIC irq_chip retrigger callback" This reverts commit 020eb3daaba2857b32c4cf4c82f503d6a00a67de. Gabriel C reports that it causes his machine to not boot, and we haven't tracked down the reason for it yet. Since the bug it fixes has been around for a longish time, we're better off reverting the fix for now. Gabriel says: "It hangs early and freezes with a lot RCU warnings. I bisected it down to : > Ruslan Ruslichenko (1): > x86/ioapic: Restore IO-APIC irq_chip retrigger callback Reverting this one fixes the problem for me.. The box is a PRIMERGY TX200 S5 , 2 socket , 2 x E5520 CPU(s) installed" and Ruslan and Thomas are currently stumped. Reported-and-bisected-by: Gabriel C Cc: Ruslan Ruslichenko Cc: Thomas Gleixner Cc: stable@kernel.org # for the backport of the original commit Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/io_apic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52f352b063fd..bd6b8c270c24 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1875,7 +1875,6 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1887,7 +1886,6 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -- cgit From 90c1e3c2fafec57fcb55b5d69bcf293b1a5fc8b3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 6 Feb 2017 13:05:16 +1100 Subject: powerpc/mm/radix: Update ERAT flushes when invalidating TLB Three tiny changes to the ERAT flushing logic: First don't make it depend on DD1. It hasn't been decided yet but we might run DD2 in a mode that also requires explicit flushes for performance reasons so make it unconditional. We also add a missing isync, and finally remove the flush from _tlbiel_va as it is only necessary for congruence-class invalidations (PID, LPID and full TLB), not targetted invalidations. Fixes: 96ed1fe511a8 ("powerpc/mm/radix: Invalidate ERAT on tlbiel for POWER9 DD1") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 61b79119065f..952713d6cf04 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -50,9 +50,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { __tlbiel_pid(pid, set, ric); } - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); - return; + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } static inline void _tlbie_pid(unsigned long pid, unsigned long ric) @@ -85,8 +83,6 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, -- cgit From 9b256714979fad61ae11d90b53cf67dd5e6484eb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 7 Feb 2017 11:35:31 +1100 Subject: powerpc/powernv: Fix CPU hotplug to handle waking on HVI The IPIs come in as HVI not EE, so we need to test the appropriate SRR1 bits. The encoding is such that it won't have false positives on P7 and P8 so we can just test it like that. We also need to handle the icp-opal variant of the flush. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 3 ++- arch/powerpc/include/asm/xics.h | 1 + arch/powerpc/platforms/powernv/smp.c | 12 ++++++++++-- arch/powerpc/sysdev/xics/icp-opal.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0d4531aa2052..dff79798903d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -649,9 +649,10 @@ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ -#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */ +#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 and 9 */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ +#define SRR1_WAKEHVI 0x00240000 /* Hypervisor Virtualization Interrupt (P9) */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ #define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index f0b238516e9b..e0b9e576905a 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -44,6 +44,7 @@ static inline int icp_hv_init(void) { return -ENODEV; } #ifdef CONFIG_PPC_POWERNV extern int icp_opal_init(void); +extern void icp_opal_flush_interrupt(void); #else static inline int icp_opal_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index c789258ae1e1..eec0e8d0454d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -155,8 +155,10 @@ static void pnv_smp_cpu_kill_self(void) wmask = SRR1_WAKEMASK_P8; idle_states = pnv_get_supported_cpuidle_states(); + /* We don't want to take decrementer interrupts while we are offline, - * so clear LPCR:PECE1. We keep PECE2 enabled. + * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9) + * enabled as to let IPIs in. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); @@ -206,8 +208,12 @@ static void pnv_smp_cpu_kill_self(void) * contains 0. */ if (((srr1 & wmask) == SRR1_WAKEEE) || + ((srr1 & wmask) == SRR1_WAKEHVI) || (local_paca->irq_happened & PACA_IRQ_EE)) { - icp_native_flush_interrupt(); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + icp_opal_flush_interrupt(); + else + icp_native_flush_interrupt(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); @@ -221,6 +227,8 @@ static void pnv_smp_cpu_kill_self(void) if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline !\n", cpu); } + + /* Re-enable decrementer interrupts */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); DBG("CPU%d coming online...\n", cpu); } diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 60c57657c772..c96c0cb95d87 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -132,6 +132,35 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) return smp_ipi_demux(); } +/* + * Called when an interrupt is received on an off-line CPU to + * clear the interrupt, so that the CPU can go back to nap mode. + */ +void icp_opal_flush_interrupt(void) +{ + unsigned int xirr; + unsigned int vec; + + do { + xirr = icp_opal_get_xirr(); + vec = xirr & 0x00ffffff; + if (vec == XICS_IRQ_SPURIOUS) + break; + if (vec == XICS_IPI) { + /* Clear pending IPI */ + int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff); + } else { + pr_err("XICS: hw interrupt 0x%x to offline cpu, " + "disabling\n", vec); + xics_mask_unknown_vec(vec); + } + + /* EOI the interrupt */ + } while (opal_int_eoi(xirr) > 0); +} + #endif /* CONFIG_SMP */ static const struct icp_ops icp_opal_ops = { -- cgit From f83e6862047e1e371bdc5d512dd6cabe8a3965b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 7 Feb 2017 11:35:36 +1100 Subject: powerpc/powernv: Properly set "host-ipi" on IPIs Otherwise KVM will fail to pass them through to the host Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index c96c0cb95d87..f9670eabfcfa 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -120,14 +120,16 @@ static void icp_opal_cause_ipi(int cpu, unsigned long data) { int hw_cpu = get_hard_smp_processor_id(cpu); + kvmppc_set_host_ipi(cpu, 1); opal_int_set_mfrr(hw_cpu, IPI_PRIORITY); } static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) { - int hw_cpu = hard_smp_processor_id(); + int cpu = smp_processor_id(); - opal_int_set_mfrr(hw_cpu, 0xff); + kvmppc_set_host_ipi(cpu, 0); + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff); return smp_ipi_demux(); } -- cgit From af677166cf63c179dc2485053166e02c4aea01eb Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 9 Feb 2017 09:20:54 +0800 Subject: ALSA: hda - adding a new NV HDMI/DP codec ID in the driver Without this change, the HDMI/DP codec will be recognised as a generic codec, and there is no sound when playing through this codec. As suggested by NVidia side, after adding the new ID in the driver, the sound playing works well. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index cf9bc042fe96..3fc201c3b95a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3639,6 +3639,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de0080, "GPU 80 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0082, "GPU 82 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), -- cgit From f43128c75202f29ee71aa83e6c320a911137c189 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 27 Jan 2017 15:59:30 +0100 Subject: i2c: piix4: Fix request_region size Since '701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC")' we are using the SMBSLVCNT register at offset 0x8. We need to request it. Fixes: 701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC") Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index e34d82e79b98..73cc6799cc59 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -58,7 +58,7 @@ #define SMBSLVDAT (0xC + piix4_smba) /* count for request_region */ -#define SMBIOSIZE 8 +#define SMBIOSIZE 9 /* PCI Address Constants */ #define SMBBA 0x090 -- cgit From bbb27fc33d44e7b8d96369810654df4ee1837566 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 2 Feb 2017 20:15:16 +0100 Subject: i2c: piix4: Request the SMBUS semaphore inside the mutex SMBSLVCNT must be protected with the piix4_mutex_sb800 in order to avoid multiple buses accessing to the semaphore at the same time. Fixes: 701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC") Reported-by: Jean Delvare Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 73cc6799cc59..c21ca7bf2efe 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -592,6 +592,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 port; int retval; + mutex_lock(&piix4_mutex_sb800); + /* Request the SMBUS semaphore, avoid conflicts with the IMC */ smbslvcnt = inb_p(SMBSLVCNT); do { @@ -605,10 +607,10 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, usleep_range(1000, 2000); } while (--retries); /* SMBus is still owned by the IMC, we give up */ - if (!retries) + if (!retries) { + mutex_unlock(&piix4_mutex_sb800); return -EBUSY; - - mutex_lock(&piix4_mutex_sb800); + } outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); @@ -623,11 +625,11 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1); - mutex_unlock(&piix4_mutex_sb800); - /* Release the semaphore */ outb_p(smbslvcnt | 0x20, SMBSLVCNT); + mutex_unlock(&piix4_mutex_sb800); + return retval; } -- cgit From 8672aed7bd865774257efd40929702759a869329 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 8 Feb 2017 22:44:44 -0800 Subject: pstore: don't OOPS when there are no ftrace zones We'll OOPS in ramoops_get_next_prz() if the platform didn't ask for any ftrace zones (i.e., cxt->fprzs will be NULL). Let's just skip this entire FTRACE section if there's no 'fprzs'. Regression seen on a coreboot/depthcharge-based Chromebook. Fixes: 2fbea82bbb89 ("pstore: Merge per-CPU ftrace records into one") Cc: Joel Fernandes Cc: Kees Cook Signed-off-by: Brian Norris Signed-off-by: Kees Cook --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 27c059e1760a..1d887efaaf71 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -280,7 +280,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, 1, id, type, PSTORE_TYPE_PMSG, 0); /* ftrace is last since it may want to dynamically allocate memory. */ - if (!prz_ok(prz)) { + if (!prz_ok(prz) && cxt->fprzs) { if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { prz = ramoops_get_next_prz(cxt->fprzs, &cxt->ftrace_read_cnt, 1, id, type, -- cgit From 6d9f66ac7fec2a6ccd649e5909806dfe36f1fc25 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Feb 2017 19:05:26 -0800 Subject: net: phy: Fix PHY module checks and NULL deref in phy_attach_direct() The Generic PHY drivers gets assigned after we checked that the current PHY driver is NULL, so we need to check a few things before we can safely dereference d->driver. This would be causing a NULL deference to occur when a system binds to the Generic PHY driver. Update phy_attach_direct() to do the following: - grab the driver module reference after we have assigned the Generic PHY drivers accordingly, and remember we came from the generic PHY path - update the error path to clean up the module reference in case the Generic PHY probe function fails - split the error path involving phy_detacht() to avoid double free/put since phy_detach() does all the clean up - finally, have phy_detach() drop the module reference count before we call device_release_driver() for the Generic PHY driver case Fixes: cafe8df8b9bc ("net: phy: Fix lack of reference count on PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0d8f4d3847f6..8c8e15b8739d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -908,6 +908,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, struct module *ndev_owner = dev->dev.parent->driver->owner; struct mii_bus *bus = phydev->mdio.bus; struct device *d = &phydev->mdio.dev; + bool using_genphy = false; int err; /* For Ethernet device drivers that register their own MDIO bus, we @@ -920,11 +921,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, return -EIO; } - if (!try_module_get(d->driver->owner)) { - dev_err(&dev->dev, "failed to get the device driver module\n"); - return -EIO; - } - get_device(d); /* Assume that if there is no driver, that it doesn't @@ -938,12 +934,22 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, d->driver = &genphy_driver[GENPHY_DRV_1G].mdiodrv.driver; + using_genphy = true; + } + + if (!try_module_get(d->driver->owner)) { + dev_err(&dev->dev, "failed to get the device driver module\n"); + err = -EIO; + goto error_put_device; + } + + if (using_genphy) { err = d->driver->probe(d); if (err >= 0) err = device_bind_driver(d); if (err) - goto error; + goto error_module_put; } if (phydev->attached_dev) { @@ -980,9 +986,14 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, return err; error: + /* phy_detach() does all of the cleanup below */ phy_detach(phydev); - put_device(d); + return err; + +error_module_put: module_put(d->driver->owner); +error_put_device: + put_device(d); if (ndev_owner != bus->owner) module_put(bus->owner); return err; @@ -1045,6 +1056,8 @@ void phy_detach(struct phy_device *phydev) phy_led_triggers_unregister(phydev); + module_put(phydev->mdio.dev.driver->owner); + /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device * from the generic driver so that there's a chance a @@ -1065,7 +1078,6 @@ void phy_detach(struct phy_device *phydev) bus = phydev->mdio.bus; put_device(&phydev->mdio.dev); - module_put(phydev->mdio.dev.driver->owner); if (ndev_owner != bus->owner) module_put(bus->owner); } -- cgit From 538d92912d3190a1dd809233a0d57277459f37b2 Mon Sep 17 00:00:00 2001 From: Vineeth Remanan Pillai Date: Tue, 7 Feb 2017 18:59:01 +0000 Subject: xen-netfront: Rework the fix for Rx stall during OOM and network stress The commit 90c311b0eeea ("xen-netfront: Fix Rx stall during network stress and OOM") caused the refill timer to be triggerred almost on all invocations of xennet_alloc_rx_buffers for certain workloads. This reworks the fix by reverting to the old behaviour and taking into consideration the skb allocation failure. Refill timer is now triggered on insufficient requests or skb allocation failure. Signed-off-by: Vineeth Remanan Pillai Fixes: 90c311b0eeea (xen-netfront: Fix Rx stall during network stress and OOM) Reported-by: Boris Ostrovsky Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8315fe73ecd0..9dba69731f30 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -281,6 +281,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) { RING_IDX req_prod = queue->rx.req_prod_pvt; int notify; + int err = 0; if (unlikely(!netif_carrier_ok(queue->info->netdev))) return; @@ -295,8 +296,10 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) struct xen_netif_rx_request *req; skb = xennet_alloc_one_rx_buffer(queue); - if (!skb) + if (!skb) { + err = -ENOMEM; break; + } id = xennet_rxidx(req_prod); @@ -320,8 +323,13 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx.req_prod_pvt = req_prod; - /* Not enough requests? Try again later. */ - if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) { + /* Try again later if there are not enough requests or skb allocation + * failed. + * Enough requests is quantified as the sum of newly created slots and + * the unconsumed slots at the backend. + */ + if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN || + unlikely(err)) { mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); return; } -- cgit From 98e3862ca2b1ae595a13805dcab4c3a6d7718f4d Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 7 Feb 2017 12:59:47 -0800 Subject: kcm: fix 0-length case for kcm_sendmsg() Dmitry reported a kernel warning: WARNING: CPU: 3 PID: 2936 at net/kcm/kcmsock.c:627 kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627 CPU: 3 PID: 2936 Comm: a.out Not tainted 4.10.0-rc6+ #209 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:539 warn_slowpath_null+0x2c/0x40 kernel/panic.c:582 kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627 kcm_sendmsg+0x163a/0x2200 net/kcm/kcmsock.c:1029 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x600 net/socket.c:848 new_sync_write fs/read_write.c:499 [inline] __vfs_write+0x483/0x740 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 [inline] SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 when calling syscall(__NR_write, sock2, 0x208aaf27ul, 0x0ul) on a KCM seqpacket socket. It appears that kcm_sendmsg() does not handle len==0 case correctly, which causes an empty skb is allocated and queued. Fix this by skipping the skb allocation for len==0 case. Reported-by: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 7e08a4d3d77d..64f0e8531af0 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -929,23 +929,25 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out_error; } - /* New message, alloc head skb */ - head = alloc_skb(0, sk->sk_allocation); - while (!head) { - kcm_push(kcm); - err = sk_stream_wait_memory(sk, &timeo); - if (err) - goto out_error; - + if (msg_data_left(msg)) { + /* New message, alloc head skb */ head = alloc_skb(0, sk->sk_allocation); - } + while (!head) { + kcm_push(kcm); + err = sk_stream_wait_memory(sk, &timeo); + if (err) + goto out_error; - skb = head; + head = alloc_skb(0, sk->sk_allocation); + } - /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling - * csum_and_copy_from_iter from skb_do_copy_data_nocache. - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb = head; + + /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling + * csum_and_copy_from_iter from skb_do_copy_data_nocache. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } start: while (msg_data_left(msg)) { @@ -1018,10 +1020,12 @@ wait_for_memory: if (eor) { bool not_busy = skb_queue_empty(&sk->sk_write_queue); - /* Message complete, queue it on send buffer */ - __skb_queue_tail(&sk->sk_write_queue, head); - kcm->seq_skb = NULL; - KCM_STATS_INCR(kcm->stats.tx_msgs); + if (head) { + /* Message complete, queue it on send buffer */ + __skb_queue_tail(&sk->sk_write_queue, head); + kcm->seq_skb = NULL; + KCM_STATS_INCR(kcm->stats.tx_msgs); + } if (msg->msg_flags & MSG_BATCH) { kcm->tx_wait_more = true; -- cgit From 5a70348e1187c5bf1cbd0ec51843f36befed1c2d Mon Sep 17 00:00:00 2001 From: Stefan Brüns Date: Wed, 8 Feb 2017 02:46:32 +0100 Subject: sierra_net: Add support for IPv6 and Dual-Stack Link Sense Indications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a context is configured as dualstack ("IPv4v6"), the modem indicates the context activation with a slightly different indication message. The dual-stack indication omits the link_type (IPv4/v6) and adds additional address fields. IPv6 LSIs are identical to IPv4 LSIs, but have a different link type. Signed-off-by: Stefan Brüns Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/sierra_net.c | 101 ++++++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 12071f1582df..631b1ed86610 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -73,8 +73,6 @@ static atomic_t iface_counter = ATOMIC_INIT(0); /* Private data structure */ struct sierra_net_data { - u8 ethr_hdr_tmpl[ETH_HLEN]; /* ethernet header template for rx'd pkts */ - u16 link_up; /* air link up or down */ u8 tx_hdr_template[4]; /* part of HIP hdr for tx'd packets */ @@ -122,6 +120,7 @@ struct param { /* LSI Protocol types */ #define SIERRA_NET_PROTOCOL_UMTS 0x01 +#define SIERRA_NET_PROTOCOL_UMTS_DS 0x04 /* LSI Coverage */ #define SIERRA_NET_COVERAGE_NONE 0x00 #define SIERRA_NET_COVERAGE_NOPACKET 0x01 @@ -129,7 +128,8 @@ struct param { /* LSI Session */ #define SIERRA_NET_SESSION_IDLE 0x00 /* LSI Link types */ -#define SIERRA_NET_AS_LINK_TYPE_IPv4 0x00 +#define SIERRA_NET_AS_LINK_TYPE_IPV4 0x00 +#define SIERRA_NET_AS_LINK_TYPE_IPV6 0x02 struct lsi_umts { u8 protocol; @@ -137,9 +137,14 @@ struct lsi_umts { __be16 length; /* eventually use a union for the rest - assume umts for now */ u8 coverage; - u8 unused2[41]; + u8 network_len; /* network name len */ + u8 network[40]; /* network name (UCS2, bigendian) */ u8 session_state; u8 unused3[33]; +} __packed; + +struct lsi_umts_single { + struct lsi_umts lsi; u8 link_type; u8 pdp_addr_len; /* NW-supplied PDP address len */ u8 pdp_addr[16]; /* NW-supplied PDP address (bigendian)) */ @@ -158,10 +163,31 @@ struct lsi_umts { u8 reserved[8]; } __packed; +struct lsi_umts_dual { + struct lsi_umts lsi; + u8 pdp_addr4_len; /* NW-supplied PDP IPv4 address len */ + u8 pdp_addr4[4]; /* NW-supplied PDP IPv4 address (bigendian)) */ + u8 pdp_addr6_len; /* NW-supplied PDP IPv6 address len */ + u8 pdp_addr6[16]; /* NW-supplied PDP IPv6 address (bigendian)) */ + u8 unused4[23]; + u8 dns1_addr4_len; /* NW-supplied 1st DNS v4 address len (bigendian) */ + u8 dns1_addr4[4]; /* NW-supplied 1st DNS v4 address */ + u8 dns1_addr6_len; /* NW-supplied 1st DNS v6 address len */ + u8 dns1_addr6[16]; /* NW-supplied 1st DNS v6 address (bigendian)*/ + u8 dns2_addr4_len; /* NW-supplied 2nd DNS v4 address len (bigendian) */ + u8 dns2_addr4[4]; /* NW-supplied 2nd DNS v4 address */ + u8 dns2_addr6_len; /* NW-supplied 2nd DNS v6 address len */ + u8 dns2_addr6[16]; /* NW-supplied 2nd DNS v6 address (bigendian)*/ + u8 unused5[68]; +} __packed; + #define SIERRA_NET_LSI_COMMON_LEN 4 -#define SIERRA_NET_LSI_UMTS_LEN (sizeof(struct lsi_umts)) +#define SIERRA_NET_LSI_UMTS_LEN (sizeof(struct lsi_umts_single)) #define SIERRA_NET_LSI_UMTS_STATUS_LEN \ (SIERRA_NET_LSI_UMTS_LEN - SIERRA_NET_LSI_COMMON_LEN) +#define SIERRA_NET_LSI_UMTS_DS_LEN (sizeof(struct lsi_umts_dual)) +#define SIERRA_NET_LSI_UMTS_DS_STATUS_LEN \ + (SIERRA_NET_LSI_UMTS_DS_LEN - SIERRA_NET_LSI_COMMON_LEN) /* Forward definitions */ static void sierra_sync_timer(unsigned long syncdata); @@ -190,10 +216,11 @@ static inline void sierra_net_set_private(struct usbnet *dev, dev->data[0] = (unsigned long)priv; } -/* is packet IPv4 */ +/* is packet IPv4/IPv6 */ static inline int is_ip(struct sk_buff *skb) { - return skb->protocol == cpu_to_be16(ETH_P_IP); + return skb->protocol == cpu_to_be16(ETH_P_IP) || + skb->protocol == cpu_to_be16(ETH_P_IPV6); } /* @@ -349,38 +376,43 @@ static inline int sierra_net_is_valid_addrlen(u8 len) static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen) { struct lsi_umts *lsi = (struct lsi_umts *)data; + u32 expected_length; - if (datalen < sizeof(struct lsi_umts)) { - netdev_err(dev->net, "%s: Data length %d, exp %Zu\n", - __func__, datalen, - sizeof(struct lsi_umts)); - return -1; - } - - if (lsi->length != cpu_to_be16(SIERRA_NET_LSI_UMTS_STATUS_LEN)) { - netdev_err(dev->net, "%s: LSI_UMTS_STATUS_LEN %d, exp %u\n", - __func__, be16_to_cpu(lsi->length), - (u32)SIERRA_NET_LSI_UMTS_STATUS_LEN); + if (datalen < sizeof(struct lsi_umts_single)) { + netdev_err(dev->net, "%s: Data length %d, exp >= %Zu\n", + __func__, datalen, sizeof(struct lsi_umts_single)); return -1; } /* Validate the protocol - only support UMTS for now */ - if (lsi->protocol != SIERRA_NET_PROTOCOL_UMTS) { + if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS) { + struct lsi_umts_single *single = (struct lsi_umts_single *)lsi; + + /* Validate the link type */ + if (single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV4 && + single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV6) { + netdev_err(dev->net, "Link type unsupported: 0x%02x\n", + single->link_type); + return -1; + } + expected_length = SIERRA_NET_LSI_UMTS_STATUS_LEN; + } else if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS_DS) { + expected_length = SIERRA_NET_LSI_UMTS_DS_STATUS_LEN; + } else { netdev_err(dev->net, "Protocol unsupported, 0x%02x\n", - lsi->protocol); + lsi->protocol); return -1; } - /* Validate the link type */ - if (lsi->link_type != SIERRA_NET_AS_LINK_TYPE_IPv4) { - netdev_err(dev->net, "Link type unsupported: 0x%02x\n", - lsi->link_type); + if (be16_to_cpu(lsi->length) != expected_length) { + netdev_err(dev->net, "%s: LSI_UMTS_STATUS_LEN %d, exp %u\n", + __func__, be16_to_cpu(lsi->length), expected_length); return -1; } /* Validate the coverage */ - if (lsi->coverage == SIERRA_NET_COVERAGE_NONE - || lsi->coverage == SIERRA_NET_COVERAGE_NOPACKET) { + if (lsi->coverage == SIERRA_NET_COVERAGE_NONE || + lsi->coverage == SIERRA_NET_COVERAGE_NOPACKET) { netdev_err(dev->net, "No coverage, 0x%02x\n", lsi->coverage); return 0; } @@ -652,7 +684,6 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) u8 numendpoints; u16 fwattr = 0; int status; - struct ethhdr *eth; struct sierra_net_data *priv; static const u8 sync_tmplate[sizeof(priv->sync_msg)] = { 0x00, 0x00, SIERRA_NET_HIP_MSYNC_ID, 0x00}; @@ -690,11 +721,6 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->dev_addr[ETH_ALEN-2] = atomic_inc_return(&iface_counter); dev->net->dev_addr[ETH_ALEN-1] = ifacenum; - /* we will have to manufacture ethernet headers, prepare template */ - eth = (struct ethhdr *)priv->ethr_hdr_tmpl; - memcpy(ð->h_dest, dev->net->dev_addr, ETH_ALEN); - eth->h_proto = cpu_to_be16(ETH_P_IP); - /* prepare shutdown message template */ memcpy(priv->shdwn_msg, shdwn_tmplate, sizeof(priv->shdwn_msg)); /* set context index initially to 0 - prepares tx hdr template */ @@ -824,9 +850,14 @@ static int sierra_net_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skb_pull(skb, hh.hdrlen); - /* We are going to accept this packet, prepare it */ - memcpy(skb->data, sierra_net_get_private(dev)->ethr_hdr_tmpl, - ETH_HLEN); + /* We are going to accept this packet, prepare it. + * In case protocol is IPv6, keep it, otherwise force IPv4. + */ + skb_reset_mac_header(skb); + if (eth_hdr(skb)->h_proto != cpu_to_be16(ETH_P_IPV6)) + eth_hdr(skb)->h_proto = cpu_to_be16(ETH_P_IP); + eth_zero_addr(eth_hdr(skb)->h_source); + memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); /* Last packet in batch handled by usbnet */ if (hh.payload_len.word == skb->len) -- cgit From 764895d3039e903dac3a70f219949efe43d036a0 Mon Sep 17 00:00:00 2001 From: Stefan Brüns Date: Wed, 8 Feb 2017 02:46:33 +0100 Subject: sierra_net: Skip validating irrelevant fields for IDLE LSIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the context is deactivated, the link_type is set to 0xff, which triggers a warning message, and results in a wrong link status, as the LSI is ignored. Signed-off-by: Stefan Brüns Signed-off-by: David S. Miller --- drivers/net/usb/sierra_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 631b1ed86610..d9440bc022f2 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -384,6 +384,13 @@ static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen) return -1; } + /* Validate the session state */ + if (lsi->session_state == SIERRA_NET_SESSION_IDLE) { + netdev_err(dev->net, "Session idle, 0x%02x\n", + lsi->session_state); + return 0; + } + /* Validate the protocol - only support UMTS for now */ if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS) { struct lsi_umts_single *single = (struct lsi_umts_single *)lsi; @@ -417,13 +424,6 @@ static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen) return 0; } - /* Validate the session state */ - if (lsi->session_state == SIERRA_NET_SESSION_IDLE) { - netdev_err(dev->net, "Session idle, 0x%02x\n", - lsi->session_state); - return 0; - } - /* Set link_sense true */ return 1; } -- cgit From e2e004acc7cbe3c531e752a270a74e95cde3ea48 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 8 Feb 2017 10:57:37 +0000 Subject: xen-netfront: Improve error handling during initialization This fixes a crash when running out of grant refs when creating many queues across many netdevs. * If creating queues fails (i.e. there are no grant refs available), call xenbus_dev_fatal() to ensure that the xenbus device is set to the closed state. * If no queues are created, don't call xennet_disconnect_backend as netdev->real_num_tx_queues will not have been set correctly. * If setup_netfront() fails, ensure that all the queues created are cleaned up, not just those that have been set up. * If any queues were set up and an error occurs, call xennet_destroy_queues() to clean up the napi context. * If any fatal error occurs, unregister and destroy the netdev to avoid leaving around a half setup network device. Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 9dba69731f30..d3812581c6c0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1830,27 +1830,19 @@ static int talk_to_netback(struct xenbus_device *dev, xennet_destroy_queues(info); err = xennet_create_queues(info, &num_queues); - if (err < 0) - goto destroy_ring; + if (err < 0) { + xenbus_dev_fatal(dev, err, "creating queues"); + kfree(info->queues); + info->queues = NULL; + goto out; + } /* Create shared ring, alloc event channel -- for each queue */ for (i = 0; i < num_queues; ++i) { queue = &info->queues[i]; err = setup_netfront(dev, queue, feature_split_evtchn); - if (err) { - /* setup_netfront() will tidy up the current - * queue on error, but we need to clean up - * those already allocated. - */ - if (i > 0) { - rtnl_lock(); - netif_set_real_num_tx_queues(info->netdev, i); - rtnl_unlock(); - goto destroy_ring; - } else { - goto out; - } - } + if (err) + goto destroy_ring; } again: @@ -1940,9 +1932,10 @@ abort_transaction_no_dev_fatal: xenbus_transaction_end(xbt, 1); destroy_ring: xennet_disconnect_backend(info); - kfree(info->queues); - info->queues = NULL; + xennet_destroy_queues(info); out: + unregister_netdev(info->netdev); + xennet_free_netdev(info->netdev); return err; } -- cgit From 9c8bb163ae784be4f79ae504e78c862806087c54 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Feb 2017 21:16:45 +0800 Subject: igmp, mld: Fix memory leak in igmpv3/mld_del_delrec() In function igmpv3/mld_add_delrec() we allocate pmc and put it in idev->mc_tomb, so we should free it when we don't need it in del_delrec(). But I removed kfree(pmc) incorrectly in latest two patches. Now fix it. Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info when ...") Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when ...") Reported-by: Daniel Borkmann Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 1 + net/ipv6/mcast.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5b15459955f8..44fd86de2823 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); + kfree(pmc); } spin_unlock_bh(&im->lock); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7139fffd61b6..1bdc703cb966 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -779,6 +779,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); + kfree(pmc); } spin_unlock_bh(&im->mca_lock); } -- cgit From bb1a619735b4660f21bce3e728b937640024b4ad Mon Sep 17 00:00:00 2001 From: Yendapally Reddy Dhananjaya Reddy Date: Wed, 8 Feb 2017 17:14:26 -0500 Subject: net: phy: Initialize mdio clock at probe function USB PHYs need the MDIO clock divisor enabled earlier to work. Initialize mdio clock divisor in probe function. The ext bus bit available in the same register will be used by mdio mux to enable external mdio. Signed-off-by: Yendapally Reddy Dhananjaya Reddy Fixes: ddc24ae1 ("net: phy: Broadcom iProc MDIO bus driver") Reviewed-by: Florian Fainelli Signed-off-by: Jon Mason Signed-off-by: David S. Miller --- drivers/net/phy/mdio-bcm-iproc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index c0b4e65267af..46fe1ae919a3 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -81,8 +81,6 @@ static int iproc_mdio_read(struct mii_bus *bus, int phy_id, int reg) if (rc) return rc; - iproc_mdio_config_clk(priv->base); - /* Prepare the read operation */ cmd = (MII_DATA_TA_VAL << MII_DATA_TA_SHIFT) | (reg << MII_DATA_RA_SHIFT) | @@ -112,8 +110,6 @@ static int iproc_mdio_write(struct mii_bus *bus, int phy_id, if (rc) return rc; - iproc_mdio_config_clk(priv->base); - /* Prepare the write operation */ cmd = (MII_DATA_TA_VAL << MII_DATA_TA_SHIFT) | (reg << MII_DATA_RA_SHIFT) | @@ -163,6 +159,8 @@ static int iproc_mdio_probe(struct platform_device *pdev) bus->read = iproc_mdio_read; bus->write = iproc_mdio_write; + iproc_mdio_config_clk(priv->base); + rc = of_mdiobus_register(bus, pdev->dev.of_node); if (rc) { dev_err(&pdev->dev, "MDIO bus registration failed\n"); -- cgit From ffdadd68af5a397b8a52289ab39d62e1acb39e63 Mon Sep 17 00:00:00 2001 From: ojab Date: Wed, 28 Dec 2016 11:05:24 +0000 Subject: scsi: mpt3sas: disable ASPM for MPI2 controllers MPI2 controllers sometimes got lost (i.e. disappear from /sys/bus/pci/devices) if ASMP is enabled. Signed-off-by: Slava Kardakov Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60644 Cc: Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 268103182d34..0b5b423b1db0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -8761,6 +8762,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (hba_mpi_version) { case MPI2_VERSION: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM); /* Use mpt2sas driver host template for SAS 2.0 HBA's */ shost = scsi_host_alloc(&mpt2sas_driver_template, sizeof(struct MPT3SAS_ADAPTER)); -- cgit From 8af8e1c22f9994bb1849c01d66c24fe23f9bc9a0 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Thu, 9 Feb 2017 11:04:47 -0700 Subject: scsi: aacraid: Fix INTx/MSI-x issue with older controllers commit 78cbccd3bd68 ("aacraid: Fix for KDUMP driver hang") caused a problem on older controllers which do not support MSI-x (namely ASR3405,ASR3805). This patch conditionalizes the previous patch to controllers which support MSI-x Cc: # v4.7+ Fixes: 78cbccd3bd68 ("aacraid: Fix for KDUMP driver hang") Reported-by: Arkadiusz Miskiewicz Signed-off-by: Dave Carroll Reviewed-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/comminit.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 4f56b1003cc7..5b48bedd7c38 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -50,9 +50,13 @@ struct aac_common aac_config = { static inline int aac_is_msix_mode(struct aac_dev *dev) { - u32 status; + u32 status = 0; - status = src_readl(dev, MUnit.OMR); + if (dev->pdev->device == PMC_DEVICE_S6 || + dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8) { + status = src_readl(dev, MUnit.OMR); + } return (status & AAC_INT_MODE_MSIX); } -- cgit From 2dfa6688aafdc3f74efeb1cf05fb871465d67f79 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 8 Feb 2017 15:34:22 +0100 Subject: scsi: zfcp: fix use-after-free by not tracing WKA port open/close on failed send Dan Carpenter kindly reported: The patch d27a7cb91960: "zfcp: trace on request for open and close of WKA port" from Aug 10, 2016, leads to the following static checker warning: drivers/s390/scsi/zfcp_fsf.c:1615 zfcp_fsf_open_wka_port() warn: 'req' was already freed. drivers/s390/scsi/zfcp_fsf.c 1609 zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); 1610 retval = zfcp_fsf_req_send(req); 1611 if (retval) 1612 zfcp_fsf_req_free(req); ^^^ Freed. 1613 out: 1614 spin_unlock_irq(&qdio->req_q_lock); 1615 if (req && !IS_ERR(req)) 1616 zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); ^^^^^^^^^^^ Use after free. 1617 return retval; 1618 } Same thing for zfcp_fsf_close_wka_port() as well. Rather than relying on req being NULL (or ERR_PTR) for all cases where we don't want to trace or should not trace, simply check retval which is unconditionally initialized with -EIO != 0 and it can only become 0 on successful retval = zfcp_fsf_req_send(req). With that we can also remove the then again unnecessary unconditional initialization of req which was introduced with that earlier commit. Reported-by: Dan Carpenter Suggested-by: Benjamin Block Signed-off-by: Steffen Maier Fixes: d27a7cb91960 ("zfcp: trace on request for open and close of WKA port") Cc: #2.6.38+ Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 75f820ca17b7..27ff38f839fc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1583,7 +1583,7 @@ out: int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1612,7 +1612,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); return retval; } @@ -1638,7 +1638,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req) int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1667,7 +1667,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id); return retval; } -- cgit From 0839ffb83e44e5ff1843e932592525fc2bff23ff Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 9 Feb 2017 14:20:42 -0500 Subject: nfsd: Revert "nfsd: special case truncates some more" This patch incorrectly attempted nested mnt_want_write, and incorrectly disabled nfsd's owner override for truncate. We'll fix those problems and make another attempt soon, for the moment I think the safest is to revert. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 97 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 37 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ca13236dbb1f..26c6fdb4bf67 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -332,6 +332,37 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) } } +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + int host_err; + + if (iap->ia_size < inode->i_size) { + __be32 err; + + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } + + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + /* * Set various file attributes. After this call fhp needs an fh_put. */ @@ -346,6 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; + int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -358,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - return err; + goto out; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - goto out_host_err; + return nfserrno(host_err); } dentry = fhp->fh_dentry; @@ -373,59 +405,50 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - return 0; + goto out; nfsd_sanitize_attrs(inode, iap); - if (check_guard && guardtime != inode->i_ctime.tv_sec) - return nfserr_notsync; - /* * The size case is special, it changes the file in addition to the - * attributes, and file systems don't expect it to be mixed with - * "random" attribute changes. We thus split out the size change - * into a separate call for vfs_truncate, and do the rest as a - * a separate setattr call. + * attributes. */ if (iap->ia_valid & ATTR_SIZE) { - struct path path = { - .mnt = fhp->fh_export->ex_path.mnt, - .dentry = dentry, - }; - bool implicit_mtime = false; + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; /* - * vfs_truncate implicity updates the mtime IFF the file size - * actually changes. Avoid the additional seattr call below if - * the only other attribute that the client sends is the mtime. + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. + * + * (and similar for the older RFCs) */ - if (iap->ia_size != i_size_read(inode) && - ((iap->ia_valid & ~(ATTR_SIZE | ATTR_MTIME)) == 0)) - implicit_mtime = true; - - host_err = vfs_truncate(&path, iap->ia_size); - if (host_err) - goto out_host_err; - - iap->ia_valid &= ~ATTR_SIZE; - if (implicit_mtime) - iap->ia_valid &= ~ATTR_MTIME; - if (!iap->ia_valid) - goto done; + if (iap->ia_size != i_size_read(inode)) + iap->ia_valid |= ATTR_MTIME; } iap->ia_valid |= ATTR_CTIME; + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - if (host_err) - goto out_host_err; + err = nfserrno(host_err); -done: - host_err = commit_metadata(fhp); -out_host_err: - return nfserrno(host_err); +out_put_write_access: + if (size_change) + put_write_access(inode); + if (!err) + err = nfserrno(commit_metadata(fhp)); +out: + return err; } #if defined(CONFIG_NFSD_V4) -- cgit From 451d24d1e5f40bad000fa9abe36ddb16fc9928cb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Jan 2017 11:27:10 +0100 Subject: perf/core: Fix crash in perf_event_read() Alexei had his box explode because doing read() on a package (rapl/uncore) event that isn't currently scheduled in ends up doing an out-of-bounds load. Rework the code to more explicitly deal with event->oncpu being -1. Reported-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Tested-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Fixes: d6a2f9035bfc ("perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG") Link: http://lkml.kernel.org/r/20170131102710.GL6515@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e5aaa806702d..e235bb991bdd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3487,14 +3487,15 @@ struct perf_read_data { int ret; }; -static int find_cpu_to_read(struct perf_event *event, int local_cpu) +static int __perf_event_read_cpu(struct perf_event *event, int event_cpu) { - int event_cpu = event->oncpu; u16 local_pkg, event_pkg; if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { - event_pkg = topology_physical_package_id(event_cpu); - local_pkg = topology_physical_package_id(local_cpu); + int local_cpu = smp_processor_id(); + + event_pkg = topology_physical_package_id(event_cpu); + local_pkg = topology_physical_package_id(local_cpu); if (event_pkg == local_pkg) return local_cpu; @@ -3624,7 +3625,7 @@ u64 perf_event_read_local(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { - int ret = 0, cpu_to_read, local_cpu; + int event_cpu, ret = 0; /* * If event is enabled and currently active on a CPU, update the @@ -3637,21 +3638,25 @@ static int perf_event_read(struct perf_event *event, bool group) .ret = 0, }; - local_cpu = get_cpu(); - cpu_to_read = find_cpu_to_read(event, local_cpu); - put_cpu(); + event_cpu = READ_ONCE(event->oncpu); + if ((unsigned)event_cpu >= nr_cpu_ids) + return 0; + + preempt_disable(); + event_cpu = __perf_event_read_cpu(event, event_cpu); /* * Purposely ignore the smp_call_function_single() return * value. * - * If event->oncpu isn't a valid CPU it means the event got + * If event_cpu isn't a valid CPU it means the event got * scheduled out and that will have updated the event count. * * Therefore, either way, we'll have an up-to-date event count * after this. */ - (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1); + (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1); + preempt_enable(); ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; -- cgit From 7bdb59f1ad474bd7161adc8f923cdef10f2638d1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Feb 2017 17:44:54 +0100 Subject: tick/nohz: Fix possible missing clock reprog after tick soft restart ts->next_tick keeps track of the next tick deadline in order to optimize clock programmation on irq exit and avoid redundant clock device writes. Now if ts->next_tick missed an update, we may spuriously miss a clock reprog later as the nohz code is fooled by an obsolete next_tick value. This is what happens here on a specific path: when we observe an expired timer from the nohz update code on irq exit, we perform a soft tick restart which simply fires the closest possible tick without actually exiting the nohz mode and restoring a periodic state. But we forget to update ts->next_tick accordingly. As a result, after the next tick resulting from such soft tick restart, the nohz code sees a stale value on ts->next_tick which doesn't match the clock deadline that just expired. If that obsolete ts->next_tick value happens to collide with the actual next tick deadline to be scheduled, we may spuriously bypass the clock reprogramming. In the worst case, the tick may never fire again. Fix this with a ts->next_tick reset on soft tick restart. Signed-off-by: Frederic Weisbecker Reviewed: Wanpeng Li Acked-by: Rik van Riel Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1486485894-29173-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 74e0388cc88d..fc6f740d0277 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -725,6 +725,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, */ if (delta == 0) { tick_nohz_restart(ts, now); + /* + * Make sure next tick stop doesn't get fooled by past + * clock deadline + */ + ts->next_tick = 0; goto out; } } -- cgit From f2e04214ef7f7e49d1e06109ad1b2718155dab25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Feb 2017 16:08:41 +0100 Subject: x86/tsc: Avoid the large time jump when sanitizing TSC ADJUST Olof reported that on a machine which has a BIOS wreckaged TSC the timestamps in dmesg are making a large jump because the TSC value is jumping forward after resetting the TSC ADJUST register to a sane value. This can be avoided by calling the TSC ADJUST saniziting function before initializing the per cpu sched clock machinery. That takes the offset into account and avoid the time jump. What cannot be avoided is that the 'Firmware Bug' warnings on the secondary CPUs are printed with the large time offsets because it would be too much effort and ugly hackery to print those warnings into a buffer and emit them after the adjustemt on the starting CPUs. It's a firmware bug and should be fixed in firmware. The weird timestamps are collateral damage and just illustrate the sillyness of the BIOS folks: [ 0.397445] smp: Bringing up secondary CPUs ... [ 0.402100] x86: Booting SMP configuration: [ 0.406343] .... node #0, CPUs: #1 [1265776479.930667] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU1: -2978888639183101 [1265776479.944664] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2978888639183101 [ 0.508119] #2 [1265776480.032346] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU2: -2978888639183677 [1265776480.044192] TSC ADJUST synchronize: Reference CPU0: 0 CPU2: -2978888639183677 [ 0.607643] #3 [1265776480.131874] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU3: -2978888639184530 [1265776480.143720] TSC ADJUST synchronize: Reference CPU0: 0 CPU3: -2978888639184530 [ 0.707108] smp: Brought up 1 node, 4 CPUs [ 0.711271] smpboot: Total of 4 processors activated (21698.88 BogoMIPS) Reported-by: Olof Johansson Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170209151231.411460506@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e41af597aed8..37e7cf544e51 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1356,6 +1356,9 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same @@ -1386,8 +1389,6 @@ void __init tsc_init(void) if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); - else - tsc_store_and_check_tsc_adjust(true); check_system_tsc_reliable(); -- cgit From 5f2e71e71410ecb858cfec184ba092adaca61626 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Feb 2017 16:08:42 +0100 Subject: x86/tsc: Make the TSC ADJUST sanitizing work for tsc_reliable When the TSC is marked reliable then the synchronization check is skipped, but that also skips the TSC ADJUST sanitizing code. So on a machine with a wreckaged BIOS the TSC deviation between CPUs might go unnoticed. Let the TSC adjust sanitizing code run unconditionally and just skip the expensive synchronization checks when TSC is marked reliable. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Olof Johansson Link: http://lkml.kernel.org/r/20170209151231.491189912@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_sync.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index d0db011051a5..728f75378475 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu) if (unsynchronized_tsc()) return; - if (tsc_clocksource_reliable) { - if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) - pr_info( - "Skipped synchronization checks as TSC is reliable.\n"); - return; - } - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR @@ -380,14 +373,19 @@ void check_tsc_sync_target(void) int cpus = 2; /* Also aborts if there is no TSC. */ - if (unsynchronized_tsc() || tsc_clocksource_reliable) + if (unsynchronized_tsc()) return; /* * Store, verify and sanitize the TSC adjust register. If * successful skip the test. + * + * The test is also skipped when the TSC is marked reliable. This + * is true for SoCs which have no fallback clocksource. On these + * SoCs the TSC is frequency synchronized, but still the TSC ADJUST + * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false)) { + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { atomic_inc(&skip_test); return; } -- cgit From 146fbb766934dc003fcbf755b519acef683576bf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 10 Feb 2017 12:54:05 +0300 Subject: x86/mm/ptdump: Fix soft lockup in page table walker CONFIG_KASAN=y needs a lot of virtual memory mapped for its shadow. In that case ptdump_walk_pgd_level_core() takes a lot of time to walk across all page tables and doing this without a rescheduling causes soft lockups: NMI watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [swapper/0:1] ... Call Trace: ptdump_walk_pgd_level_core+0x40c/0x550 ptdump_walk_pgd_level_checkwx+0x17/0x20 mark_rodata_ro+0x13b/0x150 kernel_init+0x2f/0x120 ret_from_fork+0x2c/0x40 I guess that this issue might arise even without KASAN on huge machines with several terabytes of RAM. Stick cond_resched() in pgd loop to fix this. Reported-by: Tobias Regnery Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: "Paul E . McKenney" Cc: Dmitry Vyukov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170210095405.31802-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/dump_pagetables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ea9c49adaa1f..8aa6bea1cd6c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, } else note_page(m, &st, __pgprot(0), 1); + cond_resched(); start++; } -- cgit From b85ea006b6bebb692628f11882af41c3e12e1e09 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Thu, 9 Feb 2017 11:46:15 +0000 Subject: net: hns: Fix the device being used for dma mapping during TX This patch fixes the device being used to DMA map skb->data. Erroneous device assignment causes the crash when SMMU is enabled. This happens during TX since buffer gets DMA mapped with device correspondign to net_device and gets unmapped using the device related to DSAF. Signed-off-by: Kejian Yan Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 672b64606321..8aed72860e7c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -305,8 +305,8 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, struct hns_nic_ring_data *ring_data) { struct hns_nic_priv *priv = netdev_priv(ndev); - struct device *dev = priv->dev; struct hnae_ring *ring = ring_data->ring; + struct device *dev = ring_to_dev(ring); struct netdev_queue *dev_queue; struct skb_frag_struct *frag; int buf_num; -- cgit From 7ba1b689038726d34e3244c1ac9e2e18c2ea4787 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Feb 2017 14:12:11 +0100 Subject: NET: mkiss: Fix panic If a USB-to-serial adapter is unplugged, the driver re-initializes, with dev->hard_header_len and dev->addr_len set to zero, instead of the correct values. If then a packet is sent through the half-dead interface, the kernel will panic due to running out of headroom in the skb when pushing for the AX.25 headers resulting in this panic: [] (skb_panic) from [] (skb_push+0x4c/0x50) [] (skb_push) from [] (ax25_hard_header+0x34/0xf4 [ax25]) [] (ax25_hard_header [ax25]) from [] (ax_header+0x38/0x40 [mkiss]) [] (ax_header [mkiss]) from [] (neigh_compat_output+0x8c/0xd8) [] (neigh_compat_output) from [] (ip_finish_output+0x2a0/0x914) [] (ip_finish_output) from [] (ip_output+0xd8/0xf0) [] (ip_output) from [] (ip_local_out_sk+0x44/0x48) This patch makes mkiss behave like the 6pack driver. 6pack does not panic. In 6pack.c sp_setup() (same function name here) the values for dev->hard_header_len and dev->addr_len are set to the same values as in my mkiss patch. [ralf@linux-mips.org: Massages original submission to conform to the usual standards for patch submissions.] Signed-off-by: Thomas Osterried Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index ece59c54a653..4a40a3d825b4 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -648,8 +648,8 @@ static void ax_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->mtu = AX_MTU; - dev->hard_header_len = 0; - dev->addr_len = 0; + dev->hard_header_len = AX25_MAX_HEADER_LEN; + dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_AX25; dev->tx_queue_len = 10; dev->header_ops = &ax25_header_ops; -- cgit From 74470954857c264168d2b5a113904cf0cfd27d18 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 30 Jan 2017 12:45:46 -0500 Subject: xen-netfront: Delete rx_refill_timer in xennet_disconnect_backend() rx_refill_timer should be deleted as soon as we disconnect from the backend since otherwise it is possible for the timer to go off before we get to xennet_destroy_queues(). If this happens we may dereference queue->rx.sring which is set to NULL in xennet_disconnect_backend(). Signed-off-by: Boris Ostrovsky CC: stable@vger.kernel.org Reviewed-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index d3812581c6c0..1e4125a98291 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1387,6 +1387,8 @@ static void xennet_disconnect_backend(struct netfront_info *info) for (i = 0; i < num_queues && info->queues; ++i) { struct netfront_queue *queue = &info->queues[i]; + del_timer_sync(&queue->rx_refill_timer); + if (queue->tx_irq && (queue->tx_irq == queue->rx_irq)) unbind_from_irqhandler(queue->tx_irq, queue); if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) { @@ -1741,7 +1743,6 @@ static void xennet_destroy_queues(struct netfront_info *info) if (netif_running(info->netdev)) napi_disable(&queue->napi); - del_timer_sync(&queue->rx_refill_timer); netif_napi_del(&queue->napi); } -- cgit From 72fb96e7bdbbdd4421b0726992496531060f3636 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Feb 2017 16:15:52 -0800 Subject: l2tp: do not use udp_ioctl() udp_ioctl(), as its name suggests, is used by UDP protocols, but is also used by L2TP :( L2TP should use its own handler, because it really does not look the same. SIOCINQ for instance should not assume UDP checksum or headers. Thanks to Andrey and syzkaller team for providing the report and a nice reproducer. While crashes only happen on recent kernels (after commit 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")), this probably needs to be backported to older kernels. Fixes: 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue") Fixes: 85584672012e ("udp: Fix udp_poll() and ioctl()") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ip.c | 27 ++++++++++++++++++++++++++- net/l2tp/l2tp_ip6.c | 2 +- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8f560f7140a0..aebf281d09ee 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3d73278b86ca..28c21546d5b6 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -553,6 +554,30 @@ out: return err ? err : copied; } +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int amount; + + switch (cmd) { + case SIOCOUTQ: + amount = sk_wmem_alloc_get(sk); + break; + case SIOCINQ: + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + spin_unlock_bh(&sk->sk_receive_queue.lock); + break; + + default: + return -ENOIOCTLCMD; + } + + return put_user(amount, (int __user *)arg); +} +EXPORT_SYMBOL(l2tp_ioctl); + static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, @@ -561,7 +586,7 @@ static struct proto l2tp_ip_prot = { .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 331ccf5a7bad..f47c45250f86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -722,7 +722,7 @@ static struct proto l2tp_ip6_prot = { .bind = l2tp_ip6_bind, .connect = l2tp_ip6_connect, .disconnect = l2tp_ip6_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip6_destroy_sock, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, -- cgit From 6e78b3f7a193546b1c00a6d084596e774f147169 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 10 Feb 2017 15:03:35 -0800 Subject: Btrfs: fix btrfs_decompress_buf2page() If btrfs_decompress_buf2page() is handed a bio with its page in the middle of the working buffer, then we adjust the offset into the working buffer. After we copy into the bio, we advance the iterator by the number of bytes we copied. Then, we have some logic to handle the case of discontiguous pages and adjust the offset into the working buffer again. However, if we didn't advance the bio to a new page, we may enter this case in error, essentially repeating the adjustment that we already made when we entered the function. The end result is bogus data in the bio. Previously, we only checked for this case when we advanced to a new page, but the conversion to bio iterators changed that. This restores the old, correct behavior. A case I saw when testing with zlib was: buf_start = 42769 total_out = 46865 working_bytes = total_out - buf_start = 4096 start_byte = 45056 The condition (total_out > start_byte && buf_start < start_byte) is true, so we adjust the offset: buf_offset = start_byte - buf_start = 2287 working_bytes -= buf_offset = 1809 current_buf_start = buf_start = 42769 Then, we copy bytes = min(bvec.bv_len, PAGE_SIZE - buf_offset, working_bytes) = 1809 buf_offset += bytes = 4096 working_bytes -= bytes = 0 current_buf_start += bytes = 44578 After bio_advance(), we are still in the same page, so start_byte is the same. Then, we check (total_out > start_byte && current_buf_start < start_byte), which is true! So, we adjust the values again: buf_offset = start_byte - buf_start = 2287 working_bytes = total_out - start_byte = 1809 current_buf_start = buf_start + buf_offset = 45056 But note that working_bytes was already zero before this, so we should have stopped copying. Fixes: 974b1adc3b10 ("btrfs: use bio iterators for the decompression handlers") Reported-by: Pat Erley Reviewed-by: Chris Mason Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason Reviewed-by: Liu Bo Tested-by: Liu Bo --- fs/btrfs/compression.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7f390849343b..c4444d6f439f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1024,6 +1024,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long buf_offset; unsigned long current_buf_start; unsigned long start_byte; + unsigned long prev_start_byte; unsigned long working_bytes = total_out - buf_start; unsigned long bytes; char *kaddr; @@ -1071,26 +1072,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, if (!bio->bi_iter.bi_size) return 0; bvec = bio_iter_iovec(bio, bio->bi_iter); - + prev_start_byte = start_byte; start_byte = page_offset(bvec.bv_page) - disk_start; /* - * make sure our new page is covered by this - * working buffer + * We need to make sure we're only adjusting + * our offset into compression working buffer when + * we're switching pages. Otherwise we can incorrectly + * keep copying when we were actually done. */ - if (total_out <= start_byte) - return 1; + if (start_byte != prev_start_byte) { + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; - /* - * the next page in the biovec might not be adjacent - * to the last page, but it might still be found - * inside this working buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + buf_offset; + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; + } } } -- cgit From db5d0b597bc27bbddf40f2f8359a73be4eb77104 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Feb 2017 13:45:05 -0500 Subject: ibmvnic: Initialize completion variables before starting work Initialize condition variables prior to invoking any work that can mark them complete. This resolves a race in the ibmvnic driver where the driver faults trying to complete an uninitialized condition variable. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c12596676bbb..c7150343342d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -189,9 +189,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, } ltb->map_id = adapter->map_id; adapter->map_id++; + + init_completion(&adapter->fw_done); send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); - init_completion(&adapter->fw_done); wait_for_completion(&adapter->fw_done); return 0; } @@ -1121,10 +1122,10 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); crq.request_statistics.len = cpu_to_be32(sizeof(struct ibmvnic_statistics)); - ibmvnic_send_crq(adapter, &crq); /* Wait for data to be written */ init_completion(&adapter->stats_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->stats_done); for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) @@ -2799,9 +2800,9 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator; crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok); crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size; - ibmvnic_send_crq(adapter, &crq); init_completion(&adapter->fw_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->fw_done); if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size)) @@ -3581,9 +3582,9 @@ static int ibmvnic_dump_show(struct seq_file *seq, void *v) memset(&crq, 0, sizeof(crq)); crq.request_dump_size.first = IBMVNIC_CRQ_CMD; crq.request_dump_size.cmd = REQUEST_DUMP_SIZE; - ibmvnic_send_crq(adapter, &crq); init_completion(&adapter->fw_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->fw_done); seq_write(seq, adapter->dump_data, adapter->dump_data_size); @@ -3629,8 +3630,8 @@ static void handle_crq_init_rsp(struct work_struct *work) } } - send_version_xchg(adapter); reinit_completion(&adapter->init_done); + send_version_xchg(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Passive init timeout\n"); goto task_failed; @@ -3640,9 +3641,9 @@ static void handle_crq_init_rsp(struct work_struct *work) if (adapter->renegotiate) { adapter->renegotiate = false; release_sub_crqs_no_irqs(adapter); - send_cap_queries(adapter); reinit_completion(&adapter->init_done); + send_cap_queries(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Passive init timeout\n"); @@ -3772,9 +3773,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->debugfs_dump = ent; } } - ibmvnic_send_crq_init(adapter); init_completion(&adapter->init_done); + ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) return 0; @@ -3782,9 +3783,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (adapter->renegotiate) { adapter->renegotiate = false; release_sub_crqs_no_irqs(adapter); - send_cap_queries(adapter); reinit_completion(&adapter->init_done); + send_cap_queries(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) return 0; -- cgit From e722af6391949e8851310441bb0cec157d25611d Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Feb 2017 13:29:06 -0500 Subject: ibmvnic: Call napi_disable instead of napi_enable in failure path The failure path in ibmvnic_open() mistakenly makes a second call to napi_enable instead of calling napi_disable. This can result in a BUG_ON for any queues that were enabled in the previous call to napi_enable. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c7150343342d..752b0822b020 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -506,7 +506,7 @@ rx_pool_alloc_failed: adapter->rx_pool = NULL; rx_pool_arr_alloc_failed: for (i = 0; i < adapter->req_rx_queues; i++) - napi_enable(&adapter->napi[i]); + napi_disable(&adapter->napi[i]); alloc_napi_failed: return -ENOMEM; } -- cgit From 7089db84e356562f8ba737c29e472cc42d530dbc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Feb 2017 13:03:20 -0800 Subject: Linux 4.10-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8e223e081c9d..503dae1de8ef 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From 722c5ac708b4f5c1fcfad5fed4c95234c8b06590 Mon Sep 17 00:00:00 2001 From: IHARA Hiroka Date: Sun, 12 Feb 2017 18:34:53 -0800 Subject: Input: elan_i2c - add ELAN0605 to the ACPI table ELAN0605 has been confirmed to be a variant of ELAN0600, which is blacklisted in the hid-core to be managed by elan_i2c. This device can be found in Lenovo ideapad 310s (80U4000). Signed-off-by: Hiroka IHARA Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index fa598f7f4372..1e1d0ad406f2 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1231,6 +1231,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0605", 0 }, { "ELAN1000", 0 }, { } }; -- cgit From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Feb 2017 20:28:24 -0800 Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command to the given cgroup the descendent cgroup will be able to override effective bpf program that was inherited from this cgroup. By default it's not passed, therefore override is disallowed. Examples: 1. prog X attached to /A with default prog Y fails to attach to /A/B and /A/B/C Everything under /A runs prog X 2. prog X attached to /A with allow_override. prog Y fails to attach to /A/B with default (non-override) prog M attached to /A/B with allow_override. Everything under /A/B runs prog M only. 3. prog X attached to /A with allow_override. prog Y fails to attach to /A with default. The user has to detach first to switch the mode. In the future this behavior may be extended with a chain of non-overridable programs. Also fix the bug where detach from cgroup where nothing is attached was not throwing error. Return ENOENT in such case. Add several testcases and adjust libbpf. Fixes: 3007098494be ("cgroup: add support for eBPF programs") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: Daniel Mack Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 13 ++++---- include/uapi/linux/bpf.h | 7 +++++ kernel/bpf/cgroup.c | 59 +++++++++++++++++++++++++++------- kernel/bpf/syscall.c | 20 ++++++++---- kernel/cgroup.c | 9 +++--- samples/bpf/test_cgrp2_attach.c | 2 +- samples/bpf/test_cgrp2_attach2.c | 68 +++++++++++++++++++++++++++++++++++++--- samples/bpf/test_cgrp2_sock.c | 2 +- samples/bpf/test_cgrp2_sock2.c | 2 +- tools/lib/bpf/bpf.c | 4 ++- tools/lib/bpf/bpf.h | 3 +- 11 files changed, 151 insertions(+), 38 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 92bc89ae7e20..c970a25d2a49 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -21,20 +21,19 @@ struct cgroup_bpf { */ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; + bool disallow_override[MAX_BPF_ATTACH_TYPE]; }; void cgroup_bpf_put(struct cgroup *cgrp); void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type); +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool overridable); /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type); +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0eb0e87dbe9f..d2b0ac799d03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -116,6 +116,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -171,6 +177,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a515f7b007c6..da0f53690295 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) e = rcu_dereference_protected(parent->bpf.effective[type], lockdep_is_held(&cgroup_mutex)); rcu_assign_pointer(cgrp->bpf.effective[type], e); + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; } } @@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) * * Must be called with cgroup_mutex held. */ -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type) +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool new_overridable) { - struct bpf_prog *old_prog, *effective; + struct bpf_prog *old_prog, *effective = NULL; struct cgroup_subsys_state *pos; + bool overridable = true; - old_prog = xchg(cgrp->bpf.prog + type, prog); + if (parent) { + overridable = !parent->bpf.disallow_override[type]; + effective = rcu_dereference_protected(parent->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + } + + if (prog && effective && !overridable) + /* if parent has non-overridable prog attached, disallow + * attaching new programs to descendent cgroup + */ + return -EPERM; + + if (prog && effective && overridable != new_overridable) + /* if parent has overridable prog attached, only + * allow overridable programs in descendent cgroup + */ + return -EPERM; - effective = (!prog && parent) ? - rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)) : - prog; + old_prog = cgrp->bpf.prog[type]; + + if (prog) { + overridable = new_overridable; + effective = prog; + if (old_prog && + cgrp->bpf.disallow_override[type] == new_overridable) + /* disallow attaching non-overridable on top + * of existing overridable in this cgroup + * and vice versa + */ + return -EPERM; + } + + if (!prog && !old_prog) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + + cgrp->bpf.prog[type] = prog; css_for_each_descendant_pre(pos, &cgrp->self) { struct cgroup *desc = container_of(pos, struct cgroup, self); /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) + if (desc->bpf.prog[type] && desc != cgrp) { pos = css_rightmost_descendant(pos); - else + } else { rcu_assign_pointer(desc->bpf.effective[type], effective); + desc->bpf.disallow_override[type] = !overridable; + } } if (prog) @@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + return 0; } /** diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 19b6129eab23..bbb016adbaeb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr) #ifdef CONFIG_CGROUP_BPF -#define BPF_PROG_ATTACH_LAST_FIELD attach_type +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags static int bpf_prog_attach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; struct bpf_prog *prog; struct cgroup *cgrp; - enum bpf_prog_type ptype; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + return -EINVAL; + switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: @@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - cgroup_bpf_update(cgrp, prog, attr->attach_type); + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + if (ret) + bpf_prog_put(prog); cgroup_put(cgrp); - return 0; + return ret; } #define BPF_PROG_DETACH_LAST_FIELD attach_type @@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { struct cgroup *cgrp; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - cgroup_bpf_update(cgrp, NULL, attr->attach_type); + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); cgroup_put(cgrp); break; @@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return -EINVAL; } - return 0; + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 688dd02af985..53bbca7c4859 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void) subsys_initcall(cgroup_namespaces_init); #ifdef CONFIG_CGROUP_BPF -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type) +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable) { struct cgroup *parent = cgroup_parent(cgrp); + int ret; mutex_lock(&cgroup_mutex); - __cgroup_bpf_update(cgrp, parent, prog, type); + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); mutex_unlock(&cgroup_mutex); + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 504058631ffc..4bfcaf93fcf3 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -104,7 +104,7 @@ static int attach_filter(int cg_fd, int type, int verdict) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, type); + ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 6e69be37f87f..3049b1f26267 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -79,11 +79,12 @@ int main(int argc, char **argv) if (join_cgroup(FOO)) goto err; - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo"); goto err; } + printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); assert(system(PING_CMD) != 0); /* Create cgroup /foo/bar, get fd, and join it */ @@ -94,24 +95,27 @@ int main(int argc, char **argv) if (join_cgroup(BAR)) goto err; + printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } + printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { log_err("Detaching program from /foo/bar"); goto err; } + printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" + "This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -121,8 +125,60 @@ int main(int argc, char **argv) goto err; } + printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" + "This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching prog to /foo/bar"); + goto err; + } + + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo/bar"); + goto err; + } + + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { + errno = 0; + log_err("Unexpected success in double detach from /foo"); + goto err; + } + + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching non-overridable prog to /foo"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo"); + goto err; + } + + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching different non-overridable prog to /foo"); + goto err; + } + goto out; err: @@ -132,5 +188,9 @@ out: close(foo); close(bar); cleanup_cgroup_environment(); + if (!rc) + printf("PASS\n"); + else + printf("FAIL\n"); return rc; } diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index 0791b949cbe4..c3cfb23e23b5 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -75,7 +75,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index 455ef0d06e93..db036077b644 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) } ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, - BPF_CGROUP_INET_SOCK_CREATE); + BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3ddb58a36d3c..ae752fa4eaa7 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; @@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; + attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a2f9853dd882..4ac6c4b84100 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, + unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -- cgit From 8b74d439e1697110c5e5c600643e823eb1dd0762 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Feb 2017 14:03:52 -0800 Subject: net/llc: avoid BUG_ON() in skb_orphan() It seems nobody used LLC since linux-3.12. Fortunately fuzzers like syzkaller still know how to run this code, otherwise it would be no fun. Setting skb->sk without skb->destructor leads to all kinds of bugs, we now prefer to be very strict about it. Ideally here we would use skb_set_owner() but this helper does not exist yet, only CAN seems to have a private helper for that. Fixes: 376c7311bdb6 ("net: add a temporary sanity check in skb_orphan()") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/llc/llc_conn.c | 3 +++ net/llc/llc_sap.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3e821daf9dd4..8bc5a1bd2d45 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) * another trick required to cope with how the PROCOM state * machine works. -acme */ + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d0e1e804ebd7..5404d0d195cc 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } -- cgit From 202461e2f3c15dbfb05825d29ace0d20cdf55fa4 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 13 Feb 2017 03:31:55 +0100 Subject: tick/broadcast: Prevent deadlock on tick_broadcast_lock tick_broadcast_lock is taken from interrupt context, but the following call chain takes the lock without disabling interrupts: [ 12.703736] _raw_spin_lock+0x3b/0x50 [ 12.703738] tick_broadcast_control+0x5a/0x1a0 [ 12.703742] intel_idle_cpu_online+0x22/0x100 [ 12.703744] cpuhp_invoke_callback+0x245/0x9d0 [ 12.703752] cpuhp_thread_fun+0x52/0x110 [ 12.703754] smpboot_thread_fn+0x276/0x320 So the following deadlock can happen: lock(tick_broadcast_lock); lock(tick_broadcast_lock); intel_idle_cpu_online() is the only place which violates the calling convention of tick_broadcast_control(). This was caused by the removal of the smp function call in course of the cpu hotplug rework. Instead of slapping local_irq_disable/enable() at the call site, we can relax the calling convention and handle it in the core code, which makes the whole machinery more robust. Fixes: 29d7bbada98e ("intel_idle: Remove superfluous SMP fuction call") Reported-by: Gabriel C Signed-off-by: Mike Galbraith Cc: Ruslan Ruslichenko Cc: Jiri Slaby Cc: Greg KH Cc: Borislav Petkov Cc: lwn@lwn.net Cc: Andrew Morton Cc: Linus Torvalds Cc: Anna-Maria Gleixner Cc: Sebastian Siewior Cc: stable Link: http://lkml.kernel.org/r/1486953115.5912.4.camel@gmx.de Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 3109204c87cc..17ac99b60ee5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -347,17 +347,16 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; + unsigned long flags; + /* Protects also the local clockevent device. */ + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; @@ -365,12 +364,11 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + goto out; if (!tick_device_is_functional(dev)) - return; + goto out; - raw_spin_lock(&tick_broadcast_lock); cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); @@ -420,7 +418,8 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) tick_broadcast_setup_oneshot(bc); } } - raw_spin_unlock(&tick_broadcast_lock); +out: + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } EXPORT_SYMBOL_GPL(tick_broadcast_control); -- cgit From 25f71d1c3e98ef0e52371746220d66458eac75bc Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 30 Dec 2016 16:17:55 +0800 Subject: futex: Move futex_init() to core_initcall The UEVENT user mode helper is enabled before the initcalls are executed and is available when the root filesystem has been mounted. The user mode helper is triggered by device init calls and the executable might use the futex syscall. futex_init() is marked __initcall which maps to device_initcall, but there is no guarantee that futex_init() is invoked _before_ the first device init call which triggers the UEVENT user mode helper. If the user mode helper uses the futex syscall before futex_init() then the syscall crashes with a NULL pointer dereference because the futex subsystem has not been initialized yet. Move futex_init() to core_initcall so futexes are initialized before the root filesystem is mounted and the usermode helper becomes available. [ tglx: Rewrote changelog ] Signed-off-by: Yang Yang Cc: jiang.biao2@zte.com.cn Cc: jiang.zhengxiong@zte.com.cn Cc: zhong.weidong@zte.com.cn Cc: deng.huali@zte.com.cn Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1483085875-6130-1-git-send-email-yang.yang29@zte.com.cn Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index 0842c8ca534b..cdf365036141 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3323,4 +3323,4 @@ static int __init futex_init(void) return 0; } -__initcall(futex_init); +core_initcall(futex_init); -- cgit From 35879ee4769099905fa3bda0b21e73d434e2df6a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 10 Feb 2017 07:18:36 -0200 Subject: [media] videodev2.h: go back to limited range Y'CbCr for SRGB and, ADOBERGB This reverts 'commit 7e0739cd9c40 ("[media] videodev2.h: fix sYCC/AdobeYCC default quantization range"). The problem is that many drivers can convert R'G'B' content (often from sensors) to Y'CbCr, but they all produce limited range Y'CbCr. To stay backwards compatible the default quantization range for sRGB and AdobeRGB Y'CbCr encoding should be limited range, not full range, even though the corresponding standards specify full range. Update the V4L2_MAP_QUANTIZATION_DEFAULT define accordingly and also update the documentation. Fixes: 7e0739cd9c40 ("[media] videodev2.h: fix sYCC/AdobeYCC default quantization range") Signed-off-by: Hans Verkuil Cc: # for v4.9 and up Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/pixfmt-007.rst | 23 +++++++++++++++++------ include/uapi/linux/videodev2.h | 7 +++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/media/uapi/v4l/pixfmt-007.rst b/Documentation/media/uapi/v4l/pixfmt-007.rst index 44bb5a7059b3..95a23a28c595 100644 --- a/Documentation/media/uapi/v4l/pixfmt-007.rst +++ b/Documentation/media/uapi/v4l/pixfmt-007.rst @@ -211,7 +211,13 @@ Colorspace sRGB (V4L2_COLORSPACE_SRGB) The :ref:`srgb` standard defines the colorspace used by most webcams and computer graphics. The default transfer function is ``V4L2_XFER_FUNC_SRGB``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is full range. +``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited range. + +Note that the :ref:`sycc` standard specifies full range quantization, +however all current capture hardware supported by the kernel convert +R'G'B' to limited range Y'CbCr. So choosing full range as the default +would break how applications interpret the quantization range. + The chromaticities of the primary colors and the white reference are: @@ -276,7 +282,7 @@ the following ``V4L2_YCBCR_ENC_601`` encoding as defined by :ref:`sycc`: Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range [-0.5…0.5]. This transform is identical to one defined in SMPTE -170M/BT.601. The Y'CbCr quantization is full range. +170M/BT.601. The Y'CbCr quantization is limited range. .. _col-adobergb: @@ -288,10 +294,15 @@ The :ref:`adobergb` standard defines the colorspace used by computer graphics that use the AdobeRGB colorspace. This is also known as the :ref:`oprgb` standard. The default transfer function is ``V4L2_XFER_FUNC_ADOBERGB``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is full -range. The chromaticities of the primary colors and the white reference -are: +``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited +range. + +Note that the :ref:`oprgb` standard specifies full range quantization, +however all current capture hardware supported by the kernel convert +R'G'B' to limited range Y'CbCr. So choosing full range as the default +would break how applications interpret the quantization range. +The chromaticities of the primary colors and the white reference are: .. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}| @@ -344,7 +355,7 @@ the following ``V4L2_YCBCR_ENC_601`` encoding: Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range [-0.5…0.5]. This transform is identical to one defined in SMPTE -170M/BT.601. The Y'CbCr quantization is full range. +170M/BT.601. The Y'CbCr quantization is limited range. .. _col-bt2020: diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 46e8a2e369f9..45184a2ef66c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -362,8 +362,8 @@ enum v4l2_quantization { /* * The default for R'G'B' quantization is always full range, except * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG, SRGB, ADOBERGB, - * XV601 or XV709: those are full range. + * limited range, except for COLORSPACE_JPEG, XV601 or XV709: those + * are full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -379,8 +379,7 @@ enum v4l2_quantization { (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ V4L2_QUANTIZATION_LIM_RANGE : \ (((is_rgb_or_hsv) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \ - (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) || \ - (colsp) == V4L2_COLORSPACE_ADOBERGB || (colsp) == V4L2_COLORSPACE_SRGB ? \ + (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \ V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) enum v4l2_priority { -- cgit From 42980da2eb7eb9695d8efc0c0ef145cbbb993b2c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 11 Feb 2017 09:24:46 -0200 Subject: [media] cec: initiator should be the same as the destination for, poll Poll messages that are used to allocate a logical address should use the same initiator as the destination. Instead, it expected that the initiator was 0xf which is not according to the standard. This also had consequences for the message checks in cec_transmit_msg_fh that incorrectly rejected poll messages with the same initiator and destination. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 87a6b65ed3af..ccda41c2c9e4 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -612,8 +612,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, } memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); if (msg->len == 1) { - if (cec_msg_initiator(msg) != 0xf || - cec_msg_destination(msg) == 0xf) { + if (cec_msg_destination(msg) == 0xf) { dprintk(1, "cec_transmit_msg: invalid poll message\n"); return -EINVAL; } @@ -638,7 +637,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, dprintk(1, "cec_transmit_msg: destination is the adapter itself\n"); return -EINVAL; } - if (cec_msg_initiator(msg) != 0xf && + if (msg->len > 1 && adap->is_configured && !cec_has_log_addr(adap, cec_msg_initiator(msg))) { dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n", cec_msg_initiator(msg)); @@ -1072,7 +1071,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, /* Send poll message */ msg.len = 1; - msg.msg[0] = 0xf0 | log_addr; + msg.msg[0] = (log_addr << 4) | log_addr; err = cec_transmit_msg_fh(adap, &msg, NULL, true); /* -- cgit From 0c59d28121b96d826c188280f367e754b5d83350 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Feb 2017 14:15:44 -0300 Subject: MAINTAINERS: Remove old e-mail address The ghostprotocols.net domain is not working, remove it from CREDITS and MAINTAINERS, and change the status to "Odd fixes", and since I haven't been maintaining those, remove my address from there. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- CREDITS | 5 ++--- MAINTAINERS | 15 ++++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CREDITS b/CREDITS index c58560701d13..c5626bf06264 100644 --- a/CREDITS +++ b/CREDITS @@ -2478,12 +2478,11 @@ S: D-90453 Nuernberg S: Germany N: Arnaldo Carvalho de Melo -E: acme@ghostprotocols.net +E: acme@kernel.org E: arnaldo.melo@gmail.com E: acme@redhat.com -W: http://oops.ghostprotocols.net:81/blog/ P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 -D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks +D: tools/, IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks S: Brazil N: Karsten Merker diff --git a/MAINTAINERS b/MAINTAINERS index 107c10e8f2d2..527d13759ecc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -877,8 +877,8 @@ S: Odd fixes F: drivers/hwmon/applesmc.c APPLETALK NETWORK LAYER -M: Arnaldo Carvalho de Melo -S: Maintained +L: netdev@vger.kernel.org +S: Odd fixes F: drivers/net/appletalk/ F: net/appletalk/ @@ -6727,9 +6727,8 @@ S: Odd Fixes F: drivers/tty/ipwireless/ IPX NETWORK LAYER -M: Arnaldo Carvalho de Melo L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: include/net/ipx.h F: include/uapi/linux/ipx.h F: net/ipx/ @@ -7501,8 +7500,8 @@ S: Maintained F: drivers/misc/lkdtm* LLC (802.2) -M: Arnaldo Carvalho de Melo -S: Maintained +L: netdev@vger.kernel.org +S: Odd fixes F: include/linux/llc.h F: include/uapi/linux/llc.h F: include/net/llc* @@ -13373,10 +13372,8 @@ S: Maintained F: drivers/input/misc/wistron_btns.c WL3501 WIRELESS PCMCIA CARD DRIVER -M: Arnaldo Carvalho de Melo L: linux-wireless@vger.kernel.org -W: http://oops.ghostprotocols.net:81/blog -S: Maintained +S: Odd fixes F: drivers/net/wireless/wl3501* WOLFSON MICROELECTRONICS DRIVERS -- cgit From 3ba5b5ea7dc3a10ef50819b43a9f8de2705f4eec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Feb 2017 15:52:28 +0300 Subject: x86/vm86: Fix unused variable warning if THP is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC complains about unused variable 'vma' in mark_screen_rdonly() if THP is disabled: arch/x86/kernel/vm86_32.c: In function ‘mark_screen_rdonly’: arch/x86/kernel/vm86_32.c:180:26: warning: unused variable ‘vma’ [-Wunused-variable] struct vm_area_struct *vma = find_vma(mm, 0xA0000); That's silly. pmd_trans_huge() resolves to 0 when THP is disabled, so the whole block should be eliminated. Moving the variable declaration outside the if() block shuts GCC up. Reported-by: Jérémy Lefaure Signed-off-by: Kirill A. Shutemov Tested-by: Borislav Petkov Cc: Carlos O'Donell Link: http://lkml.kernel.org/r/20170213125228.63645-1-kirill.shutemov@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vm86_32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index ec5d7545e6dc..0442d98367ae 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,11 +160,12 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) static void mark_screen_rdonly(struct mm_struct *mm) { + struct vm_area_struct *vma; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - spinlock_t *ptl; int i; down_write(&mm->mmap_sem); @@ -177,7 +178,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pmd = pmd_offset(pud, 0xA0000); if (pmd_trans_huge(*pmd)) { - struct vm_area_struct *vma = find_vma(mm, 0xA0000); + vma = find_vma(mm, 0xA0000); split_huge_pmd(vma, pmd, 0xA0000); } if (pmd_none_or_clear_bad(pmd)) -- cgit From ebf692f85ff78092cd238166d8d7ec51419f9c02 Mon Sep 17 00:00:00 2001 From: Mart van Santen Date: Fri, 10 Feb 2017 12:02:18 +0000 Subject: xen-netback: vif counters from int/long to u64 This patch fixes an issue where the type of counters in the queue(s) and interface are not in sync (queue counters are int, interface counters are long), causing incorrect reporting of tx/rx values of the vif interface and unclear counter overflows. This patch sets both counters to the u64 type. Signed-off-by: Mart van Santen Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 8 ++++---- drivers/net/xen-netback/interface.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3ce1f7da8647..530586be05b4 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -113,10 +113,10 @@ struct xenvif_stats { * A subset of struct net_device_stats that contains only the * fields that are updated in netback.c for each queue. */ - unsigned int rx_bytes; - unsigned int rx_packets; - unsigned int tx_bytes; - unsigned int tx_packets; + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; /* Additional stats used by xenvif */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 579521327b03..50fa1692d985 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -221,10 +221,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned long rx_bytes = 0; - unsigned long rx_packets = 0; - unsigned long tx_bytes = 0; - unsigned long tx_packets = 0; + u64 rx_bytes = 0; + u64 rx_packets = 0; + u64 tx_bytes = 0; + u64 tx_packets = 0; unsigned int index; spin_lock(&vif->lock); -- cgit From 4872e57c812dd312bf8193b5933fa60585cda42f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 11 Feb 2017 00:38:57 +0100 Subject: NET: Fix /proc/net/arp for AX.25 When sending ARP requests over AX.25 links the hwaddress in the neighbour cache are not getting initialized. For such an incomplete arp entry ax2asc2 will generate an empty string resulting in /proc/net/arp output like the following: $ cat /proc/net/arp IP address HW type Flags HW address Mask Device 192.168.122.1 0x1 0x2 52:54:00:00:5d:5f * ens3 172.20.1.99 0x3 0x0 * bpq0 The missing field will confuse the procfs parsing of arp(8) resulting in incorrect output for the device such as the following: $ arp Address HWtype HWaddress Flags Mask Iface gateway ether 52:54:00:00:5d:5f C ens3 172.20.1.99 (incomplete) ens3 This changes the content of /proc/net/arp to: $ cat /proc/net/arp IP address HW type Flags HW address Mask Device 172.20.1.99 0x3 0x0 * * bpq0 192.168.122.1 0x1 0x2 52:54:00:00:5d:5f * ens3 To do so it change ax2asc to put the string "*" in buf for a NULL address argument. Finally the HW address field is left aligned in a 17 character field (the length of an ethernet HW address in the usual hex notation) for readability. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- net/ipv4/arp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } -- cgit From 6a25478077d987edc5e2f880590a2bc5fcab4441 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Feb 2017 19:26:45 +0800 Subject: gfs2: Use rhashtable walk interface in glock_hash_walk The function glock_hash_walk walks the rhashtable by hand. This is broken because if it catches the hash table in the middle of a rehash, then it will miss entries. This patch replaces the manual walk by using the rhashtable walk interface. Fixes: 88ffbf3e037e ("GFS2: Use resizable hash table for glocks") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- fs/gfs2/glock.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 94f50cac91c6..70e94170af85 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1420,26 +1420,32 @@ static struct shrinker glock_shrinker = { * @sdp: the filesystem * @bucket: the bucket * + * Note that the function can be called multiple times on the same + * object. So the user must ensure that the function can cope with + * that. */ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct rhash_head *pos; - const struct bucket_table *tbl; - int i; + struct rhashtable_iter iter; - rcu_read_lock(); - tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) { + rhashtable_walk_enter(&gl_hash_table, &iter); + + do { + gl = ERR_PTR(rhashtable_walk_start(&iter)); + if (gl) + continue; + + while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); - } - } - rcu_read_unlock(); - cond_resched(); + + rhashtable_walk_stop(&iter); + } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); } /** -- cgit From 9dbbfb0ab6680c6a85609041011484e6658e7d3c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Feb 2017 19:26:46 +0800 Subject: tipc: Fix tipc_sk_reinit race conditions There are two problems with the function tipc_sk_reinit. Firstly it's doing a manual walk over an rhashtable. This is broken as an rhashtable can be resized and if you manually walk over it during a resize then you may miss entries. Secondly it's missing memory barriers as previously the code used spinlocks which provide the barriers implicitly. This patch fixes both problems. Fixes: 07f6c4bc048a ("tipc: convert tipc reference table to...") Signed-off-by: Herbert Xu Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/net.c | 4 ++++ net/tipc/socket.c | 30 +++++++++++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/net/tipc/net.c b/net/tipc/net.c index 28bf4feeb81c..ab8a2d5d1e32 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -110,6 +110,10 @@ int tipc_net_start(struct net *net, u32 addr) char addr_string[16]; tn->own_addr = addr; + + /* Ensure that the new address is visible before we reinit. */ + smp_mb(); + tipc_named_reinit(net); tipc_sk_reinit(net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 800caaa699a1..370a5912bcb5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -384,8 +384,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; tn = net_generic(sock_net(sk), tipc_net_id); - tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, - NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ sock->ops = ops; @@ -395,6 +393,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } + + /* Ensure tsk is visible before we read own_addr. */ + smp_mb(); + + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_shutdown = 0; @@ -2269,24 +2274,27 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, void tipc_sk_reinit(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - const struct bucket_table *tbl; - struct rhash_head *pos; + struct rhashtable_iter iter; struct tipc_sock *tsk; struct tipc_msg *msg; - int i; - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + rhashtable_walk_enter(&tn->sk_rht, &iter); + + do { + tsk = ERR_PTR(rhashtable_walk_start(&iter)); + if (tsk) + continue; + + while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; msg_set_prevnode(msg, tn->own_addr); msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - } - rcu_read_unlock(); + + rhashtable_walk_stop(&iter); + } while (tsk == ERR_PTR(-EAGAIN)); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) -- cgit From 40137906c5f55c252194ef5834130383e639536f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Feb 2017 19:26:47 +0800 Subject: rhashtable: Add nested tables This patch adds code that handles GFP_ATOMIC kmalloc failure on insertion. As we cannot use vmalloc, we solve it by making our hash table nested. That is, we allocate single pages at each level and reach our desired table size by nesting them. When a nested table is created, only a single page is allocated at the top-level. Lower levels are allocated on demand during insertion. Therefore for each insertion to succeed, only two (non-consecutive) pages are needed. After a nested table is created, a rehash will be scheduled in order to switch to a vmalloced table as soon as possible. Also, the rehash code will never rehash into a nested table. If we detect a nested table during a rehash, the rehash will be aborted and a new rehash will be scheduled. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 78 +++++++++---- lib/rhashtable.c | 270 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 276 insertions(+), 72 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5c132d3188be..f2e12a845910 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,6 +61,7 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -68,10 +69,12 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing + * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; + unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -81,7 +84,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); + #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) +static inline struct rhash_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : + &tbl->buckets[hash]; +} + /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht); * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht); * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - const struct bucket_table *tbl; + struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -697,8 +727,12 @@ slow_path: } elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!pprev) + goto out; + + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -736,7 +770,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -746,7 +780,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 32d0ad058380..172454e6b979 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -32,6 +32,11 @@ #define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 32UL +union nested_table { + union nested_table __rcu *table; + struct rhash_head __rcu *bucket; +}; + static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) @@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, /* Never allocate more than 0.5 locks per bucket */ size = min_t(unsigned int, size, tbl->size >> 1); + if (tbl->nest) + size = min(size, 1U << tbl->nest); + if (sizeof(spinlock_t) != 0) { tbl->locks = NULL; #ifdef CONFIG_NUMA @@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, return 0; } +static void nested_table_free(union nested_table *ntbl, unsigned int size) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + const unsigned int len = 1 << shift; + unsigned int i; + + ntbl = rcu_dereference_raw(ntbl->table); + if (!ntbl) + return; + + if (size > len) { + size >>= shift; + for (i = 0; i < len; i++) + nested_table_free(ntbl + i, size); + } + + kfree(ntbl); +} + +static void nested_bucket_table_free(const struct bucket_table *tbl) +{ + unsigned int size = tbl->size >> tbl->nest; + unsigned int len = 1 << tbl->nest; + union nested_table *ntbl; + unsigned int i; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + + for (i = 0; i < len; i++) + nested_table_free(ntbl + i, size); + + kfree(ntbl); +} + static void bucket_table_free(const struct bucket_table *tbl) { + if (tbl->nest) + nested_bucket_table_free(tbl); + if (tbl) kvfree(tbl->locks); @@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head) bucket_table_free(container_of(head, struct bucket_table, rcu)); } +static union nested_table *nested_table_alloc(struct rhashtable *ht, + union nested_table __rcu **prev, + unsigned int shifted, + unsigned int nhash) +{ + union nested_table *ntbl; + int i; + + ntbl = rcu_dereference(*prev); + if (ntbl) + return ntbl; + + ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); + + if (ntbl && shifted) { + for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) + INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, + (i << shifted) | nhash); + } + + rcu_assign_pointer(*prev, ntbl); + + return ntbl; +} + +static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets, + gfp_t gfp) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + struct bucket_table *tbl; + size_t size; + + if (nbuckets < (1 << (shift + 1))) + return NULL; + + size = sizeof(*tbl) + sizeof(tbl->buckets[0]); + + tbl = kzalloc(size, gfp); + if (!tbl) + return NULL; + + if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, + 0, 0)) { + kfree(tbl); + return NULL; + } + + tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; + + return tbl; +} + static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) @@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); + + size = nbuckets; + + if (tbl == NULL && gfp != GFP_KERNEL) { + tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); + nbuckets = 0; + } if (tbl == NULL) return NULL; - tbl->size = nbuckets; + tbl->size = size; if (alloc_bucket_locks(ht, tbl, gfp) < 0) { bucket_table_free(tbl); @@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, rht_dereference_rcu(old_tbl->future_tbl, ht)); - struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; - int err = -ENOENT; + struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); + int err = -EAGAIN; struct rhash_head *head, *next, *entry; spinlock_t *new_bucket_lock; unsigned int new_hash; + if (new_tbl->nest) + goto out; + + err = -ENOENT; + rht_for_each(entry, old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -202,19 +312,26 @@ out: return err; } -static void rhashtable_rehash_chain(struct rhashtable *ht, +static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); spinlock_t *old_bucket_lock; + int err; old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); spin_lock_bh(old_bucket_lock); - while (!rhashtable_rehash_one(ht, old_hash)) + while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - old_tbl->rehash++; + + if (err == -ENOENT) { + old_tbl->rehash++; + err = 0; + } spin_unlock_bh(old_bucket_lock); + + return err; } static int rhashtable_rehash_attach(struct rhashtable *ht, @@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht) struct bucket_table *new_tbl; struct rhashtable_walker *walker; unsigned int old_hash; + int err; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) return 0; - for (old_hash = 0; old_hash < old_tbl->size; old_hash++) - rhashtable_rehash_chain(ht, old_hash); + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + err = rhashtable_rehash_chain(ht, old_hash); + if (err) + return err; + } /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); @@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } -/** - * rhashtable_expand - Expand hash table while allowing concurrent lookups - * @ht: the hash table to expand - * - * A secondary bucket array is allocated and the hash entries are migrated. - * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. - * - * The caller must ensure that no concurrent resizing occurs by holding - * ht->mutex. - * - * It is valid to have concurrent insertions and deletions protected by per - * bucket locks or concurrent RCU protected lookups and traversals. - */ -static int rhashtable_expand(struct rhashtable *ht) +static int rhashtable_rehash_alloc(struct rhashtable *ht, + struct bucket_table *old_tbl, + unsigned int size) { - struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; int err; ASSERT_RHT_MUTEX(ht); - old_tbl = rhashtable_last_table(ht, old_tbl); - - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; @@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht) */ static int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); unsigned int nelems = atomic_read(&ht->nelems); unsigned int size = 0; - int err; - - ASSERT_RHT_MUTEX(ht); if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); @@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht) if (rht_dereference(old_tbl->future_tbl, ht)) return -EEXIST; - new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); - if (new_tbl == NULL) - return -ENOMEM; - - err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); - if (err) - bucket_table_free(new_tbl); - - return err; + return rhashtable_rehash_alloc(ht, old_tbl, size); } static void rht_deferred_worker(struct work_struct *work) @@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work) tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) - rhashtable_expand(ht); + err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) - rhashtable_shrink(ht); + err = rhashtable_shrink(ht); + else if (tbl->nest) + err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - err = rhashtable_rehash_table(ht); + if (!err) + err = rhashtable_rehash_table(ht); mutex_unlock(&ht->mutex); @@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, int elasticity; elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, struct rhash_head *obj, void *data) { + struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + pprev = rht_bucket_insert(ht, tbl, hash); + if (!pprev) + return ERR_PTR(-ENOMEM); + + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - const struct bucket_table *tbl; + struct bucket_table *tbl; unsigned int i; cancel_work_sync(&ht->run_work); @@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; - for (pos = rht_dereference(tbl->buckets[i], ht), + for (pos = rht_dereference(*rht_bucket(tbl, i), ht), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht) return rhashtable_free_and_destroy(ht, NULL, NULL); } EXPORT_SYMBOL_GPL(rhashtable_destroy); + +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + static struct rhash_head __rcu *rhnull = + (struct rhash_head __rcu *)NULLS_MARKER(0); + unsigned int index = hash & ((1 << tbl->nest) - 1); + unsigned int size = tbl->size >> tbl->nest; + unsigned int subhash = hash; + union nested_table *ntbl; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); + subhash >>= tbl->nest; + + while (ntbl && size > (1 << shift)) { + index = subhash & ((1 << shift) - 1); + ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); + size >>= shift; + subhash >>= shift; + } + + if (!ntbl) + return &rhnull; + + return &ntbl[subhash].bucket; + +} +EXPORT_SYMBOL_GPL(rht_bucket_nested); + +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + unsigned int index = hash & ((1 << tbl->nest) - 1); + unsigned int size = tbl->size >> tbl->nest; + union nested_table *ntbl; + unsigned int shifted; + unsigned int nhash; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + hash >>= tbl->nest; + nhash = index; + shifted = tbl->nest; + ntbl = nested_table_alloc(ht, &ntbl[index].table, + size <= (1 << shift) ? shifted : 0, nhash); + + while (ntbl && size > (1 << shift)) { + index = hash & ((1 << shift) - 1); + size >>= shift; + hash >>= shift; + nhash |= index << shifted; + shifted += shift; + ntbl = nested_table_alloc(ht, &ntbl[index].table, + size <= (1 << shift) ? shifted : 0, + nhash); + } + + if (!ntbl) + return NULL; + + return &ntbl[hash].bucket; + +} +EXPORT_SYMBOL_GPL(rht_bucket_nested_insert); -- cgit From 3d4ef329757cfd5e0b23cce97cdeca7e2df89c99 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 13 Feb 2017 13:46:41 +0200 Subject: mmc: core: fix multi-bit bus width without high-speed mode Commit 577fb13199b1 ("mmc: rework selection of bus speed mode") refactored bus width selection code to mmc_select_bus_width(). However, it also altered the behavior to not call the selection code in non-high-speed modes anymore. This causes 1-bit mode to always be used when the high-speed mode is not enabled, even though 4-bit and 8-bit bus are valid bus widths in the backwards-compatibility (legacy) mode as well (see e.g. 5.3.2 Bus Speed Modes in JEDEC 84-B50). This results in a significant regression in transfer speeds. Fix the code to allow 4-bit and 8-bit widths even without high-speed mode, as before. Tested with a Zynq-7000 PicoZed 7020 board. Fixes: 577fb13199b1 ("mmc: rework selection of bus speed mode") Signed-off-by: Anssi Hannula Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b61b52f9da3d..0fccca075e29 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1706,10 +1706,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, err = mmc_select_hs400(card); if (err) goto free_card; - } else if (mmc_card_hs(card)) { + } else { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); - if (err > 0) { + if (err > 0 && mmc_card_hs(card)) { err = mmc_select_hs_ddr(card); if (err) goto free_card; -- cgit From fed06ee89b78d3af32e235e0e89ad0d946fcb95d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 12 Feb 2017 11:21:31 +0200 Subject: net/mlx5e: Disable preemption when doing TC statistics upcall When called by HW offloading drivers, the TC action (e.g net/sched/act_mirred.c) code uses this_cpu logic, e.g _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets) per the kernel documention, preemption should be disabled, add that. Before the fix, when running with CONFIG_PREEMPT set, we get a BUG: using smp_processor_id() in preemptible [00000000] code: tc/3793 asserion from the TC action (mirred) stats_update callback. Fixes: aad7e08d39bd ('net/mlx5e: Hardware offloaded flower filter statistics support') Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c5282b6aba8b..2ebbe80d8126 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1087,10 +1087,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + preempt_disable(); + tcf_exts_to_list(f->exts, &actions); list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); + preempt_enable(); + return 0; } -- cgit From ec5e3b0a1d41fbda0cc33a45bc9e54e91d9d12c7 Mon Sep 17 00:00:00 2001 From: "Jonathan T. Leighton" Date: Sun, 12 Feb 2017 17:26:06 -0500 Subject: ipv6: Inhibit IPv4-mapped src address on the wire. This patch adds a check for the problematic case of an IPv4-mapped IPv6 source address and a destination address that is neither an IPv4-mapped IPv6 address nor in6addr_any, and returns an appropriate error. The check in done before returning from looking up the route. Signed-off-by: Jonathan T. Leighton Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b6a94ff0bbd0..e164684456df 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1021,6 +1021,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } } #endif + if (ipv6_addr_v4mapped(&fl6->saddr) && + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) + return -EAFNOSUPPORT; return 0; -- cgit From 052d2369d1b479cdbbe020fdd6d057d3c342db74 Mon Sep 17 00:00:00 2001 From: "Jonathan T. Leighton" Date: Sun, 12 Feb 2017 17:26:07 -0500 Subject: ipv6: Handle IPv4-mapped src to in6addr_any dst. This patch adds a check on the type of the source address for the case where the destination address is in6addr_any. If the source is an IPv4-mapped IPv6 source address, the destination is changed to ::ffff:127.0.0.1, and otherwise the destination is changed to ::1. This is done in three locations to handle UDP calls to either connect() or sendmsg() and TCP calls to connect(). Note that udpv6_sendmsg() delays handling an in6addr_any destination until very late, so the patch only needs to handle the case where the source is an IPv4-mapped IPv6 address. Signed-off-by: Jonathan T. Leighton Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 14 +++++++++----- net/ipv6/tcp_ipv6.c | 11 ++++++++--- net/ipv6/udp.c | 4 ++++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a3eaafd87100..eec27f87efac 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (np->sndflow) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type == IPV6_ADDR_ANY) { + if (ipv6_addr_any(&usin->sin6_addr)) { /* * connect to self */ - usin->sin6_addr.s6_addr[15] = 0x01; + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; } + addr_type = ipv6_addr_type(&usin->sin6_addr); + daddr = &usin->sin6_addr; - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; if (__ipv6_only_sock(sk)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index eaad72c3d746..4c60c6f71cd3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -148,8 +148,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 0x1; + if (ipv6_addr_any(&usin->sin6_addr)) { + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; + } addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -188,7 +193,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * TCP over IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8990856f5101..221825a9407a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1033,6 +1033,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; + if (ipv6_addr_any(daddr) && + ipv6_addr_v4mapped(&np->saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + daddr); break; case AF_INET: goto do_udp_sendmsg; -- cgit From 01f8902bcf3ff124d0aeb88a774180ebcec20ace Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Mon, 13 Feb 2017 10:01:25 +0800 Subject: net: fec: fix multicast filtering hardware setup Fix hardware setup of multicast address hash: - Never clear the hardware hash (to avoid packet loss) - Construct the hash register values in software and then write once to hardware Signed-off-by: Rui Sousa Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 38160c2bebcb..8be7034b2e7b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2910,6 +2910,7 @@ static void set_multicast_list(struct net_device *ndev) struct netdev_hw_addr *ha; unsigned int i, bit, data, crc, tmp; unsigned char hash; + unsigned int hash_high = 0, hash_low = 0; if (ndev->flags & IFF_PROMISC) { tmp = readl(fep->hwp + FEC_R_CNTRL); @@ -2932,11 +2933,7 @@ static void set_multicast_list(struct net_device *ndev) return; } - /* Clear filter and add the addresses in hash register - */ - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - + /* Add the addresses in hash register */ netdev_for_each_mc_addr(ha, ndev) { /* calculate crc32 value of mac address */ crc = 0xffffffff; @@ -2954,16 +2951,14 @@ static void set_multicast_list(struct net_device *ndev) */ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; - if (hash > 31) { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - tmp |= 1 << (hash - 32); - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - } else { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); - tmp |= 1 << hash; - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - } + if (hash > 31) + hash_high |= 1 << (hash - 32); + else + hash_low |= 1 << hash; } + + writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW); } /* Set a MAC change in hardware. */ -- cgit From cd27b96bc13841ee7af25837a6ae86fee87273d6 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Feb 2017 11:13:16 -0800 Subject: kcm: fix a null pointer dereference in kcm_sendmsg() In commit 98e3862ca2b1 ("kcm: fix 0-length case for kcm_sendmsg()") I tried to avoid skb allocation for 0-length case, but missed a check for NULL pointer in the non EOR case. Fixes: 98e3862ca2b1 ("kcm: fix 0-length case for kcm_sendmsg()") Reported-by: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Cong Wang Acked-by: Tom Herbert Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 64f0e8531af0..a646f3481240 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1044,8 +1044,10 @@ wait_for_memory: } else { /* Message not complete, save state */ partial_message: - kcm->seq_skb = head; - kcm_tx_msg(head)->last_skb = skb; + if (head) { + kcm->seq_skb = head; + kcm_tx_msg(head)->last_skb = skb; + } } KCM_STATS_ADD(kcm->stats.tx_bytes, copied); -- cgit From a60ced990e309666915d21445e95347d12406694 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 14 Feb 2017 14:42:15 +0200 Subject: net: ethernet: ti: cpsw: fix cpsw assignment in resume There is a copy-paste error, which hides breaking of resume for CPSW driver: there was replaced netdev_priv() to ndev_to_cpsw(ndev) in suspend, but left it unchanged in resume. Fixes: 606f39939595a4d4540406bfc11f265b2036af6d (ti: cpsw: move platform data and slaves info to cpsw_common) Reported-by: Alexey Starikovskiy Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b203143647e6..65088224c207 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -3160,7 +3160,7 @@ static int cpsw_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Select default pin state */ pinctrl_pm_select_default_state(dev); -- cgit From f39f0d1e1e93145a0e91d9a7a639c42fd037ecc3 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 14 Feb 2017 10:22:59 -0600 Subject: ibmvnic: Fix initial MTU settings In the current driver, the MTU is set to the maximum value capable for the backing device. This decision turned out to be a mistake as it led to confusion among users. The expected initial MTU value used for other IBM vNIC capable operating systems is 1500, with the maximum value (9000) reserved for when Jumbo frames are enabled. This patch sets the MTU to the default value for a net device. It also corrects a discrepancy between MTU values received from firmware, which includes the ethernet header length, and net device MTU values. Finally, it removes redundant min/max MTU assignments after device initialization. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 752b0822b020..5b66b4fd1767 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1497,7 +1497,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; - adapter->req_mtu = adapter->max_mtu; + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; } total_queues = adapter->req_tx_queues + adapter->req_rx_queues; @@ -2627,12 +2627,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, break; case MIN_MTU: adapter->min_mtu = be64_to_cpu(crq->query_capability.number); - netdev->min_mtu = adapter->min_mtu; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); break; case MAX_MTU: adapter->max_mtu = be64_to_cpu(crq->query_capability.number); - netdev->max_mtu = adapter->max_mtu; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); break; case MAX_MULTICAST_FILTERS: @@ -3657,9 +3657,7 @@ static void handle_crq_init_rsp(struct work_struct *work) goto task_failed; netdev->real_num_tx_queues = adapter->req_tx_queues; - netdev->mtu = adapter->req_mtu; - netdev->min_mtu = adapter->min_mtu; - netdev->max_mtu = adapter->max_mtu; + netdev->mtu = adapter->req_mtu - ETH_HLEN; if (adapter->failover) { adapter->failover = false; @@ -3799,7 +3797,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->real_num_tx_queues = adapter->req_tx_queues; - netdev->mtu = adapter->req_mtu; + netdev->mtu = adapter->req_mtu - ETH_HLEN; rc = register_netdev(netdev); if (rc) { -- cgit From d199fab63c11998a602205f7ee7ff7c05c97164b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Feb 2017 09:03:51 -0800 Subject: packet: fix races in fanout_add() Multiple threads can call fanout_add() at the same time. We need to grab fanout_mutex earlier to avoid races that could lead to one thread freeing po->rollover that was set by another thread. Do the same in fanout_release(), for peace of mind, and to help us finding lockdep issues earlier. Fixes: dc99f600698d ("packet: Add fanout support.") Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 55 +++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d56ee46b11fc..0f03f6a53b4d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1619,6 +1619,7 @@ static void fanout_release_data(struct packet_fanout *f) static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { + struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; @@ -1641,23 +1642,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } + mutex_lock(&fanout_mutex); + + err = -EINVAL; if (!po->running) - return -EINVAL; + goto out; + err = -EALREADY; if (po->fanout) - return -EALREADY; + goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { - po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); - if (!po->rollover) - return -ENOMEM; - atomic_long_set(&po->rollover->num, 0); - atomic_long_set(&po->rollover->num_huge, 0); - atomic_long_set(&po->rollover->num_failed, 0); + err = -ENOMEM; + rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); + if (!rollover) + goto out; + atomic_long_set(&rollover->num, 0); + atomic_long_set(&rollover->num_huge, 0); + atomic_long_set(&rollover->num_failed, 0); + po->rollover = rollover; } - mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1704,11 +1710,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } out: - mutex_unlock(&fanout_mutex); - if (err) { - kfree(po->rollover); + if (err && rollover) { + kfree(rollover); po->rollover = NULL; } + mutex_unlock(&fanout_mutex); return err; } @@ -1717,23 +1723,22 @@ static void fanout_release(struct sock *sk) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; - f = po->fanout; - if (!f) - return; - mutex_lock(&fanout_mutex); - po->fanout = NULL; + f = po->fanout; + if (f) { + po->fanout = NULL; + + if (atomic_dec_and_test(&f->sk_ref)) { + list_del(&f->list); + dev_remove_pack(&f->prot_hook); + fanout_release_data(f); + kfree(f); + } - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); + if (po->rollover) + kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); - - if (po->rollover) - kfree_rcu(po->rollover, rcu); } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, -- cgit From f9c85ee67164b37f9296eab3b754e543e4e96a1c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Feb 2017 17:47:57 -0200 Subject: [media] siano: make it work again with CONFIG_VMAP_STACK Reported as a Kaffeine bug: https://bugs.kde.org/show_bug.cgi?id=375811 The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. On Kernel 4.9, the default is to not accept DMA on stack anymore on x86 architecture. On other architectures, this has been a requirement since Kernel 2.2. So, after this patch, this driver should likely work fine on all archs. Tested with USB ID 2040:5510: Hauppauge Windham Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/siano/smsusb.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c index a4dcaec31d02..8c1f926567ec 100644 --- a/drivers/media/usb/siano/smsusb.c +++ b/drivers/media/usb/siano/smsusb.c @@ -218,22 +218,30 @@ static int smsusb_start_streaming(struct smsusb_device_t *dev) static int smsusb_sendrequest(void *context, void *buffer, size_t size) { struct smsusb_device_t *dev = (struct smsusb_device_t *) context; - struct sms_msg_hdr *phdr = (struct sms_msg_hdr *) buffer; - int dummy; + struct sms_msg_hdr *phdr; + int dummy, ret; if (dev->state != SMSUSB_ACTIVE) { pr_debug("Device not active yet\n"); return -ENOENT; } + phdr = kmalloc(size, GFP_KERNEL); + if (!phdr) + return -ENOMEM; + memcpy(phdr, buffer, size); + pr_debug("sending %s(%d) size: %d\n", smscore_translate_msg(phdr->msg_type), phdr->msg_type, phdr->msg_length); smsendian_handle_tx_message((struct sms_msg_data *) phdr); - smsendian_handle_message_header((struct sms_msg_hdr *)buffer); - return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), - buffer, size, &dummy, 1000); + smsendian_handle_message_header((struct sms_msg_hdr *)phdr); + ret = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), + phdr, size, &dummy, 1000); + + kfree(phdr); + return ret; } static char *smsusb1_fw_lkup[] = { -- cgit From 12688dc21f71f4dcc9e2b8b5556b0c6cc8df1491 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 13 Feb 2017 11:18:19 +0200 Subject: Revert "i2c: designware: detect when dynamic tar update is possible" This reverts commit 63d0f0a6952a1a02bc4f116b7da7c7887e46efa3. It caused a regression on platforms where I2C controller is synthesized with dynamic TAR update disabled. Detection code is testing is bit DW_IC_CON_10BITADDR_MASTER in register DW_IC_CON read-only but fails to restore original value in case bit is read-write. Instead of fixing this we revert the commit since it was preparation for the commit 0317e6c0f1dc ("i2c: designware: do not disable adapter after transfer") which was also reverted. Reported-by: Shah Nehal-Bakulchandra Reported-by: Suravee Suthikulpanit Acked-By: Lucas De Marchi Cc: # v4.9+ Fixes: 63d0f0a6952a ("i2c: designware: detect when dynamic tar update is possible") Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 45 ++++++++------------------------ drivers/i2c/busses/i2c-designware-core.h | 1 - 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 6d81c56184d3..e9db857c6226 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -475,30 +475,28 @@ static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev) static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) { struct i2c_msg *msgs = dev->msgs; - u32 ic_tar = 0; + u32 ic_con, ic_tar = 0; /* Disable the adapter */ __i2c_dw_enable_and_wait(dev, false); /* if the slave address is ten bit address, enable 10BITADDR */ - if (dev->dynamic_tar_update_enabled) { + ic_con = dw_readl(dev, DW_IC_CON); + if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) { + ic_con |= DW_IC_CON_10BITADDR_MASTER; /* * If I2C_DYNAMIC_TAR_UPDATE is set, the 10-bit addressing - * mode has to be enabled via bit 12 of IC_TAR register, - * otherwise bit 4 of IC_CON is used. + * mode has to be enabled via bit 12 of IC_TAR register. + * We set it always as I2C_DYNAMIC_TAR_UPDATE can't be + * detected from registers. */ - if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) - ic_tar = DW_IC_TAR_10BITADDR_MASTER; + ic_tar = DW_IC_TAR_10BITADDR_MASTER; } else { - u32 ic_con = dw_readl(dev, DW_IC_CON); - - if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) - ic_con |= DW_IC_CON_10BITADDR_MASTER; - else - ic_con &= ~DW_IC_CON_10BITADDR_MASTER; - dw_writel(dev, ic_con, DW_IC_CON); + ic_con &= ~DW_IC_CON_10BITADDR_MASTER; } + dw_writel(dev, ic_con, DW_IC_CON); + /* * Set the slave (target) address and enable 10-bit addressing mode * if applicable. @@ -963,7 +961,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev) { struct i2c_adapter *adap = &dev->adapter; int r; - u32 reg; init_completion(&dev->cmd_complete); @@ -971,26 +968,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev) if (r) return r; - r = i2c_dw_acquire_lock(dev); - if (r) - return r; - - /* - * Test if dynamic TAR update is enabled in this controller by writing - * to IC_10BITADDR_MASTER field in IC_CON: when it is enabled this - * field is read-only so it should not succeed - */ - reg = dw_readl(dev, DW_IC_CON); - dw_writel(dev, reg ^ DW_IC_CON_10BITADDR_MASTER, DW_IC_CON); - - if ((dw_readl(dev, DW_IC_CON) & DW_IC_CON_10BITADDR_MASTER) == - (reg & DW_IC_CON_10BITADDR_MASTER)) { - dev->dynamic_tar_update_enabled = true; - dev_dbg(dev->dev, "Dynamic TAR update enabled"); - } - - i2c_dw_release_lock(dev); - snprintf(adap->name, sizeof(adap->name), "Synopsys DesignWare I2C adapter"); adap->retries = 3; diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 26250b425e2f..c1db3a5a340f 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -125,7 +125,6 @@ struct dw_i2c_dev { int (*acquire_lock)(struct dw_i2c_dev *dev); void (*release_lock)(struct dw_i2c_dev *dev); bool pm_runtime_disabled; - bool dynamic_tar_update_enabled; }; #define ACCESS_SWAP 0x00000001 -- cgit From a725eb15db80643a160310ed6bcfd6c5a6c907f2 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 15 Feb 2017 05:23:26 +0300 Subject: uapi: fix linux/if_pppol2tp.h userspace compilation errors Because of interface limitations, provided by libc cannot be included after , therefore any header that includes cannot be included after . Change uapi/linux/l2tp.h, the last uapi header that includes , to include and instead of and use __SOCK_SIZE__ instead of sizeof(struct sockaddr) the same way as uapi/linux/in.h does, to fix linux/if_pppol2tp.h userspace compilation errors like this: In file included from /usr/include/linux/l2tp.h:12:0, from /usr/include/linux/if_pppol2tp.h:21, /usr/include/netinet/in.h:31:8: error: redefinition of 'struct in_addr' Fixes: 47c3e7783be4 ("net: l2tp: deprecate PPPOL2TP_MSG_* in favour of L2TP_MSG_*") Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 85ddb74fcd1c..b23c1914a182 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -9,9 +9,8 @@ #include #include -#ifndef __KERNEL__ -#include -#endif +#include +#include #define IPPROTO_L2TP 115 @@ -31,7 +30,7 @@ struct sockaddr_l2tpip { __u32 l2tp_conn_id; /* Connection ID of tunnel */ /* Pad to size of `struct sockaddr'. */ - unsigned char __pad[sizeof(struct sockaddr) - + unsigned char __pad[__SOCK_SIZE__ - sizeof(__kernel_sa_family_t) - sizeof(__be16) - sizeof(struct in_addr) - sizeof(__u32)]; -- cgit From e70ac171658679ecf6bea4bbd9e9325cd6079d2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Feb 2017 17:11:14 -0800 Subject: tcp: tcp_probe: use spin_lock_bh() tcp_rcv_established() can now run in process context. We need to disable BH while acquiring tcp probe spinlock, or risk a deadlock. Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Ricardo Nabinger Sanchez Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } -- cgit From 5463b3d043826ff8ef487edbd1ef1bfffb677437 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 14 Feb 2017 08:22:20 +1100 Subject: bpf: kernel header files need to be copied into the tools directory Signed-off-by: Stephen Rothwell Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0eb0e87dbe9f..d2b0ac799d03 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -116,6 +116,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -171,6 +177,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); -- cgit From 6ba4d2722d06960102c981322035239cd66f7316 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 8 Feb 2017 20:30:56 +0530 Subject: fuse: fix use after free issue in fuse_dev_do_read() There is a potential race between fuse_dev_do_write() and request_wait_answer() contexts as shown below: TASK 1: __fuse_request_send(): |--spin_lock(&fiq->waitq.lock); |--queue_request(); |--spin_unlock(&fiq->waitq.lock); |--request_wait_answer(): |--if (test_bit(FR_SENT, &req->flags)) TASK 2: fuse_dev_do_write(): |--clears bit FR_SENT, |--request_end(): |--sets bit FR_FINISHED |--spin_lock(&fiq->waitq.lock); |--list_del_init(&req->intr_entry); |--spin_unlock(&fiq->waitq.lock); |--fuse_put_request(); |--queue_interrupt(); |--wake_up_locked(&fiq->waitq); |--wait_event_freezable(); Now, the next fuse_dev_do_read(), see interrupts list is not empty and then calls fuse_read_interrupt() which tries to access the request which is already free'd and gets the below crash: [11432.401266] Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b6b ... [11432.418518] Kernel BUG at ffffff80083720e0 [11432.456168] PC is at __list_del_entry+0x6c/0xc4 [11432.463573] LR is at fuse_dev_do_read+0x1ac/0x474 ... [11432.679999] [] __list_del_entry+0x6c/0xc4 [11432.687794] [] fuse_dev_do_read+0x1ac/0x474 [11432.693180] [] fuse_dev_read+0x6c/0x78 [11432.699082] [] __vfs_read+0xc0/0xe8 [11432.704459] [] vfs_read+0x90/0x108 [11432.709406] [] SyS_read+0x58/0x94 As FR_FINISHED bit is set before deleting the intr_entry with input queue lock in request completion path, do the testing of this flag and queueing atomically with the same lock in queue_interrupt(). Signed-off-by: Sahitya Tummala Signed-off-by: Miklos Szeredi Fixes: fd22d62ed0c3 ("fuse: no fc->lock for iqueue parts") Cc: # 4.2+ --- fs/fuse/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4e06a27ed7f8..b656e1805f04 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -399,6 +399,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); + if (test_bit(FR_FINISHED, &req->flags)) { + spin_unlock(&fiq->waitq.lock); + return; + } if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); wake_up_locked(&fiq->waitq); -- cgit From bb08c04dc867b5f392caec635c097d5d5fcd8c9f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 14 Feb 2017 14:49:21 +0200 Subject: drm/dp/mst: fix kernel oops when turning off secondary monitor 100% reproducible issue found on SKL SkullCanyon NUC with two external DP daisy-chained monitors in DP/MST mode. When turning off or changing the input of the second monitor the machine stops with a kernel oops. This issue happened with 4.8.8 as well as drm/drm-intel-nightly. This issue is traced to an inconsistent control flow in drm_dp_update_payload_part1(): the 'port' pointer is set to NULL at the same time as 'req_payload.num_slots' is set to zero, but the pointer is dereferenced even when req_payload.num_slot is zero. The problematic dereference was introduced in commit dfda0df34 ("drm/mst: rework payload table allocation to conform better") and may impact all versions since v3.18 The fix suggested by Chris Wilson removes the kernel oops and was found to work well after 10mn of monkey-testing with the second monitor power and input buttons Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98990 Fixes: dfda0df34264 ("drm/mst: rework payload table allocation to conform better.") Cc: Dave Airlie Cc: Chris Wilson Cc: Nathan D Ciobanu Cc: Dhinakaran Pandiyan Cc: Sean Paul Cc: # v3.18+ Tested-by: Nathan D Ciobanu Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Pierre-Louis Bossart Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487076561-2169-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index aa644487749c..f59771da52ee 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1817,7 +1817,7 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) mgr->payloads[i].vcpi = req_payload.vcpi; } else if (mgr->payloads[i].num_slots) { mgr->payloads[i].num_slots = 0; - drm_dp_destroy_payload_step1(mgr, port, port->vcpi.vcpi, &mgr->payloads[i]); + drm_dp_destroy_payload_step1(mgr, port, mgr->payloads[i].vcpi, &mgr->payloads[i]); req_payload.payload_state = mgr->payloads[i].payload_state; mgr->payloads[i].start_slot = 0; } -- cgit From f222449c9dfad7c9bb8cb53e64c5c407b172ebbc Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 15 Feb 2017 13:43:32 +0900 Subject: timekeeping: Use deferred printk() in debug code We cannot do printk() from tk_debug_account_sleep_time(), because tk_debug_account_sleep_time() is called under tk_core seq lock. The reason why printk() is unsafe there is that console_sem may invoke scheduler (up()->wake_up_process()->activate_task()), which, in turn, can return back to timekeeping code, for instance, via get_time()->ktime_get(), deadlocking the system on tk_core seq lock. [ 48.950592] ====================================================== [ 48.950622] [ INFO: possible circular locking dependency detected ] [ 48.950622] 4.10.0-rc7-next-20170213+ #101 Not tainted [ 48.950622] ------------------------------------------------------- [ 48.950622] kworker/0:0/3 is trying to acquire lock: [ 48.950653] (tk_core){----..}, at: [] retrigger_next_event+0x4c/0x90 [ 48.950683] but task is already holding lock: [ 48.950683] (hrtimer_bases.lock){-.-...}, at: [] retrigger_next_event+0x38/0x90 [ 48.950714] which lock already depends on the new lock. [ 48.950714] the existing dependency chain (in reverse order) is: [ 48.950714] -> #5 (hrtimer_bases.lock){-.-...}: [ 48.950744] _raw_spin_lock_irqsave+0x50/0x64 [ 48.950775] lock_hrtimer_base+0x28/0x58 [ 48.950775] hrtimer_start_range_ns+0x20/0x5c8 [ 48.950775] __enqueue_rt_entity+0x320/0x360 [ 48.950805] enqueue_rt_entity+0x2c/0x44 [ 48.950805] enqueue_task_rt+0x24/0x94 [ 48.950836] ttwu_do_activate+0x54/0xc0 [ 48.950836] try_to_wake_up+0x248/0x5c8 [ 48.950836] __setup_irq+0x420/0x5f0 [ 48.950836] request_threaded_irq+0xdc/0x184 [ 48.950866] devm_request_threaded_irq+0x58/0xa4 [ 48.950866] omap_i2c_probe+0x530/0x6a0 [ 48.950897] platform_drv_probe+0x50/0xb0 [ 48.950897] driver_probe_device+0x1f8/0x2cc [ 48.950897] __driver_attach+0xc0/0xc4 [ 48.950927] bus_for_each_dev+0x6c/0xa0 [ 48.950927] bus_add_driver+0x100/0x210 [ 48.950927] driver_register+0x78/0xf4 [ 48.950958] do_one_initcall+0x3c/0x16c [ 48.950958] kernel_init_freeable+0x20c/0x2d8 [ 48.950958] kernel_init+0x8/0x110 [ 48.950988] ret_from_fork+0x14/0x24 [ 48.950988] -> #4 (&rt_b->rt_runtime_lock){-.-...}: [ 48.951019] _raw_spin_lock+0x40/0x50 [ 48.951019] rq_offline_rt+0x9c/0x2bc [ 48.951019] set_rq_offline.part.2+0x2c/0x58 [ 48.951049] rq_attach_root+0x134/0x144 [ 48.951049] cpu_attach_domain+0x18c/0x6f4 [ 48.951049] build_sched_domains+0xba4/0xd80 [ 48.951080] sched_init_smp+0x68/0x10c [ 48.951080] kernel_init_freeable+0x160/0x2d8 [ 48.951080] kernel_init+0x8/0x110 [ 48.951080] ret_from_fork+0x14/0x24 [ 48.951110] -> #3 (&rq->lock){-.-.-.}: [ 48.951110] _raw_spin_lock+0x40/0x50 [ 48.951141] task_fork_fair+0x30/0x124 [ 48.951141] sched_fork+0x194/0x2e0 [ 48.951141] copy_process.part.5+0x448/0x1a20 [ 48.951171] _do_fork+0x98/0x7e8 [ 48.951171] kernel_thread+0x2c/0x34 [ 48.951171] rest_init+0x1c/0x18c [ 48.951202] start_kernel+0x35c/0x3d4 [ 48.951202] 0x8000807c [ 48.951202] -> #2 (&p->pi_lock){-.-.-.}: [ 48.951232] _raw_spin_lock_irqsave+0x50/0x64 [ 48.951232] try_to_wake_up+0x30/0x5c8 [ 48.951232] up+0x4c/0x60 [ 48.951263] __up_console_sem+0x2c/0x58 [ 48.951263] console_unlock+0x3b4/0x650 [ 48.951263] vprintk_emit+0x270/0x474 [ 48.951293] vprintk_default+0x20/0x28 [ 48.951293] printk+0x20/0x30 [ 48.951324] kauditd_hold_skb+0x94/0xb8 [ 48.951324] kauditd_thread+0x1a4/0x56c [ 48.951324] kthread+0x104/0x148 [ 48.951354] ret_from_fork+0x14/0x24 [ 48.951354] -> #1 ((console_sem).lock){-.....}: [ 48.951385] _raw_spin_lock_irqsave+0x50/0x64 [ 48.951385] down_trylock+0xc/0x2c [ 48.951385] __down_trylock_console_sem+0x24/0x80 [ 48.951385] console_trylock+0x10/0x8c [ 48.951416] vprintk_emit+0x264/0x474 [ 48.951416] vprintk_default+0x20/0x28 [ 48.951416] printk+0x20/0x30 [ 48.951446] tk_debug_account_sleep_time+0x5c/0x70 [ 48.951446] __timekeeping_inject_sleeptime.constprop.3+0x170/0x1a0 [ 48.951446] timekeeping_resume+0x218/0x23c [ 48.951477] syscore_resume+0x94/0x42c [ 48.951477] suspend_enter+0x554/0x9b4 [ 48.951477] suspend_devices_and_enter+0xd8/0x4b4 [ 48.951507] enter_state+0x934/0xbd4 [ 48.951507] pm_suspend+0x14/0x70 [ 48.951507] state_store+0x68/0xc8 [ 48.951538] kernfs_fop_write+0xf4/0x1f8 [ 48.951538] __vfs_write+0x1c/0x114 [ 48.951538] vfs_write+0xa0/0x168 [ 48.951568] SyS_write+0x3c/0x90 [ 48.951568] __sys_trace_return+0x0/0x10 [ 48.951568] -> #0 (tk_core){----..}: [ 48.951599] lock_acquire+0xe0/0x294 [ 48.951599] ktime_get_update_offsets_now+0x5c/0x1d4 [ 48.951629] retrigger_next_event+0x4c/0x90 [ 48.951629] on_each_cpu+0x40/0x7c [ 48.951629] clock_was_set_work+0x14/0x20 [ 48.951660] process_one_work+0x2b4/0x808 [ 48.951660] worker_thread+0x3c/0x550 [ 48.951660] kthread+0x104/0x148 [ 48.951690] ret_from_fork+0x14/0x24 [ 48.951690] other info that might help us debug this: [ 48.951690] Chain exists of: tk_core --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock [ 48.951721] Possible unsafe locking scenario: [ 48.951721] CPU0 CPU1 [ 48.951721] ---- ---- [ 48.951721] lock(hrtimer_bases.lock); [ 48.951751] lock(&rt_b->rt_runtime_lock); [ 48.951751] lock(hrtimer_bases.lock); [ 48.951751] lock(tk_core); [ 48.951782] *** DEADLOCK *** [ 48.951782] 3 locks held by kworker/0:0/3: [ 48.951782] #0: ("events"){.+.+.+}, at: [] process_one_work+0x1f8/0x808 [ 48.951812] #1: (hrtimer_work){+.+...}, at: [] process_one_work+0x1f8/0x808 [ 48.951843] #2: (hrtimer_bases.lock){-.-...}, at: [] retrigger_next_event+0x38/0x90 [ 48.951843] stack backtrace: [ 48.951873] CPU: 0 PID: 3 Comm: kworker/0:0 Not tainted 4.10.0-rc7-next-20170213+ [ 48.951904] Workqueue: events clock_was_set_work [ 48.951904] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 48.951934] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 48.951934] [] (dump_stack) from [] (print_circular_bug+0x1d0/0x308) [ 48.951965] [] (print_circular_bug) from [] (validate_chain+0xf50/0x1324) [ 48.951965] [] (validate_chain) from [] (__lock_acquire+0x468/0x7e8) [ 48.951995] [] (__lock_acquire) from [] (lock_acquire+0xe0/0x294) [ 48.951995] [] (lock_acquire) from [] (ktime_get_update_offsets_now+0x5c/0x1d4) [ 48.952026] [] (ktime_get_update_offsets_now) from [] (retrigger_next_event+0x4c/0x90) [ 48.952026] [] (retrigger_next_event) from [] (on_each_cpu+0x40/0x7c) [ 48.952056] [] (on_each_cpu) from [] (clock_was_set_work+0x14/0x20) [ 48.952056] [] (clock_was_set_work) from [] (process_one_work+0x2b4/0x808) [ 48.952087] [] (process_one_work) from [] (worker_thread+0x3c/0x550) [ 48.952087] [] (worker_thread) from [] (kthread+0x104/0x148) [ 48.952087] [] (kthread) from [] (ret_from_fork+0x14/0x24) Replace printk() with printk_deferred(), which does not call into the scheduler. Fixes: 0bf43f15db85 ("timekeeping: Prints the amounts of time spent during suspend") Reported-and-tested-by: Tony Lindgren Signed-off-by: Sergey Senozhatsky Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Peter Zijlstra Cc: "Rafael J . Wysocki" Cc: Steven Rostedt Cc: John Stultz Cc: "[4.9+]" Link: http://lkml.kernel.org/r/20170215044332.30449-1-sergey.senozhatsky@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index ca9fb800336b..38bc4d2208e8 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -75,7 +75,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec, - t->tv_nsec / NSEC_PER_MSEC); + printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", + (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } -- cgit From afe3e4d11bdf50a4c3965eb6465ba6bebbcf5dcf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 14 Feb 2017 21:17:48 -0800 Subject: PCI/PME: Restore pcie_pme_driver.remove In addition to making PME non-modular, d7def2040077 ("PCI/PME: Make explicitly non-modular") removed the pcie_pme_driver .remove() method, pcie_pme_remove(). pcie_pme_remove() freed the PME IRQ that was requested in pci_pme_probe(). The fact that we don't free the IRQ after d7def2040077 causes the following crash when removing a PCIe port device via /sys: ------------[ cut here ]------------ kernel BUG at drivers/pci/msi.c:370! invalid opcode: 0000 [#1] SMP Modules linked in: CPU: 1 PID: 14509 Comm: sh Tainted: G W 4.8.0-rc1-yh-00012-gd29438d RIP: 0010:[] free_msi_irqs+0x65/0x190 ... Call Trace: [] pci_disable_msi+0x34/0x40 [] cleanup_service_irqs+0x27/0x30 [] pcie_port_device_remove+0x2a/0x40 [] pcie_portdrv_remove+0x40/0x50 [] pci_device_remove+0x4b/0xc0 [] __device_release_driver+0xb6/0x150 [] device_release_driver+0x25/0x40 [] pci_stop_bus_device+0x74/0xa0 [] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [] remove_store+0x50/0x70 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x44/0x60 [] kernfs_fop_write+0x10e/0x190 [] __vfs_write+0x28/0x110 [] ? percpu_down_read+0x44/0x80 [] ? __sb_start_write+0xa7/0xe0 [] ? __sb_start_write+0xa7/0xe0 [] vfs_write+0xc4/0x180 [] SyS_write+0x49/0xa0 [] do_syscall_64+0xa6/0x1b0 [] entry_SYSCALL64_slow_path+0x25/0x25 ... RIP [] free_msi_irqs+0x65/0x190 RSP ---[ end trace f4505e1dac5b95d3 ]--- Segmentation fault Restore pcie_pme_remove(). [bhelgaas: changelog] Fixes: d7def2040077 ("PCI/PME: Make explicitly non-modular") Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: stable@vger.kernel.org # v4.9+ --- drivers/pci/pcie/pme.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 717529331dac..2dd1c68e6de8 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -433,6 +433,17 @@ static int pcie_pme_resume(struct pcie_device *srv) return 0; } +/** + * pcie_pme_remove - Prepare PCIe PME service device for removal. + * @srv - PCIe service device to remove. + */ +static void pcie_pme_remove(struct pcie_device *srv) +{ + pcie_pme_suspend(srv); + free_irq(srv->irq, srv); + kfree(get_service_data(srv)); +} + static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", .port_type = PCI_EXP_TYPE_ROOT_PORT, @@ -441,6 +452,7 @@ static struct pcie_port_service_driver pcie_pme_driver = { .probe = pcie_pme_probe, .suspend = pcie_pme_suspend, .resume = pcie_pme_resume, + .remove = pcie_pme_remove, }; /** -- cgit From cd224553641848dd17800fe559e4ff5d208553e8 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 14 Feb 2017 19:11:44 +0200 Subject: net: xilinx_emaclite: fix receive buffer overflow xilinx_emaclite looks at the received data to try to determine the Ethernet packet length but does not properly clamp it if proto_type == ETH_P_IP or 1500 < proto_type <= 1518, causing a buffer overflow and a panic via skb_panic() as the length exceeds the allocated skb size. Fix those cases. Also add an additional unconditional check with WARN_ON() at the end. Signed-off-by: Anssi Hannula Fixes: bb81b2ddfa19 ("net: add Xilinx emac lite device driver") Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 93dc10b10c09..455774e4741a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -369,7 +369,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, * * Return: Total number of bytes received */ -static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) +static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) { void __iomem *addr; u16 length, proto_type; @@ -409,7 +409,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) /* Check if received ethernet frame is a raw ethernet frame * or an IP packet or an ARP packet */ - if (proto_type > (ETH_FRAME_LEN + ETH_FCS_LEN)) { + if (proto_type > ETH_DATA_LEN) { if (proto_type == ETH_P_IP) { length = ((ntohl(__raw_readl(addr + @@ -417,6 +417,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & XEL_RPLR_LENGTH_MASK); + length = min_t(u16, length, ETH_DATA_LEN); length += ETH_HLEN + ETH_FCS_LEN; } else if (proto_type == ETH_P_ARP) @@ -429,6 +430,9 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) /* Use the length in the frame, plus the header and trailer */ length = proto_type + ETH_HLEN + ETH_FCS_LEN; + if (WARN_ON(length > maxlen)) + length = maxlen; + /* Read from the EmacLite device */ xemaclite_aligned_read((u32 __force *) (addr + XEL_RXBUFF_OFFSET), data, length); @@ -603,7 +607,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_reserve(skb, 2); - len = xemaclite_recv_data(lp, (u8 *) skb->data); + len = xemaclite_recv_data(lp, (u8 *) skb->data, len); if (!len) { dev->stats.rx_errors++; -- cgit From acf138f1b00bdd1b7cd9894562ed0c2a1670888e Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Tue, 14 Feb 2017 19:11:45 +0200 Subject: net: xilinx_emaclite: fix freezes due to unordered I/O The xilinx_emaclite uses __raw_writel and __raw_readl for register accesses. Those functions do not imply any kind of memory barriers and they may be reordered. The driver does not seem to take that into account, though, and the driver does not satisfy the ordering requirements of the hardware. For clear examples, see xemaclite_mdio_write() and xemaclite_mdio_read() which try to set MDIO address before initiating the transaction. I'm seeing system freezes with the driver with GCC 5.4 and current Linux kernels on Zynq-7000 SoC immediately when trying to use the interface. In commit 123c1407af87 ("net: emaclite: Do not use microblaze and ppc IO functions") the driver was switched from non-generic in_be32/out_be32 (memory barriers, big endian) to __raw_readl/__raw_writel (no memory barriers, native endian), so apparently the device follows system endianness and the driver was originally written with the assumption of memory barriers. Rather than try to hunt for each case of missing barrier, just switch the driver to use iowrite32/ioread32/iowrite32be/ioread32be depending on endianness instead. Tested on little-endian Zynq-7000 ARM SoC FPGA. Signed-off-by: Anssi Hannula Fixes: 123c1407af87 ("net: emaclite: Do not use microblaze and ppc IO functions") Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 116 ++++++++++++++------------ 1 file changed, 62 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 455774e4741a..aa02a03a6d8d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -100,6 +100,14 @@ /* BUFFER_ALIGN(adr) calculates the number of bytes to the next alignment. */ #define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32) adr)) % ALIGNMENT) +#ifdef __BIG_ENDIAN +#define xemaclite_readl ioread32be +#define xemaclite_writel iowrite32be +#else +#define xemaclite_readl ioread32 +#define xemaclite_writel iowrite32 +#endif + /** * struct net_local - Our private per device data * @ndev: instance of the network device @@ -156,15 +164,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata) u32 reg_data; /* Enable the Tx interrupts for the first Buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET); - __raw_writel(reg_data | XEL_TSR_XMIT_IE_MASK, - drvdata->base_addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data | XEL_TSR_XMIT_IE_MASK, + drvdata->base_addr + XEL_TSR_OFFSET); /* Enable the Rx interrupts for the first buffer */ - __raw_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET); + xemaclite_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET); /* Enable the Global Interrupt Enable */ - __raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); + xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); } /** @@ -179,17 +187,17 @@ static void xemaclite_disable_interrupts(struct net_local *drvdata) u32 reg_data; /* Disable the Global Interrupt Enable */ - __raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); + xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); /* Disable the Tx interrupts for the first buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET); - __raw_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK), - drvdata->base_addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK), + drvdata->base_addr + XEL_TSR_OFFSET); /* Disable the Rx interrupts for the first buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_RSR_OFFSET); - __raw_writel(reg_data & (~XEL_RSR_RECV_IE_MASK), - drvdata->base_addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_RSR_OFFSET); + xemaclite_writel(reg_data & (~XEL_RSR_RECV_IE_MASK), + drvdata->base_addr + XEL_RSR_OFFSET); } /** @@ -321,7 +329,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, byte_count = ETH_FRAME_LEN; /* Check if the expected buffer is available */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK)) == 0) { @@ -334,7 +342,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, addr = (void __iomem __force *)((u32 __force)addr ^ XEL_BUFFER_OFFSET); - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK)) != 0) @@ -345,16 +353,16 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, /* Write the frame to the buffer */ xemaclite_aligned_write(data, (u32 __force *) addr, byte_count); - __raw_writel((byte_count & XEL_TPLR_LENGTH_MASK), - addr + XEL_TPLR_OFFSET); + xemaclite_writel((byte_count & XEL_TPLR_LENGTH_MASK), + addr + XEL_TPLR_OFFSET); /* Update the Tx Status Register to indicate that there is a * frame to send. Set the XEL_TSR_XMIT_ACTIVE_MASK flag which * is used by the interrupt handler to check whether a frame * has been transmitted */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); reg_data |= (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK); - __raw_writel(reg_data, addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data, addr + XEL_TSR_OFFSET); return 0; } @@ -379,7 +387,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) addr = (drvdata->base_addr + drvdata->next_rx_buf_to_use); /* Verify which buffer has valid data */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); if ((reg_data & XEL_RSR_RECV_DONE_MASK) == XEL_RSR_RECV_DONE_MASK) { if (drvdata->rx_ping_pong != 0) @@ -396,14 +404,14 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) return 0; /* No data was available */ /* Verify that buffer has valid data */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); if ((reg_data & XEL_RSR_RECV_DONE_MASK) != XEL_RSR_RECV_DONE_MASK) return 0; /* No data was available */ } /* Get the protocol type of the ethernet frame that arrived */ - proto_type = ((ntohl(__raw_readl(addr + XEL_HEADER_OFFSET + + proto_type = ((ntohl(xemaclite_readl(addr + XEL_HEADER_OFFSET + XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & XEL_RPLR_LENGTH_MASK); @@ -412,7 +420,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) if (proto_type > ETH_DATA_LEN) { if (proto_type == ETH_P_IP) { - length = ((ntohl(__raw_readl(addr + + length = ((ntohl(xemaclite_readl(addr + XEL_HEADER_IP_LENGTH_OFFSET + XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & @@ -438,9 +446,9 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) data, length); /* Acknowledge the frame */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); reg_data &= ~XEL_RSR_RECV_DONE_MASK; - __raw_writel(reg_data, addr + XEL_RSR_OFFSET); + xemaclite_writel(reg_data, addr + XEL_RSR_OFFSET); return length; } @@ -467,14 +475,14 @@ static void xemaclite_update_address(struct net_local *drvdata, xemaclite_aligned_write(address_ptr, (u32 __force *) addr, ETH_ALEN); - __raw_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET); + xemaclite_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET); /* Update the MAC address in the EmacLite */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); - __raw_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET); /* Wait for EmacLite to finish with the MAC address update */ - while ((__raw_readl(addr + XEL_TSR_OFFSET) & + while ((xemaclite_readl(addr + XEL_TSR_OFFSET) & XEL_TSR_PROG_MAC_ADDR) != 0) ; } @@ -644,32 +652,32 @@ static irqreturn_t xemaclite_interrupt(int irq, void *dev_id) u32 tx_status; /* Check if there is Rx Data available */ - if ((__raw_readl(base_addr + XEL_RSR_OFFSET) & + if ((xemaclite_readl(base_addr + XEL_RSR_OFFSET) & XEL_RSR_RECV_DONE_MASK) || - (__raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET) + (xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET) & XEL_RSR_RECV_DONE_MASK)) xemaclite_rx_handler(dev); /* Check if the Transmission for the first buffer is completed */ - tx_status = __raw_readl(base_addr + XEL_TSR_OFFSET); + tx_status = xemaclite_readl(base_addr + XEL_TSR_OFFSET); if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) && (tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) { tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK; - __raw_writel(tx_status, base_addr + XEL_TSR_OFFSET); + xemaclite_writel(tx_status, base_addr + XEL_TSR_OFFSET); tx_complete = true; } /* Check if the Transmission for the second buffer is completed */ - tx_status = __raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); + tx_status = xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) && (tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) { tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK; - __raw_writel(tx_status, base_addr + XEL_BUFFER_OFFSET + - XEL_TSR_OFFSET); + xemaclite_writel(tx_status, base_addr + XEL_BUFFER_OFFSET + + XEL_TSR_OFFSET); tx_complete = true; } @@ -702,7 +710,7 @@ static int xemaclite_mdio_wait(struct net_local *lp) /* wait for the MDIO interface to not be busy or timeout after some time. */ - while (__raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) & + while (xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) & XEL_MDIOCTRL_MDIOSTS_MASK) { if (time_before_eq(end, jiffies)) { WARN_ON(1); @@ -738,17 +746,17 @@ static int xemaclite_mdio_read(struct mii_bus *bus, int phy_id, int reg) * MDIO Address register. Set the Status bit in the MDIO Control * register to start a MDIO read transaction. */ - ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); - __raw_writel(XEL_MDIOADDR_OP_MASK | - ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), - lp->base_addr + XEL_MDIOADDR_OFFSET); - __raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(XEL_MDIOADDR_OP_MASK | + ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), + lp->base_addr + XEL_MDIOADDR_OFFSET); + xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); if (xemaclite_mdio_wait(lp)) return -ETIMEDOUT; - rc = __raw_readl(lp->base_addr + XEL_MDIORD_OFFSET); + rc = xemaclite_readl(lp->base_addr + XEL_MDIORD_OFFSET); dev_dbg(&lp->ndev->dev, "xemaclite_mdio_read(phy_id=%i, reg=%x) == %x\n", @@ -785,13 +793,13 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, * Data register. Finally, set the Status bit in the MDIO Control * register to start a MDIO write transaction. */ - ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); - __raw_writel(~XEL_MDIOADDR_OP_MASK & - ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), - lp->base_addr + XEL_MDIOADDR_OFFSET); - __raw_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET); - __raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(~XEL_MDIOADDR_OP_MASK & + ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), + lp->base_addr + XEL_MDIOADDR_OFFSET); + xemaclite_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET); + xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); return 0; } @@ -838,8 +846,8 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) /* Enable the MDIO bus by asserting the enable bit in MDIO Control * register. */ - __raw_writel(XEL_MDIOCTRL_MDIOEN_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(XEL_MDIOCTRL_MDIOEN_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); bus = mdiobus_alloc(); if (!bus) { @@ -1144,8 +1152,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev) } /* Clear the Tx CSR's in case this is a restart */ - __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET); - __raw_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); + xemaclite_writel(0, lp->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); /* Set the MAC address in the EmacLite device */ xemaclite_update_address(lp, ndev->dev_addr); -- cgit From 7627ae6030f56a9a91a5b3867b21f35d79c16e64 Mon Sep 17 00:00:00 2001 From: Marcus Huewe Date: Wed, 15 Feb 2017 01:00:36 +0100 Subject: net: neigh: Fix netevent NETEVENT_DELAY_PROBE_TIME_UPDATE notification When setting a neigh related sysctl parameter, we always send a NETEVENT_DELAY_PROBE_TIME_UPDATE netevent. For instance, when executing sysctl net.ipv6.neigh.wlp3s0.retrans_time_ms=2000 a NETEVENT_DELAY_PROBE_TIME_UPDATE netevent is generated. This is caused by commit 2a4501ae18b5 ("neigh: Send a notification when DELAY_PROBE_TIME changes"). According to the commit's description, it was intended to generate such an event when setting the "delay_first_probe_time" sysctl parameter. In order to fix this, only generate this event when actually setting the "delay_first_probe_time" sysctl parameter. This fix should not have any unintended side-effects, because all but one registered netevent callbacks check for other netevent event types (the registered callbacks were obtained by grepping for "register_netevent_notifier"). The only callback that uses the NETEVENT_DELAY_PROBE_TIME_UPDATE event is mlxsw_sp_router_netevent_event() (in drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c): in case of this event, it only accesses the DELAY_PROBE_TIME of the passed neigh_parms. Fixes: 2a4501ae18b5 ("neigh: Send a notification when DELAY_PROBE_TIME changes") Signed-off-by: Marcus Huewe Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb12e07ffef..e7c12caa20c8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2923,7 +2923,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); - call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); + if (index == NEIGH_VAR_DELAY_PROBE_TIME) + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } -- cgit From 28f4d16570dcf440e54a4d72666d5be452f27d0e Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 15 Feb 2017 10:32:11 -0600 Subject: ibmvnic: Fix endian error when requesting device capabilities When a vNIC client driver requests a faulty device setting, the server returns an acceptable value for the client to request. This 64 bit value was incorrectly being swapped as a 32 bit value, resulting in loss of data. This patch corrects that by using the 64 bit swap function. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5b66b4fd1767..158b49a0a1d6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2389,10 +2389,10 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, case PARTIALSUCCESS: dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", *req_value, - (long int)be32_to_cpu(crq->request_capability_rsp. + (long int)be64_to_cpu(crq->request_capability_rsp. number), name); release_sub_crqs_no_irqs(adapter); - *req_value = be32_to_cpu(crq->request_capability_rsp.number); + *req_value = be64_to_cpu(crq->request_capability_rsp.number); init_sub_crqs(adapter, 1); return; default: -- cgit From 75224c93fa985f4a6fb983f53208f5c5aa555fbf Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 15 Feb 2017 10:33:33 -0600 Subject: ibmvnic: Fix endian errors in error reporting output Error reports received from firmware were not being converted from big endian values, leading to bogus error codes reported on little endian systems. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 158b49a0a1d6..a07b8d79174c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2186,12 +2186,12 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, if (!found) { dev_err(dev, "Couldn't find error id %x\n", - crq->request_error_rsp.error_id); + be32_to_cpu(crq->request_error_rsp.error_id)); return; } dev_err(dev, "Detailed info for error id %x:", - crq->request_error_rsp.error_id); + be32_to_cpu(crq->request_error_rsp.error_id)); for (i = 0; i < error_buff->len; i++) { pr_cont("%02x", (int)error_buff->buff[i]); @@ -2270,8 +2270,8 @@ static void handle_error_indication(union ibmvnic_crq *crq, dev_err(dev, "Firmware reports %serror id %x, cause %d\n", crq->error_indication. flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "", - crq->error_indication.error_id, - crq->error_indication.error_cause); + be32_to_cpu(crq->error_indication.error_id), + be16_to_cpu(crq->error_indication.error_cause)); error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC); if (!error_buff) -- cgit From 3f91a89d424a79f8082525db5a375e438887bb3e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Feb 2017 13:49:21 +1100 Subject: powerpc/64: Disable use of radix under a hypervisor Currently, if the kernel is running on a POWER9 processor under a hypervisor, it may try to use the radix MMU even though it doesn't have the necessary code to do so (it doesn't negotiate use of radix, and it doesn't do the H_REGISTER_PROC_TBL hcall). If the hypervisor supports both radix and HPT, then it will set up the guest to use HPT (since the guest doesn't request radix in the CAS call), but if the radix feature bit is set in the ibm,pa-features property (which is valid, since ibm,pa-features is defined to represent the capabilities of the processor) the guest will try to use radix, resulting in a crash when it turns the MMU on. This makes the minimal fix for the current code, which is to disable radix unless we are running in hypervisor mode. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 93abf8a9813d..8e1588021d1c 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -347,7 +347,8 @@ early_param("disable_radix", parse_disable_radix); void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ - if (disable_radix) + /* We don't yet have the machinery to do radix as a guest. */ + if (disable_radix || !(mfmsr() & MSR_HV)) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; if (early_radix_enabled()) -- cgit From bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 15 Feb 2017 22:29:51 -0500 Subject: rhashtable: Revert nested table changes. This reverts commits: 6a25478077d987edc5e2f880590a2bc5fcab4441 9dbbfb0ab6680c6a85609041011484e6658e7d3c 40137906c5f55c252194ef5834130383e639536f It's too risky to put in this late in the release cycle. We'll put these changes into the next merge window instead. Signed-off-by: David S. Miller --- fs/gfs2/glock.c | 28 ++--- include/linux/rhashtable.h | 78 ++++--------- lib/rhashtable.c | 270 +++++++++------------------------------------ net/tipc/net.c | 4 - net/tipc/socket.c | 30 ++--- 5 files changed, 94 insertions(+), 316 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 70e94170af85..94f50cac91c6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1420,32 +1420,26 @@ static struct shrinker glock_shrinker = { * @sdp: the filesystem * @bucket: the bucket * - * Note that the function can be called multiple times on the same - * object. So the user must ensure that the function can cope with - * that. */ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct rhashtable_iter iter; - - rhashtable_walk_enter(&gl_hash_table, &iter); - - do { - gl = ERR_PTR(rhashtable_walk_start(&iter)); - if (gl) - continue; + struct rhash_head *pos; + const struct bucket_table *tbl; + int i; - while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) + rcu_read_lock(); + tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) { if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); - - rhashtable_walk_stop(&iter); - } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); - - rhashtable_walk_exit(&iter); + } + } + rcu_read_unlock(); + cond_resched(); } /** diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,7 +61,6 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets - * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -69,12 +68,10 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing - * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; - unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -84,7 +81,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -377,12 +374,6 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); - #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -398,27 +389,6 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_head __rcu *const *rht_bucket( - const struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : - &tbl->buckets[hash]; -} - -static inline struct rhash_head __rcu **rht_bucket_var( - struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : - &tbl->buckets[hash]; -} - -static inline struct rhash_head __rcu **rht_bucket_insert( - struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : - &tbl->buckets[hash]; -} - /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -438,7 +408,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -463,7 +433,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ tbl, hash, member) /** @@ -478,13 +448,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -515,7 +485,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -548,8 +518,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) /** @@ -595,7 +565,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct bucket_table *tbl; + const struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -727,12 +697,8 @@ slow_path: } elasticity = ht->elasticity; - pprev = rht_bucket_insert(ht, tbl, hash); - data = ERR_PTR(-ENOMEM); - if (!pprev) - goto out; - - rht_for_each_continue(head, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -770,7 +736,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -780,7 +746,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -989,8 +955,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1141,8 +1107,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 172454e6b979..32d0ad058380 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -32,11 +32,6 @@ #define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 32UL -union nested_table { - union nested_table __rcu *table; - struct rhash_head __rcu *bucket; -}; - static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) @@ -81,9 +76,6 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, /* Never allocate more than 0.5 locks per bucket */ size = min_t(unsigned int, size, tbl->size >> 1); - if (tbl->nest) - size = min(size, 1U << tbl->nest); - if (sizeof(spinlock_t) != 0) { tbl->locks = NULL; #ifdef CONFIG_NUMA @@ -107,45 +99,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, return 0; } -static void nested_table_free(union nested_table *ntbl, unsigned int size) -{ - const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - const unsigned int len = 1 << shift; - unsigned int i; - - ntbl = rcu_dereference_raw(ntbl->table); - if (!ntbl) - return; - - if (size > len) { - size >>= shift; - for (i = 0; i < len; i++) - nested_table_free(ntbl + i, size); - } - - kfree(ntbl); -} - -static void nested_bucket_table_free(const struct bucket_table *tbl) -{ - unsigned int size = tbl->size >> tbl->nest; - unsigned int len = 1 << tbl->nest; - union nested_table *ntbl; - unsigned int i; - - ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); - - for (i = 0; i < len; i++) - nested_table_free(ntbl + i, size); - - kfree(ntbl); -} - static void bucket_table_free(const struct bucket_table *tbl) { - if (tbl->nest) - nested_bucket_table_free(tbl); - if (tbl) kvfree(tbl->locks); @@ -157,59 +112,6 @@ static void bucket_table_free_rcu(struct rcu_head *head) bucket_table_free(container_of(head, struct bucket_table, rcu)); } -static union nested_table *nested_table_alloc(struct rhashtable *ht, - union nested_table __rcu **prev, - unsigned int shifted, - unsigned int nhash) -{ - union nested_table *ntbl; - int i; - - ntbl = rcu_dereference(*prev); - if (ntbl) - return ntbl; - - ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); - - if (ntbl && shifted) { - for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) - INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, - (i << shifted) | nhash); - } - - rcu_assign_pointer(*prev, ntbl); - - return ntbl; -} - -static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, - size_t nbuckets, - gfp_t gfp) -{ - const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - struct bucket_table *tbl; - size_t size; - - if (nbuckets < (1 << (shift + 1))) - return NULL; - - size = sizeof(*tbl) + sizeof(tbl->buckets[0]); - - tbl = kzalloc(size, gfp); - if (!tbl) - return NULL; - - if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0, 0)) { - kfree(tbl); - return NULL; - } - - tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; - - return tbl; -} - static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) @@ -224,17 +126,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); - - size = nbuckets; - - if (tbl == NULL && gfp != GFP_KERNEL) { - tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); - nbuckets = 0; - } if (tbl == NULL) return NULL; - tbl->size = size; + tbl->size = nbuckets; if (alloc_bucket_locks(ht, tbl, gfp) < 0) { bucket_table_free(tbl); @@ -269,17 +164,12 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, rht_dereference_rcu(old_tbl->future_tbl, ht)); - struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); - int err = -EAGAIN; + struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; + int err = -ENOENT; struct rhash_head *head, *next, *entry; spinlock_t *new_bucket_lock; unsigned int new_hash; - if (new_tbl->nest) - goto out; - - err = -ENOENT; - rht_for_each(entry, old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -312,26 +202,19 @@ out: return err; } -static int rhashtable_rehash_chain(struct rhashtable *ht, +static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); spinlock_t *old_bucket_lock; - int err; old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); spin_lock_bh(old_bucket_lock); - while (!(err = rhashtable_rehash_one(ht, old_hash))) + while (!rhashtable_rehash_one(ht, old_hash)) ; - - if (err == -ENOENT) { - old_tbl->rehash++; - err = 0; - } + old_tbl->rehash++; spin_unlock_bh(old_bucket_lock); - - return err; } static int rhashtable_rehash_attach(struct rhashtable *ht, @@ -363,17 +246,13 @@ static int rhashtable_rehash_table(struct rhashtable *ht) struct bucket_table *new_tbl; struct rhashtable_walker *walker; unsigned int old_hash; - int err; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) return 0; - for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { - err = rhashtable_rehash_chain(ht, old_hash); - if (err) - return err; - } + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) + rhashtable_rehash_chain(ht, old_hash); /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); @@ -392,16 +271,31 @@ static int rhashtable_rehash_table(struct rhashtable *ht) return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } -static int rhashtable_rehash_alloc(struct rhashtable *ht, - struct bucket_table *old_tbl, - unsigned int size) +/** + * rhashtable_expand - Expand hash table while allowing concurrent lookups + * @ht: the hash table to expand + * + * A secondary bucket array is allocated and the hash entries are migrated. + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. + */ +static int rhashtable_expand(struct rhashtable *ht) { - struct bucket_table *new_tbl; + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); int err; ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); + old_tbl = rhashtable_last_table(ht, old_tbl); + + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; @@ -430,9 +324,12 @@ static int rhashtable_rehash_alloc(struct rhashtable *ht, */ static int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); unsigned int nelems = atomic_read(&ht->nelems); unsigned int size = 0; + int err; + + ASSERT_RHT_MUTEX(ht); if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); @@ -445,7 +342,15 @@ static int rhashtable_shrink(struct rhashtable *ht) if (rht_dereference(old_tbl->future_tbl, ht)) return -EEXIST; - return rhashtable_rehash_alloc(ht, old_tbl, size); + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); + if (new_tbl == NULL) + return -ENOMEM; + + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); + + return err; } static void rht_deferred_worker(struct work_struct *work) @@ -461,14 +366,11 @@ static void rht_deferred_worker(struct work_struct *work) tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) - err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); + rhashtable_expand(ht); else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) - err = rhashtable_shrink(ht); - else if (tbl->nest) - err = rhashtable_rehash_alloc(ht, tbl, tbl->size); + rhashtable_shrink(ht); - if (!err) - err = rhashtable_rehash_table(ht); + err = rhashtable_rehash_table(ht); mutex_unlock(&ht->mutex); @@ -537,8 +439,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, int elasticity; elasticity = ht->elasticity; - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -575,7 +477,6 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, struct rhash_head *obj, void *data) { - struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -598,11 +499,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - pprev = rht_bucket_insert(ht, tbl, hash); - if (!pprev) - return ERR_PTR(-ENOMEM); - - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -612,7 +509,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -1078,7 +975,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - struct bucket_table *tbl; + const struct bucket_table *tbl; unsigned int i; cancel_work_sync(&ht->run_work); @@ -1089,7 +986,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; - for (pos = rht_dereference(*rht_bucket(tbl, i), ht), + for (pos = rht_dereference(tbl->buckets[i], ht), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1110,70 +1007,3 @@ void rhashtable_destroy(struct rhashtable *ht) return rhashtable_free_and_destroy(ht, NULL, NULL); } EXPORT_SYMBOL_GPL(rhashtable_destroy); - -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) -{ - const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - static struct rhash_head __rcu *rhnull = - (struct rhash_head __rcu *)NULLS_MARKER(0); - unsigned int index = hash & ((1 << tbl->nest) - 1); - unsigned int size = tbl->size >> tbl->nest; - unsigned int subhash = hash; - union nested_table *ntbl; - - ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); - ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); - subhash >>= tbl->nest; - - while (ntbl && size > (1 << shift)) { - index = subhash & ((1 << shift) - 1); - ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); - size >>= shift; - subhash >>= shift; - } - - if (!ntbl) - return &rhnull; - - return &ntbl[subhash].bucket; - -} -EXPORT_SYMBOL_GPL(rht_bucket_nested); - -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) -{ - const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - unsigned int index = hash & ((1 << tbl->nest) - 1); - unsigned int size = tbl->size >> tbl->nest; - union nested_table *ntbl; - unsigned int shifted; - unsigned int nhash; - - ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); - hash >>= tbl->nest; - nhash = index; - shifted = tbl->nest; - ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, nhash); - - while (ntbl && size > (1 << shift)) { - index = hash & ((1 << shift) - 1); - size >>= shift; - hash >>= shift; - nhash |= index << shifted; - shifted += shift; - ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, - nhash); - } - - if (!ntbl) - return NULL; - - return &ntbl[hash].bucket; - -} -EXPORT_SYMBOL_GPL(rht_bucket_nested_insert); diff --git a/net/tipc/net.c b/net/tipc/net.c index ab8a2d5d1e32..28bf4feeb81c 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -110,10 +110,6 @@ int tipc_net_start(struct net *net, u32 addr) char addr_string[16]; tn->own_addr = addr; - - /* Ensure that the new address is visible before we reinit. */ - smp_mb(); - tipc_named_reinit(net); tipc_sk_reinit(net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 370a5912bcb5..800caaa699a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -384,6 +384,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; tn = net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ sock->ops = ops; @@ -393,13 +395,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } - - /* Ensure tsk is visible before we read own_addr. */ - smp_mb(); - - tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, - NAMED_H_SIZE, 0); - msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_shutdown = 0; @@ -2274,27 +2269,24 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, void tipc_sk_reinit(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct rhashtable_iter iter; + const struct bucket_table *tbl; + struct rhash_head *pos; struct tipc_sock *tsk; struct tipc_msg *msg; + int i; - rhashtable_walk_enter(&tn->sk_rht, &iter); - - do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (tsk) - continue; - - while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; msg_set_prevnode(msg, tn->own_addr); msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - - rhashtable_walk_stop(&iter); - } while (tsk == ERR_PTR(-EAGAIN)); + } + rcu_read_unlock(); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) -- cgit From 84588a93d097bace24b9233930f82511d4f34210 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 16 Feb 2017 15:08:20 +0100 Subject: fuse: fix uninitialized flags in pipe_buffer Signed-off-by: Miklos Szeredi Fixes: d82718e348fe ("fuse_dev_splice_read(): switch to add_to_pipe()") Cc: # 4.9+ --- fs/fuse/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b656e1805f04..f11792672977 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1376,6 +1376,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, * code can Oops if the buffer persists after module unload. */ bufs[page_nr].ops = &nosteal_pipe_buf_ops; + bufs[page_nr].flags = 0; ret = add_to_pipe(pipe, &bufs[page_nr++]); if (unlikely(ret < 0)) break; -- cgit From 5d7f5ce15156af205e175e8fa5c669ba40bf0c5e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 16 Feb 2017 07:57:33 -0700 Subject: cfq-iosched: don't call wbt_disable_default() with IRQs disabled wbt_disable_default() calls del_timer_sync() to wait for the wbt timer to finish before disabling throttling. We can't do this with IRQs disable. This fixes a lockdep splat on boot, if non-root cgroups are used. Reported-by: Gabriel C Fixes: 87760e5eef35 ("block: hook up writeback throttling") Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c73a6fcaeb9d..838f07e2b64a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3758,7 +3758,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) +static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *cfqq; @@ -3775,15 +3775,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) * spuriously on a newly created cic but there's no harm. */ if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr)) - return; - - /* - * If we have a non-root cgroup, we can depend on that to - * do proper throttling of writes. Turn off wbt for that - * case, if it was enabled by default. - */ - if (nonroot_cg) - wbt_disable_default(cfqd->queue); + return nonroot_cg; /* * Drop reference to queues. New queues will be assigned in new @@ -3804,9 +3796,13 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) } cic->blkcg_serial_nr = serial_nr; + return nonroot_cg; } #else -static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } +static inline bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) +{ + return false; +} #endif /* CONFIG_CFQ_GROUP_IOSCHED */ static struct cfq_queue ** @@ -4448,11 +4444,12 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, const int rw = rq_data_dir(rq); const bool is_sync = rq_is_sync(rq); struct cfq_queue *cfqq; + bool disable_wbt; spin_lock_irq(q->queue_lock); check_ioprio_changed(cic, bio); - check_blkcg_changed(cic, bio); + disable_wbt = check_blkcg_changed(cic, bio); new_queue: cfqq = cic_to_cfqq(cic, is_sync); if (!cfqq || cfqq == &cfqd->oom_cfqq) { @@ -4488,6 +4485,10 @@ new_queue: rq->elv.priv[0] = cfqq; rq->elv.priv[1] = cfqq->cfqg; spin_unlock_irq(q->queue_lock); + + if (disable_wbt) + wbt_disable_default(q); + return 0; } -- cgit From 32b143637e8180f5d5cea54320c769210dea4f19 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Feb 2017 01:43:58 +0100 Subject: ARM: 8657/1: uaccess: consistently check object sizes In commit 76624175dcae ("arm64: uaccess: consistently check object sizes"), the object size checks are moved outside the access_ok() so that bad destinations are detected before hitting the "memset(dest, 0, size)" in the copy_from_user() failure path. This makes the same change for arm, with attention given to possibly extracting the uaccess routines into a common header file for all architectures in the future. Suggested-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Russell King --- arch/arm/include/asm/uaccess.h | 44 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1f59ea051bab..b7e0125c0bbf 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -478,11 +478,10 @@ extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) +__arch_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned int __ua_flags; - check_object_size(to, n, false); __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); @@ -495,18 +494,15 @@ extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) +__arch_copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags; - - check_object_size(from, n, true); __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else - check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } @@ -526,25 +522,49 @@ __clear_user(void __user *addr, unsigned long n) } #else -#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) -#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) +#define __arch_copy_from_user(to, from, n) \ + (memcpy(to, (void __force *)from, n), 0) +#define __arch_copy_to_user(to, from, n) \ + (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0) #endif -static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; + + check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); + res = __arch_copy_from_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; } -static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); + + return __arch_copy_to_user(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } -- cgit From 9e3440481845b2ec22508f60837ee2cab2b6054f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Feb 2017 01:44:37 +0100 Subject: ARM: 8658/1: uaccess: fix zeroing of 64-bit get_user() The 64-bit get_user() wasn't clearing the high word due to a typo in the error handler. The exception handler entry was already correct, though. Noticed during recent usercopy test additions in lib/test_user_copy.c. Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/lib/getuser.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 8ecfd15c3a02..df73914e81c8 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -67,7 +67,7 @@ ENTRY(__get_user_4) ENDPROC(__get_user_4) ENTRY(__get_user_8) - check_uaccess r0, 8, r1, r2, __get_user_bad + check_uaccess r0, 8, r1, r2, __get_user_bad8 #ifdef CONFIG_THUMB2_KERNEL 5: TUSER(ldr) r2, [r0] 6: TUSER(ldr) r3, [r0, #4] -- cgit From d74c67dd7800fc7aae381f272875c337f268806c Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 15 Feb 2017 11:28:45 +0900 Subject: drm/radeon: Use mode h/vdisplay fields to hide out of bounds HW cursor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The crtc_h/vdisplay fields may not match the CRTC viewport dimensions with special modes such as interlaced ones. Fixes the HW cursor disappearing in the bottom half of the screen with interlaced modes. Fixes: 6b16cf7785a4 ("drm/radeon: Hide the HW cursor while it's out of bounds") Cc: stable@vger.kernel.org Reported-by: Ashutosh Kumar Tested-by: Sonny Jiang Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index fb16070b266e..4a4f9533c53b 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,8 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) } if (x <= (crtc->x - w) || y <= (crtc->y - radeon_crtc->cursor_height) || - x >= (crtc->x + crtc->mode.crtc_hdisplay) || - y >= (crtc->y + crtc->mode.crtc_vdisplay)) + x >= (crtc->x + crtc->mode.hdisplay) || + y >= (crtc->y + crtc->mode.vdisplay)) goto out_of_bounds; x += xorigin; -- cgit From 5a81e6a171cdbd1fa8bc1fdd80c23d3d71816fac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 16 Feb 2017 17:49:02 +0100 Subject: vfs: fix uninitialized flags in splice_to_pipe() Flags (PIPE_BUF_FLAG_PACKET, PIPE_BUF_FLAG_GIFT) could remain on the unused part of the pipe ring buffer. Previously splice_to_pipe() left the flags value alone, which could result in incorrect behavior. Uninitialized flags appears to have been there from the introduction of the splice syscall. Signed-off-by: Miklos Szeredi Cc: # 2.6.17+ Signed-off-by: Linus Torvalds --- fs/splice.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79..4ef78aa8ef61 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -204,6 +204,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, buf->len = spd->partial[page_nr].len; buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; + buf->flags = 0; pipe->nrbufs++; page_nr++; -- cgit From 558e8e27e73f53f8a512485be538b07115fe5f3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Feb 2017 12:19:18 -0800 Subject: Revert "nohz: Fix collision between tick and other hrtimers" This reverts commit 24b91e360ef521a2808771633d76ebc68bd5604b and commit 7bdb59f1ad47 ("tick/nohz: Fix possible missing clock reprog after tick soft restart") that depends on it, Pavel reports that it causes occasional boot hangs for him that seem to depend on just how the machine was booted. In particular, his machine hangs at around the PCI fixups of the EHCI USB host controller, but only hangs from cold boot, not from a warm boot. Thomas Gleixner suspecs it's a CPU hotplug interaction, particularly since Pavel also saw suspend/resume issues that seem to be related. We're reverting for now while trying to figure out the root cause. Reported-bisected-and-tested-by: Pavel Machek Acked-by: Frederic Weisbecker Cc: Wanpeng Li Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: stable@kernel.org # reverted commits were marked for stable Signed-off-by: Linus Torvalds --- kernel/time/tick-sched.c | 14 ++------------ kernel/time/tick-sched.h | 2 -- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fc6f740d0277..2c115fdab397 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -725,11 +725,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, */ if (delta == 0) { tick_nohz_restart(ts, now); - /* - * Make sure next tick stop doesn't get fooled by past - * clock deadline - */ - ts->next_tick = 0; goto out; } } @@ -772,7 +767,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, tick = expires; /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && (expires == ts->next_tick)) + if (ts->tick_stopped && (expires == dev->next_event)) goto out; /* @@ -792,8 +787,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, trace_tick_stop(1, TICK_DEP_MASK_NONE); } - ts->next_tick = tick; - /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. @@ -809,10 +802,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, else tick_program_event(tick, 1); out: - /* - * Update the estimated sleep length until the next timer - * (not only the tick). - */ + /* Update the estimated sleep length */ ts->sleep_length = ktime_sub(dev->next_event, now); return tick; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 075444e3d48e..bf38226e5c17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -27,7 +27,6 @@ enum tick_nohz_mode { * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline * when the CPU returns from nohz sleep. - * @next_tick: Next tick to be fired when in dynticks mode. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -45,7 +44,6 @@ struct tick_sched { unsigned long check_clocks; enum tick_nohz_mode nohz_mode; ktime_t last_tick; - ktime_t next_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; -- cgit From dd62245e73de9138333cb0e7a42c8bc1215c3ce6 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 27 Dec 2016 17:57:04 -0500 Subject: NTB: ntb_transport: fix debugfs_remove_recursive The call to debugfs_remove_recursive(qp->debugfs_dir) of the sub-level directory must not be later than debugfs_remove_recursive(nt_debugfs_dir) of the top-level directory. Otherwise, the sub-level directory will not exist, and it would be invalid (panic) to attempt to remove it. This removes the top-level directory last, after sub-level directories have been cleaned up. Signed-off-by: Allen Hubbe Fixes: e26a5843f ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index f81aa4b18d9f..0e18adb51e4e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -2273,9 +2273,8 @@ module_init(ntb_transport_init); static void __exit ntb_transport_exit(void) { - debugfs_remove_recursive(nt_debugfs_dir); - ntb_unregister_client(&ntb_transport_client); bus_unregister(&ntb_transport_bus); + debugfs_remove_recursive(nt_debugfs_dir); } module_exit(ntb_transport_exit); -- cgit From 9644347c5240d0ee3ba7472ef332aaa4ff4db398 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 30 Jan 2017 14:21:17 -0700 Subject: ntb: ntb_perf missing dmaengine_unmap_put In the normal I/O execution path, ntb_perf is missing a call to dmaengine_unmap_put() after submission. That causes us to leak unmap objects. Signed-off-by: Dave Jiang Fixes: 8a7b6a77 ("ntb: ntb perf tool") Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index e75d4fdc0866..434e1d474f33 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -265,6 +265,8 @@ static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst, if (dma_submit_error(cookie)) goto err_set_unmap; + dmaengine_unmap_put(unmap); + atomic_inc(&pctx->dma_sync); dma_async_issue_pending(chan); -- cgit From 8fcd0950c021d7be8493280541332b924b9de962 Mon Sep 17 00:00:00 2001 From: Thomas VanSelus Date: Mon, 13 Feb 2017 16:46:26 -0600 Subject: ntb_transport: Pick an unused queue Fix typo causing ntb_transport_create_queue to select the first queue every time, instead of using the next free queue. Signed-off-by: Thomas VanSelus Signed-off-by: Aaron Sierra Acked-by: Allen Hubbe Fixes: fce8a7bb5 ("PCI-Express Non-Transparent Bridge Support") Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 0e18adb51e4e..02ca45fdd892 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1802,7 +1802,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, node = dev_to_node(&ndev->dev); - free_queue = ffs(nt->qp_bitmap); + free_queue = ffs(nt->qp_bitmap_free); if (!free_queue) goto err; -- cgit From 939ada5fb587840ae4db47846087be4162477b13 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 16 Feb 2017 16:22:36 -0700 Subject: ntb: ntb_hw_intel: link_poll isn't clearing the pending status properly On Skylake hardware, the link_poll isn't clearing the pending interrupt bit. Adding a new function for SKX that handles clearing of status bit the right way. Signed-off-by: Dave Jiang Fixes: 783dfa6c ("ntb: Adding Skylake Xeon NTB support") Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index eca9688bf9d9..c00238491673 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1629,6 +1629,28 @@ static void atom_deinit_dev(struct intel_ntb_dev *ndev) /* Skylake Xeon NTB */ +static int skx_poll_link(struct intel_ntb_dev *ndev) +{ + u16 reg_val; + int rc; + + ndev->reg->db_iowrite(ndev->db_link_mask, + ndev->self_mmio + + ndev->self_reg->db_clear); + + rc = pci_read_config_word(ndev->ntb.pdev, + SKX_LINK_STATUS_OFFSET, ®_val); + if (rc) + return 0; + + if (reg_val == ndev->lnk_sta) + return 0; + + ndev->lnk_sta = reg_val; + + return 1; +} + static u64 skx_db_ioread(void __iomem *mmio) { return ioread64(mmio); @@ -2852,7 +2874,7 @@ static struct intel_b2b_addr xeon_b2b_dsd_addr = { }; static const struct intel_ntb_reg skx_reg = { - .poll_link = xeon_poll_link, + .poll_link = skx_poll_link, .link_is_up = xeon_link_is_up, .db_ioread = skx_db_ioread, .db_iowrite = skx_db_iowrite, -- cgit From e5a1dadec3648019a838b85357b67f241fbb02e8 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 15 Feb 2017 19:15:51 +0100 Subject: reset: fix shared reset triggered_count decrement on error For a shared reset, when the reset is successful, the triggered_count is incremented when trying to call the reset callback, so that another device sharing the same reset line won't trigger it again. If the reset has not been triggered successfully, the trigger_count should be decremented. The code does the opposite, and decrements the trigger_count on success. As a consequence, another device sharing the reset will be able to trigger it again. Fixed be removing negation in from of the error code of the reset function. Fixes: 7da33a37b48f ("reset: allow using reset_control_reset with shared reset") Signed-off-by: Jerome Brunet Acked-by: Martin Blumenstingl Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 10368ed8fd13..b6f5f1e1826c 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -163,7 +163,7 @@ int reset_control_reset(struct reset_control *rstc) } ret = rstc->rcdev->ops->reset(rstc->rcdev, rstc->id); - if (rstc->shared && !ret) + if (rstc->shared && ret) atomic_dec(&rstc->triggered_count); return ret; -- cgit From 2bd624b4611ffee36422782d16e1c944d1351e98 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 15 Feb 2017 20:25:39 +0000 Subject: packet: Do not call fanout_release from atomic contexts Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev"), unfortunately, introduced the following issues. 1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside rcu_read-side critical section. rcu_read_lock disables preemption, most often, which prohibits calling sleeping functions. [ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section! [ ] [ ] rcu_scheduler_active = 1, debug_locks = 0 [ ] 4 locks held by ovs-vswitchd/1969: [ ] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ ] #1: (ovs_mutex){+.+.+.}, at: [] ovs_vport_cmd_del+0x4a/0x100 [openvswitch] [ ] #2: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ ] #3: (rcu_read_lock){......}, at: [] packet_notifier+0x5/0x3f0 [ ] [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] lockdep_rcu_suspicious+0x107/0x110 [ ] [] ___might_sleep+0x57/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] ? vprintk_default+0x1f/0x30 [ ] [] ? printk+0x4d/0x4f [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock). "sleeping function called from invalid context" [ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] ___might_sleep+0x202/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 3. calling dev_remove_pack(&fanout->prot_hook), from inside spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack() -> synchronize_net(), which might sleep. [ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002 [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] __schedule_bug+0x64/0x73 [ ] [] __schedule+0x6b/0xd10 [ ] [] schedule+0x6b/0x80 [ ] [] schedule_timeout+0x38d/0x410 [ ] [] synchronize_sched_expedited+0x53d/0x810 [ ] [] synchronize_rcu_expedited+0xe/0x10 [ ] [] synchronize_net+0x35/0x50 [ ] [] dev_remove_pack+0x13/0x20 [ ] [] fanout_release+0xbe/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 4. fanout_release() races with calls from different CPU. To fix the above problems, remove the call to fanout_release() under rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to __fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure fanout->prot_hook is removed as well. Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev") Reported-by: Eric Dumazet Signed-off-by: Anoob Soman Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0f03f6a53b4d..70f5b6a4683c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) f->arr[f->num_members] = sk; smp_wmb(); f->num_members++; + if (f->num_members == 1) + dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) BUG_ON(i >= f->num_members); f->arr[i] = f->arr[f->num_members - 1]; f->num_members--; + if (f->num_members == 0) + __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.id_match = match_fanout_group; - dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1718,7 +1721,12 @@ out: return err; } -static void fanout_release(struct sock *sk) +/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes + * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. + * It is the responsibility of the caller to call fanout_release_data() and + * free the returned packet_fanout (after synchronize_net()) + */ +static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; @@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *sk) if (f) { po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) { + if (atomic_dec_and_test(&f->sk_ref)) list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); - } + else + f = NULL; if (po->rollover) kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); + + return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, @@ -2912,6 +2920,7 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; @@ -2951,9 +2960,14 @@ static int packet_release(struct socket *sock) packet_set_ring(sk, &req_u, 1, 1); } - fanout_release(sk); + f = fanout_release(sk); synchronize_net(); + + if (f) { + fanout_release_data(f); + kfree(f); + } /* * Now the socket is dead. No more input will appear. */ @@ -3905,7 +3919,6 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit From 785f35775d968e0f45231b754e945fcb3ed6bded Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Feb 2017 12:56:10 +0300 Subject: dpaa_eth: small leak on error This should be >= instead of > here. It means that we don't increment the free count enough so it becomes off by one. Fixes: 9ad1a3749333 ("dpaa_eth: add support for DPAA Ethernet") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index c9b7ad65e563..726b5693ae8a 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1668,7 +1668,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, free_buffers: /* compensate sw bpool counter changes */ - for (i--; i > 0; i--) { + for (i--; i >= 0; i--) { dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); if (dpaa_bp) { count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); -- cgit From 5edabca9d4cff7f1f2b68f0bac55ef99d9798ba4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 16 Feb 2017 17:22:46 +0100 Subject: dccp: fix freeing skb too early for IPV6_RECVPKTINFO In the current DCCP implementation an skb for a DCCP_PKT_REQUEST packet is forcibly freed via __kfree_skb in dccp_rcv_state_process if dccp_v6_conn_request successfully returns. However, if IPV6_RECVPKTINFO is set on a socket, the address of the skb is saved to ireq->pktopts and the ref count for skb is incremented in dccp_v6_conn_request, so skb is still in use. Nevertheless, it gets freed in dccp_rcv_state_process. Fix by calling consume_skb instead of doing goto discard and therefore calling __kfree_skb. Similar fixes for TCP: fb7e2399ec17f1004c0e0ccfd17439f8759ede01 [TCP]: skb is unexpectedly freed. 0aea76d35c9651d55bbaf746e7914e5f9ae5a25d tcp: SYN packets are now simply consumed Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index ba347184bda9..8fedc2d49770 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - goto discard; + consume_skb(skb); + return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; -- cgit From 22f0708a718daea5e79de2d29b4829de016a4ff4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 17 Feb 2017 19:14:27 +0100 Subject: vxlan: fix oops in dev_fill_metadata_dst Since the commit 0c1d70af924b ("net: use dst_cache for vxlan device") vxlan_fill_metadata_dst() calls vxlan_get_route() passing a NULL dst_cache pointer, so the latter should explicitly check for valid dst_cache ptr. Unfortunately the commit d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") removed said check. As a result is possible to trigger a null pointer access calling vxlan_fill_metadata_dst(), e.g. with: ovs-vsctl add-br ovs-br0 ovs-vsctl add-port ovs-br0 vxlan0 -- set interface vxlan0 \ type=vxlan options:remote_ip=192.168.1.1 \ options:key=1234 options:dst_port=4789 ofport_request=10 ip address add dev ovs-br0 172.16.1.2/24 ovs-vsctl set Bridge ovs-br0 ipfix=@i -- --id=@i create IPFIX \ targets=\"172.16.1.1:1234\" sampling=1 iperf -c 172.16.1.1 -u -l 1000 -b 10M -t 1 -p 1234 This commit addresses the issue passing to vxlan_get_route() the dst_cache already available into the lwt info processed by vxlan_fill_metadata_dst(). Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Signed-off-by: Paolo Abeni Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 50b62db213b0..30b04cf2bb1e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2439,7 +2439,8 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, - &info->key.u.ipv4.src, dport, sport, NULL, info); + &info->key.u.ipv4.src, dport, sport, + &info->dst_cache, info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); @@ -2450,7 +2451,8 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src, dport, sport, NULL, info); + &info->key.u.ipv6.src, dport, sport, + &info->dst_cache, info); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); -- cgit From 4c03b862b12f980456f9de92db6d508a4999b788 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Feb 2017 16:19:39 -0500 Subject: irda: Fix lockdep annotations in hashbin_delete(). A nested lock depth was added to the hasbin_delete() code but it doesn't actually work some well and results in tons of lockdep splats. Fix the code instead to properly drop the lock around the operation and just keep peeking the head of the hashbin queue. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/irda/irqueue.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index acbe61c7e683..160dc89335e2 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -383,9 +383,6 @@ EXPORT_SYMBOL(hashbin_new); * for deallocating this structure if it's complex. If not the user can * just supply kfree, which should take care of the job. */ -#ifdef CONFIG_LOCKDEP -static int hashbin_lock_depth = 0; -#endif int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) { irda_queue_t* queue; @@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;); /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags, - hashbin_lock_depth++); - } + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); /* * Free the entries in the hashbin, TODO: use hashbin_clear when * it has been shown to work */ for (i = 0; i < HASHBIN_SIZE; i ++ ) { - queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); - while (queue ) { - if (free_func) - (*free_func)(queue); - queue = dequeue_first( - (irda_queue_t**) &hashbin->hb_queue[i]); + while (1) { + queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); + + if (!queue) + break; + + if (free_func) { + if (hashbin->hb_type & HB_LOCK) + spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); + free_func(queue); + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); + } } } @@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) hashbin->magic = ~HB_MAGIC; /* Release lock */ - if ( hashbin->hb_type & HB_LOCK) { + if (hashbin->hb_type & HB_LOCK) spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); -#ifdef CONFIG_LOCKDEP - hashbin_lock_depth--; -#endif - } /* * Free the hashbin structure -- cgit From 69e05170ef0d0c00e1098dd6a625b44f39903a6a Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Mon, 2 Jan 2017 14:35:05 -0600 Subject: ARM: multi_v7_defconfig: enable Qualcomm RPMCC This patch enables the Qualcomm RPM based Clock Controller present on A-family boards. Signed-off-by: Andy Gross Acked-by: Bjorn Andersson Signed-off-by: Olof Johansson Signed-off-by: Arnd Bergmann --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 028d2b70e3b5..f57ec511e7ae 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -824,6 +824,7 @@ CONFIG_QCOM_SMSM=y CONFIG_QCOM_WCNSS_CTRL=m CONFIG_ROCKCHIP_PM_DOMAINS=y CONFIG_COMMON_CLK_QCOM=y +CONFIG_QCOM_CLK_RPM=y CONFIG_CHROME_PLATFORMS=y CONFIG_STAGING_BOARD=y CONFIG_CROS_EC_CHARDEV=m -- cgit From fc98c3c8c9dcafd67adcce69e6ce3191d5306c9c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 18 Feb 2017 03:42:54 -0800 Subject: printk: use rcuidle console tracepoint Use rcuidle console tracepoint because, apparently, it may be issued from an idle CPU: hw-breakpoint: Failed to enable monitor mode on CPU 0. hw-breakpoint: CPU 0 failed to disable vector catch =============================== [ ERR: suspicious RCU usage. ] 4.10.0-rc8-next-20170215+ #119 Not tainted ------------------------------- ./include/trace/events/printk.h:32 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 2, debug_locks = 0 RCU used illegally from extended quiescent state! 2 locks held by swapper/0/0: #0: (cpu_pm_notifier_lock){......}, at: [] cpu_pm_exit+0x10/0x54 #1: (console_lock){+.+.+.}, at: [] vprintk_emit+0x264/0x474 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc8-next-20170215+ #119 Hardware name: Generic OMAP4 (Flattened Device Tree) console_unlock vprintk_emit vprintk_default printk reset_ctrl_regs dbg_cpu_pm_notify notifier_call_chain cpu_pm_exit omap_enter_idle_coupled cpuidle_enter_state cpuidle_enter_state_coupled do_idle cpu_startup_entry start_kernel This RCU warning, however, is suppressed by lockdep_off() in printk(). lockdep_off() increments the ->lockdep_recursion counter and thus disables RCU_LOCKDEP_WARN() and debug_lockdep_rcu_enabled(), which want lockdep to be enabled "current->lockdep_recursion == 0". Link: http://lkml.kernel.org/r/20170217015932.11898-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Reported-by: Tony Lindgren Tested-by: Tony Lindgren Acked-by: Paul E. McKenney Acked-by: Steven Rostedt (VMware) Cc: Petr Mladek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Russell King Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8b2696420abb..4ba3d34938c0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1516,7 +1516,7 @@ static void call_console_drivers(int level, { struct console *con; - trace_console(text, len); + trace_console_rcuidle(text, len); if (!console_drivers) return; -- cgit From 00ea1ceebe0d9f2dc1cc2b7bd575a00100c27869 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 18 Feb 2017 19:00:45 -0500 Subject: ipv6: release dst on error in ip6_dst_lookup_tail If ip6_dst_lookup_tail has acquired a dst and fails the IPv4-mapped check, release the dst before returning an error. Fixes: ec5e3b0a1d41 ("ipv6: Inhibit IPv4-mapped src address on the wire.") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e164684456df..7cebee58e55b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1022,8 +1022,10 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } #endif if (ipv6_addr_v4mapped(&fl6->saddr) && - !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) - return -EAFNOSUPPORT; + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { + err = -EAFNOSUPPORT; + goto out_err_release; + } return 0; -- cgit From fd3fc0b4d7305fa7246622dcc0dec69c42443f45 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 31 Jan 2017 10:16:00 +0100 Subject: scsi: don't BUG_ON() empty DMA transfers Don't crash the machine just because of an empty transfer. Use WARN_ON() combined with returning an error. Found by Dmitry Vyukov and syzkaller. [ Changed to "WARN_ON_ONCE()". Al has a patch that should fix the root cause, but a BUG_ON() is not acceptable in any case, and a WARN_ON() might still be a cause of excessive log spamming. NOTE! If this warning ever triggers, we may end up leaking resources, since this doesn't bother to try to clean the command up. So this WARN_ON_ONCE() triggering does imply real problems. But BUG_ON() is much worse. People really need to stop using BUG_ON() for "this shouldn't ever happen". It makes pretty much any bug worse. - Linus ] Signed-off-by: Johannes Thumshirn Reported-by: Dmitry Vyukov Cc: James Bottomley Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/scsi/scsi_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e9e1e141af9c..78db07fd8055 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1040,7 +1040,8 @@ int scsi_init_io(struct scsi_cmnd *cmd) bool is_mq = (rq->mq_ctx != NULL); int error; - BUG_ON(!blk_rq_nr_phys_segments(rq)); + if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) + return -EINVAL; error = scsi_init_sgtable(rq, &cmd->sdb); if (error) -- cgit From 137d01df511b3afe1f05499aea05f3bafc0fb221 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Feb 2017 07:15:27 +0000 Subject: Fix missing sanity check in /dev/sg What happens is that a write to /dev/sg is given a request with non-zero ->iovec_count combined with zero ->dxfer_len. Or with ->dxferp pointing to an array full of empty iovecs. Having write permission to /dev/sg shouldn't be equivalent to the ability to trigger BUG_ON() while holding spinlocks... Found by Dmitry Vyukov and syzkaller. [ The BUG_ON() got changed to a WARN_ON_ONCE(), but this fixes the underlying issue. - Linus ] Signed-off-by: Al Viro Reported-by: Dmitry Vyukov Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- drivers/scsi/sg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index dbe5b4b95df0..121de0aaa6ad 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1753,6 +1753,10 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) return res; iov_iter_truncate(&i, hp->dxfer_len); + if (!iov_iter_count(&i)) { + kfree(iov); + return -EINVAL; + } res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC); kfree(iov); -- cgit From c470abd4fde40ea6a0846a2beab642a578c0b8cd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Feb 2017 14:34:00 -0800 Subject: Linux 4.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 503dae1de8ef..f1e6a02a0c19 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Fearless Coyote # *DOCUMENTATION* -- cgit From fabeb165afd52a3fb05b9b68e5a3550609b8e157 Mon Sep 17 00:00:00 2001 From: Michał Kępień Date: Wed, 8 Mar 2017 09:20:10 -0800 Subject: Input: sparse-keymap - use a managed allocation for keymap copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platform drivers use devm_input_allocate_device() together with sparse_keymap_setup() in their .probe callbacks. While using the former simplifies error handling, using the latter necessitates calling sparse_keymap_free() in the error path and upon module unloading to avoid leaking the copy of the keymap allocated by sparse_keymap_setup(). To help prevent such leaks and enable simpler error handling, make sparse_keymap_setup() use devm_kmemdup() to create the keymap copy so that it gets automatically freed. This works for both managed and non-managed input devices as the keymap is freed after the last reference to the input device is dropped. Note that actions previously taken by sparse_keymap_free(), i.e. taking the input device's mutex and zeroing its keycode and keycodemax fields, are now redundant because the managed keymap will always be freed after the input device is unregistered. Signed-off-by: Michał Kępień Signed-off-by: Dmitry Torokhov --- drivers/input/sparse-keymap.c | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c index e7409c45bdd0..12a3ad83296d 100644 --- a/drivers/input/sparse-keymap.c +++ b/drivers/input/sparse-keymap.c @@ -160,12 +160,12 @@ static int sparse_keymap_setkeycode(struct input_dev *dev, * @keymap: Keymap in form of array of &key_entry structures ending * with %KE_END type entry * @setup: Function that can be used to adjust keymap entries - * depending on device's deeds, may be %NULL + * depending on device's needs, may be %NULL * * The function calculates size and allocates copy of the original * keymap after which sets up input device event bits appropriately. - * Before destroying input device allocated keymap should be freed - * with a call to sparse_keymap_free(). + * The allocated copy of the keymap is automatically freed when it + * is no longer needed. */ int sparse_keymap_setup(struct input_dev *dev, const struct key_entry *keymap, @@ -180,19 +180,18 @@ int sparse_keymap_setup(struct input_dev *dev, for (e = keymap; e->type != KE_END; e++) map_size++; - map = kcalloc(map_size, sizeof(struct key_entry), GFP_KERNEL); + map = devm_kmemdup(&dev->dev, keymap, map_size * sizeof(*map), + GFP_KERNEL); if (!map) return -ENOMEM; - memcpy(map, keymap, map_size * sizeof(struct key_entry)); - for (i = 0; i < map_size; i++) { entry = &map[i]; if (setup) { error = setup(dev, entry); if (error) - goto err_out; + return error; } switch (entry->type) { @@ -221,10 +220,6 @@ int sparse_keymap_setup(struct input_dev *dev, dev->setkeycode = sparse_keymap_setkeycode; return 0; - - err_out: - kfree(map); - return error; } EXPORT_SYMBOL(sparse_keymap_setup); @@ -232,29 +227,13 @@ EXPORT_SYMBOL(sparse_keymap_setup); * sparse_keymap_free - free memory allocated for sparse keymap * @dev: Input device using sparse keymap * - * This function is used to free memory allocated by sparse keymap + * This function used to free memory allocated by sparse keymap * in an input device that was set up by sparse_keymap_setup(). - * NOTE: It is safe to cal this function while input device is - * still registered (however the drivers should care not to try to - * use freed keymap and thus have to shut off interrupts/polling - * before freeing the keymap). + * Since sparse_keymap_setup() now uses a managed allocation for the + * keymap copy, use of this function is deprecated. */ void sparse_keymap_free(struct input_dev *dev) { - unsigned long flags; - - /* - * Take event lock to prevent racing with input_get_keycode() - * and input_set_keycode() if we are called while input device - * is still registered. - */ - spin_lock_irqsave(&dev->event_lock, flags); - - kfree(dev->keycode); - dev->keycode = NULL; - dev->keycodemax = 0; - - spin_unlock_irqrestore(&dev->event_lock, flags); } EXPORT_SYMBOL(sparse_keymap_free); -- cgit