From e63b063ecd248ad9f54a961ddf2a6d97da944456 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Sun, 12 May 2019 22:57:43 +0200 Subject: [PATCH 001/207] clk: meson: fix MPLL 50M binding id typo MPLL_5OM (the capital letter o) should indeed be MPLL_50M (the number) Fix this before it gets used. Fixes: 25db146aa726 ("dt-bindings: clk: meson: add g12a periph clock controller bindings") Reported-by: Martin Blumenstingl Acked-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Signed-off-by: Jerome Brunet --- drivers/clk/meson/g12a.c | 4 ++-- drivers/clk/meson/g12a.h | 2 +- include/dt-bindings/clock/g12a-clkc.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 739f64fdf1e3..206fafd299ea 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -2734,8 +2734,8 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = { [CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw, [CLKID_MALI_1] = &g12a_mali_1.hw, [CLKID_MALI] = &g12a_mali.hw, - [CLKID_MPLL_5OM_DIV] = &g12a_mpll_50m_div.hw, - [CLKID_MPLL_5OM] = &g12a_mpll_50m.hw, + [CLKID_MPLL_50M_DIV] = &g12a_mpll_50m_div.hw, + [CLKID_MPLL_50M] = &g12a_mpll_50m.hw, [CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw, [CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw, [CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw, diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h index 39c41af70804..bcc05cd9882f 100644 --- a/drivers/clk/meson/g12a.h +++ b/drivers/clk/meson/g12a.h @@ -166,7 +166,7 @@ #define CLKID_HDMI_DIV 167 #define CLKID_MALI_0_DIV 170 #define CLKID_MALI_1_DIV 173 -#define CLKID_MPLL_5OM_DIV 176 +#define CLKID_MPLL_50M_DIV 176 #define CLKID_SYS_PLL_DIV16_EN 178 #define CLKID_SYS_PLL_DIV16 179 #define CLKID_CPU_CLK_DYN0_SEL 180 diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h index 82c9e0c020b2..e10470ed7c4f 100644 --- a/include/dt-bindings/clock/g12a-clkc.h +++ b/include/dt-bindings/clock/g12a-clkc.h @@ -130,7 +130,7 @@ #define CLKID_MALI_1_SEL 172 #define CLKID_MALI_1 174 #define CLKID_MALI 175 -#define CLKID_MPLL_5OM 177 +#define CLKID_MPLL_50M 177 #define CLKID_CPU_CLK 187 #define CLKID_PCIE_PLL 201 #define CLKID_VDEC_1 204 From 3ff46efbcd90d3d469de8eddaf03d12293aaa50c Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 12 May 2019 21:43:00 +0200 Subject: [PATCH 002/207] clk: meson: meson8b: fix a typo in the VPU parent names array variable The variable which holds the parent names for the VPU clocks has a typo in it. Fix this typo to make the variable naming in the driver consistent. No functional changes. Fixes: 41785ce562491d ("clk: meson: meson8b: add the VPU clock trees") Signed-off-by: Martin Blumenstingl Signed-off-by: Jerome Brunet --- drivers/clk/meson/meson8b.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index 37cf0f01bb5d..62cd3a7f1f65 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -1761,7 +1761,7 @@ static struct clk_regmap meson8m2_gp_pll = { }, }; -static const char * const mmeson8b_vpu_0_1_parent_names[] = { +static const char * const meson8b_vpu_0_1_parent_names[] = { "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7" }; @@ -1778,8 +1778,8 @@ static struct clk_regmap meson8b_vpu_0_sel = { .hw.init = &(struct clk_init_data){ .name = "vpu_0_sel", .ops = &clk_regmap_mux_ops, - .parent_names = mmeson8b_vpu_0_1_parent_names, - .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), + .parent_names = meson8b_vpu_0_1_parent_names, + .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names), .flags = CLK_SET_RATE_PARENT, }, }; @@ -1837,8 +1837,8 @@ static struct clk_regmap meson8b_vpu_1_sel = { .hw.init = &(struct clk_init_data){ .name = "vpu_1_sel", .ops = &clk_regmap_mux_ops, - .parent_names = mmeson8b_vpu_0_1_parent_names, - .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), + .parent_names = meson8b_vpu_0_1_parent_names, + .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names), .flags = CLK_SET_RATE_PARENT, }, }; From 01dfdd7b4693496854ac92d1ebfb18d7b108f777 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 20 Apr 2019 11:32:57 +0200 Subject: [PATCH 003/207] ARM: dts: meson8: fix GPU interrupts and drop an undocumented property The interrupts in Amlogic's vendor kernel sources are all contiguous. There are two typos leading to pp2 and pp4 as well as ppmmu2 and ppmmu4 incorrectly sharing the same interrupt line. Fix this by using interrupt 170 for pp2 and 171 for ppmmu2. Also drop the undocumented "switch-delay" which is a left-over from my experiments with an early lima kernel driver when it was still out-of-tree and required this property on Amlogic SoCs. Fixes: 7d3f6b536e72c9 ("ARM: dts: meson8: add the Mali-450 MP6 GPU") Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 7ef442462ea4..40c11b6b217a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -248,8 +248,8 @@ mali: gpu@c0000 { , , , - , - , + , + , , , , @@ -264,7 +264,6 @@ mali: gpu@c0000 { clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ From f3b7cbe2200f867e167ae701d7164b58406e9c90 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 20 Apr 2019 11:32:58 +0200 Subject: [PATCH 004/207] ARM: dts: meson8b: drop undocumented property from the Mali GPU node Drop the undocumented "switch-delay" which is a left-over from my experiments with an early lima kernel driver when it was still out-of-tree and required this property on Amlogic SoCs. Fixes: c3ea80b6138cae ("ARM: dts: meson8b: add the Mali-450 MP2 GPU") Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8b.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 800cd65fc50a..4b919590dae5 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -229,7 +229,6 @@ mali: gpu@c0000 { clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ From 26d65140e92a626e39c73c9abf769fd174bf5076 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 12 May 2019 21:39:36 +0200 Subject: [PATCH 005/207] ARM: dts: meson8b: fix the operating voltage of the Mali GPU Amlogic's vendor kernel defines an OPP for the GPU on Meson8b boards with a voltage of 1.15V. It turns out that the vendor kernel relies on the bootloader to set up the voltage. The bootloader however sets a fixed voltage of 1.10V. Amlogic's patched u-boot sources (uboot-2015-01-15-23a3562521) confirm this: $ grep -oiE "VDD(EE|AO)_VOLTAGE[ ]+[0-9]+" board/amlogic/configs/m8b_* board/amlogic/configs/m8b_m100_v1.h:VDDAO_VOLTAGE 1100 board/amlogic/configs/m8b_m101_v1.h:VDDAO_VOLTAGE 1100 board/amlogic/configs/m8b_m102_v1.h:VDDAO_VOLTAGE 1100 board/amlogic/configs/m8b_m200_v1.h:VDDAO_VOLTAGE 1100 board/amlogic/configs/m8b_m201_v1.h:VDDEE_VOLTAGE 1100 board/amlogic/configs/m8b_m201_v1.h:VDDEE_VOLTAGE 1100 board/amlogic/configs/m8b_m202_v1.h:VDDEE_VOLTAGE 1100 Another hint at this is the VDDEE voltage on the EC-100 and Odroid-C1 boards. The VDDEE regulator supplies the Mali GPU. It's basically a copy of the VCCK (CPU supply) which means it's limited to 0.86V to 1.14V. Update the operating voltage of the Mali GPU on Meson8b to 1.10V so it matches with what the vendor u-boot sets. Fixes: c3ea80b6138cae ("ARM: dts: meson8b: add the Mali-450 MP2 GPU") Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8b.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 4b919590dae5..ec67f49116d9 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -163,23 +163,23 @@ gpu_opp_table: gpu-opp-table { opp-255000000 { opp-hz = /bits/ 64 <255000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-364300000 { opp-hz = /bits/ 64 <364300000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-425000000 { opp-hz = /bits/ 64 <425000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-510000000 { opp-hz = /bits/ 64 <510000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-637500000 { opp-hz = /bits/ 64 <637500000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; turbo-mode; }; }; From b2b5921fe4b363ff29fea9183aca089231a6bafc Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 21 May 2019 10:43:35 +0200 Subject: [PATCH 006/207] mtd: rawnand: initialize ntargets with maxchips memorg->ntargets is initialized with '1'. It should be initialized with the maxchips argument from nand_scan() instead. Otherwise multi chip support errors out on the secondary chip selects when trying to call nand_reset() on them: WARNING: CPU: 0 PID: 1 at drivers/mtd/nand/raw/internals.h:114 nand_reset_op+0x194/0x1c4 With this memorg->ntargets is initialized with the maximum number of chip selects supported by the driver. After having detected the number of actually connected chips memory->ntargets is updated with that number. Fixes: 32813e288414 ("mtd: rawnand: Get rid of chip->numchips") Signed-off-by: Sascha Hauer Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 2cf71060d6f8..3a14b79ddfae 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4666,7 +4666,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) memorg = nanddev_get_memorg(&chip->base); memorg->planes_per_lun = 1; memorg->luns_per_target = 1; - memorg->ntargets = 1; /* * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx) @@ -5031,6 +5030,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, if (ret) return ret; + memorg->ntargets = maxchips; + /* Read the flash type */ ret = nand_detect(chip, table); if (ret) { From 7b785645e8f13e17cbce492708cf6e7039d32e46 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 24 May 2019 10:12:46 -0400 Subject: [PATCH 007/207] mm: fix page cache convergence regression Since a28334862993 ("page cache: Finish XArray conversion"), on most major Linux distributions, the page cache doesn't correctly transition when the hot data set is changing, and leaves the new pages thrashing indefinitely instead of kicking out the cold ones. On a freshly booted, freshly ssh'd into virtual machine with 1G RAM running stock Arch Linux: [root@ham ~]# ./reclaimtest.sh + dd of=workingset-a bs=1M count=0 seek=600 + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + ./mincore workingset-a 153600/153600 workingset-a + dd of=workingset-b bs=1M count=0 seek=600 + cat workingset-b + cat workingset-b + cat workingset-b + cat workingset-b + ./mincore workingset-a workingset-b 104029/153600 workingset-a 120086/153600 workingset-b + cat workingset-b + cat workingset-b + cat workingset-b + cat workingset-b + ./mincore workingset-a workingset-b 104029/153600 workingset-a 120268/153600 workingset-b workingset-b is a 600M file on a 1G host that is otherwise entirely idle. No matter how often it's being accessed, it won't get cached. While investigating, I noticed that the non-resident information gets aggressively reclaimed - /proc/vmstat::workingset_nodereclaim. This is a problem because a workingset transition like this relies on the non-resident information tracked in the page cache tree of evicted file ranges: when the cache faults are refaults of recently evicted cache, we challenge the existing active set, and that allows a new workingset to establish itself. Tracing the shrinker that maintains this memory revealed that all page cache tree nodes were allocated to the root cgroup. This is a problem, because 1) the shrinker sizes the amount of non-resident information it keeps to the size of the cgroup's other memory and 2) on most major Linux distributions, only kernel threads live in the root cgroup and everything else gets put into services or session groups: [root@ham ~]# cat /proc/self/cgroup 0::/user.slice/user-0.slice/session-c1.scope As a result, we basically maintain no non-resident information for the workloads running on the system, thus breaking the caching algorithm. Looking through the code, I found the culprit in the above-mentioned patch: when switching from the radix tree to xarray, it dropped the __GFP_ACCOUNT flag from the tree node allocations - the flag that makes sure the allocated memory gets charged to and tracked by the cgroup of the calling process - in this case, the one doing the fault. To fix this, allow xarray users to specify per-tree flag that makes xarray allocate nodes using __GFP_ACCOUNT. Then restore the page cache tree annotation to request such cgroup tracking for the cache nodes. With this patch applied, the page cache correctly converges on new workingsets again after just a few iterations: [root@ham ~]# ./reclaimtest.sh + dd of=workingset-a bs=1M count=0 seek=600 + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + cat workingset-a + ./mincore workingset-a 153600/153600 workingset-a + dd of=workingset-b bs=1M count=0 seek=600 + cat workingset-b + ./mincore workingset-a workingset-b 124607/153600 workingset-a 87876/153600 workingset-b + cat workingset-b + ./mincore workingset-a workingset-b 81313/153600 workingset-a 133321/153600 workingset-b + cat workingset-b + ./mincore workingset-a workingset-b 63036/153600 workingset-a 153600/153600 workingset-b Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Signed-off-by: Matthew Wilcox (Oracle) --- fs/inode.c | 2 +- include/linux/xarray.h | 1 + lib/xarray.c | 12 ++++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index df6542ec3b88..2bf21e2c90fc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0e01e6129145..5921599b6dc4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -265,6 +265,7 @@ enum xa_lock_type { #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) +#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) diff --git a/lib/xarray.c b/lib/xarray.c index 6be3acbb861f..446b956c9188 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; @@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); @@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) if (node) { xas->xa_alloc = NULL; } else { - node = kmem_cache_alloc(radix_tree_node_cachep, - GFP_NOWAIT | __GFP_NOWARN); + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; From 5c089fd0c73411f2170ab795c9ffc16718c7d007 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 14 May 2019 16:05:45 -0400 Subject: [PATCH 008/207] idr: Fix idr_get_next race with idr_remove If the entry is deleted from the IDR between the call to radix_tree_iter_find() and rcu_dereference_raw(), idr_get_next() will return NULL, which will end the iteration prematurely. We should instead continue to the next entry in the IDR. This only happens if the iteration is protected by the RCU lock. Most IDR users use a spinlock or semaphore to exclude simultaneous modifications. It was noticed once the PID allocator was converted to use the IDR, as it uses the RCU lock, but there may be other users elsewhere in the kernel. We can't use the normal pattern of calling radix_tree_deref_retry() (which catches both a retry entry in a leaf node and a node entry in the root) as the IDR supports storing entries which are unaligned, which will trigger an infinite loop if they are encountered. Instead, we have to explicitly check whether the entry is a retry entry. Fixes: 0a835c4f090a ("Reimplement IDR and IDA using the radix tree") Reported-by: Brendan Gregg Tested-by: Brendan Gregg Signed-off-by: Matthew Wilcox (Oracle) --- lib/idr.c | 14 +++++++-- tools/testing/radix-tree/idr-test.c | 46 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index c34e256d2f01..66a374892482 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + void *entry = NULL; unsigned long base = idr->idr_base; unsigned long id = *nextid; id = (id < base) ? 0 : id - base; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); + radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) { + entry = rcu_dereference_raw(*slot); + if (!entry) + continue; + if (!xa_is_internal(entry)) + break; + if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry)) + break; + slot = radix_tree_iter_retry(&iter); + } if (!slot) return NULL; id = iter.index + base; @@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid) return NULL; *nextid = id; - return rcu_dereference_raw(*slot); + return entry; } EXPORT_SYMBOL(idr_get_next); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 1b63bdb7688f..fe33be4c2475 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -287,6 +287,51 @@ static void idr_align_test(struct idr *idr) } } +DEFINE_IDR(find_idr); + +static void *idr_throbber(void *arg) +{ + time_t start = time(NULL); + int id = *(int *)arg; + + rcu_register_thread(); + do { + idr_alloc(&find_idr, xa_mk_value(id), id, id + 1, GFP_KERNEL); + idr_remove(&find_idr, id); + } while (time(NULL) < start + 10); + rcu_unregister_thread(); + + return NULL; +} + +void idr_find_test_1(int anchor_id, int throbber_id) +{ + pthread_t throbber; + time_t start = time(NULL); + + pthread_create(&throbber, NULL, idr_throbber, &throbber_id); + + BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id, + anchor_id + 1, GFP_KERNEL) != anchor_id); + + do { + int id = 0; + void *entry = idr_get_next(&find_idr, &id); + BUG_ON(entry != xa_mk_value(id)); + } while (time(NULL) < start + 11); + + pthread_join(throbber, NULL); + + idr_remove(&find_idr, anchor_id); + BUG_ON(!idr_is_empty(&find_idr)); +} + +void idr_find_test(void) +{ + idr_find_test_1(100000, 0); + idr_find_test_1(0, 100000); +} + void idr_checks(void) { unsigned long i; @@ -368,6 +413,7 @@ void idr_checks(void) idr_u32_test(1); idr_u32_test(0); idr_align_test(&idr); + idr_find_test(); } #define module_init(x) From 12fd2aee6db765ab4e97c4a37e6d1f6c10e74ee6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 9 Mar 2019 22:25:27 -0500 Subject: [PATCH 009/207] XArray tests: Add check_insert A simple test which just checks that inserting an entry into an empty array succeeds. Try various different interesting indices. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 5d4bad8bd96a..9d631a7b6a70 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -38,6 +38,12 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_index(index), gfp); } +static void xa_insert_index(struct xarray *xa, unsigned long index) +{ + XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index), + GFP_KERNEL) != 0); +} + static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { u32 id; @@ -338,6 +344,37 @@ static noinline void check_xa_shrink(struct xarray *xa) } } +static noinline void check_insert(struct xarray *xa) +{ + unsigned long i; + + for (i = 0; i < 1024; i++) { + xa_insert_index(xa, i); + XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL); + xa_erase_index(xa, i); + } + + for (i = 10; i < BITS_PER_LONG; i++) { + xa_insert_index(xa, 1UL << i); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL); + xa_erase_index(xa, 1UL << i); + + xa_insert_index(xa, (1UL << i) - 1); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL); + XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL); + xa_erase_index(xa, (1UL << i) - 1); + } + + xa_insert_index(xa, ~0UL); + XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL); + XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL); + xa_erase_index(xa, ~0UL); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_cmpxchg(struct xarray *xa) { void *FIVE = xa_mk_value(5); @@ -1527,6 +1564,7 @@ static int xarray_checks(void) check_xa_mark(&array); check_xa_shrink(&array); check_xas_erase(&array); + check_insert(&array); check_cmpxchg(&array); check_reserve(&array); check_reserve(&xa0); From db56c5128e6625cb16efc4910b60627e46f608e3 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 21 May 2019 15:54:05 +0800 Subject: [PATCH 010/207] irqchip/irq-csky-mpintc: Support auto irq deliver to all cpus The csky,mpintc could deliver a external irq to one cpu or all cpus, but it couldn't deliver a external irq to a group of cpus with cpu_mask. So we only use auto deliver mode when affinity mask_val is equal to cpu_present_mask. There is no limitation for only two cpus in SMP system. Signed-off-by: Guo Ren Cc: Marc Zyngier Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-csky-mpintc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c index c67c961ab6cc..a4c1aacba1ff 100644 --- a/drivers/irqchip/irq-csky-mpintc.c +++ b/drivers/irqchip/irq-csky-mpintc.c @@ -89,8 +89,19 @@ static int csky_irq_set_affinity(struct irq_data *d, if (cpu >= nr_cpu_ids) return -EINVAL; - /* Enable interrupt destination */ - cpu |= BIT(31); + /* + * The csky,mpintc could support auto irq deliver, but it only + * could deliver external irq to one cpu or all cpus. So it + * doesn't support deliver external irq to a group of cpus + * with cpu_mask. + * SO we only use auto deliver mode when affinity mask_val is + * equal to cpu_present_mask. + * + */ + if (cpumask_equal(mask_val, cpu_present_mask)) + cpu = 0; + else + cpu |= BIT(31); writel_relaxed(cpu, INTCG_base + INTCG_CIDSTR + offset); From eb737b8f446044df327b30f24416be0cae35d4aa Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 4 Jun 2019 13:17:51 +0300 Subject: [PATCH 011/207] irqchip/ti-sci-inta: Fix kernel crash if irq_create_fwspec_mapping fail irq_create_fwspec_mapping() can fail, returning 0 as parent_virq. In this case vint_desc is going to be NULL in ti_sci_inta_alloc_irq() which will cause NULL pointer dereference. Also note that irq_create_fwspec_mapping() returns 'unsigned int' so the check '<=' was wrong. Use -EINVAL if irq_create_fwspec_mapping() returned with 0. Signed-off-by: Peter Ujfalusi Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-ti-sci-inta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 011b60a49e3f..ef4d625d2d80 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -159,9 +159,9 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom parent_fwspec.param[1] = vint_desc->vint_id; parent_virq = irq_create_fwspec_mapping(&parent_fwspec); - if (parent_virq <= 0) { + if (parent_virq == 0) { kfree(vint_desc); - return ERR_PTR(parent_virq); + return ERR_PTR(-EINVAL); } vint_desc->parent_virq = parent_virq; From 6d4d367d0e9ffab4d64a3436256a6a052dc1195d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 5 Jun 2019 09:34:10 +0100 Subject: [PATCH 012/207] irqchip/mips-gic: Use the correct local interrupt map registers The MIPS GIC contains a block of registers used to map local interrupts to a particular CPU interrupt pin. Since these registers are found at a consecutive range of addresses we access them using an index, via the (read|write)_gic_v[lo]_map accessor functions. We currently use values from enum mips_gic_local_interrupt as those indices. Unfortunately whilst enum mips_gic_local_interrupt provides the correct offsets for bits in the pending & mask registers, the ordering of the map registers is subtly different... Compared with the ordering of pending & mask bits, the map registers move the FDC from the end of the list to index 3 after the timer interrupt. As a result the performance counter & software interrupts are therefore at indices 4-6 rather than indices 3-5. Notably this causes problems with performance counter interrupts being incorrectly mapped on some systems, and presumably will also cause problems for FDC interrupts. Introduce a function to map from enum mips_gic_local_interrupt to the index of the corresponding map register, and use it to ensure we access the map registers for the correct interrupts. Signed-off-by: Paul Burton Fixes: a0dc5cb5e31b ("irqchip: mips-gic: Simplify gic_local_irq_domain_map()") Fixes: da61fcf9d62a ("irqchip: mips-gic: Use irq_cpu_online to (un)mask all-VP(E) IRQs") Reported-and-tested-by: Archer Yan Cc: Thomas Gleixner Cc: Jason Cooper Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Marc Zyngier --- arch/mips/include/asm/mips-gic.h | 30 ++++++++++++++++++++++++++++++ drivers/irqchip/irq-mips-gic.c | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h index 558059a8f218..0277b56157af 100644 --- a/arch/mips/include/asm/mips-gic.h +++ b/arch/mips/include/asm/mips-gic.h @@ -314,6 +314,36 @@ static inline bool mips_gic_present(void) return IS_ENABLED(CONFIG_MIPS_GIC) && mips_gic_base; } +/** + * mips_gic_vx_map_reg() - Return GIC_Vx__MAP register offset + * @intr: A GIC local interrupt + * + * Determine the index of the GIC_VL__MAP or GIC_VO__MAP register + * within the block of GIC map registers. This is almost the same as the order + * of interrupts in the pending & mask registers, as used by enum + * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the + * interrupts after it... + * + * Return: The map register index corresponding to @intr. + * + * The return value is suitable for use with the (read|write)_gic_v[lo]_map + * accessor functions. + */ +static inline unsigned int +mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) +{ + /* WD, Compare & Timer are 1:1 */ + if (intr <= GIC_LOCAL_INT_TIMER) + return intr; + + /* FDC moves to after Timer... */ + if (intr == GIC_LOCAL_INT_FDC) + return GIC_LOCAL_INT_TIMER + 1; + + /* As a result everything else is offset by 1 */ + return intr + 1; +} + /** * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq * diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index d32268cc1174..f3985469c221 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d) intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); cd = irq_data_get_irq_chip_data(d); - write_gic_vl_map(intr, cd->map); + write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); if (cd->mask) write_gic_vl_smask(BIT(intr)); } @@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); for_each_online_cpu(cpu) { write_gic_vl_other(mips_cm_vp_id(cpu)); - write_gic_vo_map(intr, map); + write_gic_vo_map(mips_gic_vx_map_reg(intr), map); } spin_unlock_irqrestore(&gic_lock, flags); From a050fa5476d418fc16b25abe168b3d38ba11e13c Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Mon, 13 May 2019 19:42:06 +0800 Subject: [PATCH 013/207] irqchip/gic-v3-its: Fix command queue pointer comparison bug When we run several VMs with PCI passthrough and GICv4 enabled, not pinning vCPUs, we will occasionally see below warnings in dmesg: ITS queue timeout (65440 65504 480) ITS cmd its_build_vmovp_cmd failed The reason for the above issue is that in BUILD_SINGLE_CMD_FUNC: 1. Post the write command. 2. Release the lock. 3. Start to read GITS_CREADR to get the reader pointer. 4. Compare the reader pointer to the target pointer. 5. If reader pointer does not reach the target, sleep 1us and continue to try. If we have several processors running the above concurrently, other CPUs will post write commands while the 1st CPU is waiting the completion. So we may have below issue: phase 1: ---rd_idx-----from_idx-----to_idx--0--------- wait 1us: phase 2: --------------from_idx-----to_idx--0-rd_idx-- That is the rd_idx may fly ahead of to_idx, and if in case to_idx is near the wrap point, rd_idx will wrap around. So the below condition will not be met even after 1s: if (from_idx < to_idx && rd_idx >= to_idx) There is another theoretical issue. For a slow and busy ITS, the initial rd_idx may fall behind from_idx a lot, just as below: ---rd_idx---0--from_idx-----to_idx----------- This will cause the wait function exit too early. Actually, it does not make much sense to use from_idx to judge if to_idx is wrapped, but we need a initial rd_idx when lock is still acquired, and it can be used to judge whether to_idx is wrapped and the current rd_idx is wrapped. We switch to a method of calculating the delta of two adjacent reads and accumulating it to get the sum, so that we can get the real rd_idx from the wrapped value even when the queue is almost full. Cc: Thomas Gleixner Cc: Jason Cooper Signed-off-by: Heyi Guo Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 35 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1e364d3ad9c5..f0523916232d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -744,32 +744,43 @@ static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd) } static int its_wait_for_range_completion(struct its_node *its, - struct its_cmd_block *from, + u64 prev_idx, struct its_cmd_block *to) { - u64 rd_idx, from_idx, to_idx; + u64 rd_idx, to_idx, linear_idx; u32 count = 1000000; /* 1s! */ - from_idx = its_cmd_ptr_to_offset(its, from); + /* Linearize to_idx if the command set has wrapped around */ to_idx = its_cmd_ptr_to_offset(its, to); + if (to_idx < prev_idx) + to_idx += ITS_CMD_QUEUE_SZ; + + linear_idx = prev_idx; while (1) { + s64 delta; + rd_idx = readl_relaxed(its->base + GITS_CREADR); - /* Direct case */ - if (from_idx < to_idx && rd_idx >= to_idx) - break; + /* + * Compute the read pointer progress, taking the + * potential wrap-around into account. + */ + delta = rd_idx - prev_idx; + if (rd_idx < prev_idx) + delta += ITS_CMD_QUEUE_SZ; - /* Wrapped case */ - if (from_idx >= to_idx && rd_idx >= to_idx && rd_idx < from_idx) + linear_idx += delta; + if (linear_idx >= to_idx) break; count--; if (!count) { - pr_err_ratelimited("ITS queue timeout (%llu %llu %llu)\n", - from_idx, to_idx, rd_idx); + pr_err_ratelimited("ITS queue timeout (%llu %llu)\n", + to_idx, linear_idx); return -1; } + prev_idx = rd_idx; cpu_relax(); udelay(1); } @@ -786,6 +797,7 @@ void name(struct its_node *its, \ struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \ synctype *sync_obj; \ unsigned long flags; \ + u64 rd_idx; \ \ raw_spin_lock_irqsave(&its->lock, flags); \ \ @@ -807,10 +819,11 @@ void name(struct its_node *its, \ } \ \ post: \ + rd_idx = readl_relaxed(its->base + GITS_CREADR); \ next_cmd = its_post_commands(its); \ raw_spin_unlock_irqrestore(&its->lock, flags); \ \ - if (its_wait_for_range_completion(its, cmd, next_cmd)) \ + if (its_wait_for_range_completion(its, rd_idx, next_cmd)) \ pr_err_ratelimited("ITS cmd %ps failed\n", builder); \ } From 41b3588dba6ef4b7995735a97e47ff0aeea6c276 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 29 May 2019 23:55:57 -0700 Subject: [PATCH 014/207] clk: ti: clkctrl: Fix returning uninitialized data If we do a clk_get() for a clock that does not exists, we have _ti_omap4_clkctrl_xlate() return uninitialized data if no match is found. This can be seen in some cases with SLAB_DEBUG enabled: Unable to handle kernel paging request at virtual address 5a5a5a5a ... clk_hw_create_clk.part.33 sysc_notifier_call notifier_call_chain blocking_notifier_call_chain device_add Let's fix this by setting a found flag only when we find a match. Reported-by: Tomi Valkeinen Fixes: 88a172526c32 ("clk: ti: add support for clkctrl clocks") Signed-off-by: Tony Lindgren Tested-by: Peter Ujfalusi Tested-by: Tomi Valkeinen Signed-off-by: Stephen Boyd --- drivers/clk/ti/clkctrl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 8e834317c97d..975995eea15c 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -229,6 +229,7 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec, { struct omap_clkctrl_provider *provider = data; struct omap_clkctrl_clk *entry; + bool found = false; if (clkspec->args_count != 2) return ERR_PTR(-EINVAL); @@ -238,11 +239,13 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec, list_for_each_entry(entry, &provider->clocks, node) { if (entry->reg_offset == clkspec->args[0] && - entry->bit_offset == clkspec->args[1]) + entry->bit_offset == clkspec->args[1]) { + found = true; break; + } } - if (!entry) + if (!found) return ERR_PTR(-EINVAL); return entry->clk; From 1571c029a2ff289683ddb0a32253850363bcb8a7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 6 Jun 2019 11:10:28 +0200 Subject: [PATCH 015/207] dax: Fix xarray entry association for mixed mappings When inserting entry into xarray, we store mapping and index in corresponding struct pages for memory error handling. When it happened that one process was mapping file at PMD granularity while another process at PTE granularity, we could wrongly deassociate PMD range and then reassociate PTE range leaving the rest of struct pages in PMD range without mapping information which could later cause missed notifications about memory errors. Fix the problem by calling the association / deassociation code if and only if we are really going to update the xarray (deassociating and associating zero or empty entries is just no-op so there's no reason to complicate the code with trying to avoid the calls for these cases). Cc: Fixes: d2c997c0f145 ("fs, dax: use page->mapping to warn if truncate...") Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index f74386293632..9fd908f3df32 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -728,12 +728,11 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + void *old; + dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); - } - - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -742,7 +741,7 @@ static void *dax_insert_entry(struct xa_state *xas, * existing entry is a PMD, we will just leave the PMD in the * tree and dirty it if necessary. */ - void *old = dax_lock_entry(xas, new_entry); + old = dax_lock_entry(xas, new_entry); WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | DAX_LOCKED)); entry = new_entry; From d2ba3b1714d754f190ac0527713f9b44513b5857 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 8 Jun 2019 20:04:46 +0200 Subject: [PATCH 016/207] parisc: Fix module loading error with JUMP_LABEL feature Commit 62217beb394e ("parisc: Add static branch and JUMP_LABEL feature") missed to add code to handle PCREL64 relocations which are generated when creating a jump label on a 64-bit kernel. This patch fixes module load errors like this one: # modprobe -v ipv6 insmod /lib/modules/5.2.0-rc1-JeR/kernel/net/ipv6/ipv6.ko modprobe: ERROR: could not insert 'ipv6': Exec format error dmesg reports: module ipv6: Unknown relocation: 72 Reported-by: Jeroen Roovers Tested-by: Jeroen Roovers Fixes: 62217beb394e ("parisc: Add static branch and JUMP_LABEL feature") Signed-off-by: Helge Deller --- arch/parisc/kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index f241ded9239b..1f0f29a289d3 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -786,6 +786,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 32-bit PC relative address */ *loc = val - dot - 8 + addend; break; + case R_PARISC_PCREL64: + /* 64-bit PC relative address */ + *loc64 = val - dot - 8 + addend; + break; case R_PARISC_DIR64: /* 64-bit effective address */ *loc64 = val + addend; From 18df7577adae6c6c778bf774b3aebcacbc1fb439 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 9 Jun 2019 20:17:44 +0200 Subject: [PATCH 017/207] efi/memreserve: deal with memreserve entries in unmapped memory Ensure that the EFI memreserve entries can be accessed, even if they are located in memory that the kernel (e.g., a crashkernel) omits from the linear map. Fixes: 80424b02d42b ("efi: Reduce the amount of memblock reservations ...") Cc: # 5.0+ Reported-by: Jonathan Richardson Reviewed-by: Jonathan Richardson Tested-by: Jonathan Richardson Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 16b2137d117c..4b7cf7bc0ded 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -1009,14 +1009,16 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) /* first try to find a slot in an existing linked list entry */ for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { - rsv = __va(prsv); + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { rsv->entry[index].base = addr; rsv->entry[index].size = size; + memunmap(rsv); return 0; } + memunmap(rsv); } /* no slot found - allocate a new linked list entry */ @@ -1024,7 +1026,13 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) if (!rsv) return -ENOMEM; - rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); + /* + * The memremap() call above assumes that a linux_efi_memreserve entry + * never crosses a page boundary, so let's ensure that this remains true + * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by + * using SZ_4K explicitly in the size calculation below. + */ + rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K); atomic_set(&rsv->count, 1); rsv->entry[0].base = addr; rsv->entry[0].size = size; From a483fcab38b43fb34a7f12ab1daadd3907f150e2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 May 2019 15:28:28 +0200 Subject: [PATCH 018/207] efi/bgrt: Drop BGRT status field reserved bits check Starting with ACPI 6.2 bits 1 and 2 of the BGRT status field are no longer reserved. These bits are now used to indicate if the image needs to be rotated before being displayed. The first device using these bits has now shown up (the GPD MicroPC) and the reserved bits check causes us to reject the valid BGRT table on this device. Rather then changing the reserved bits check, allowing only the 2 new bits, instead just completely remove it so that we do not end up with a similar problem when more bits are added in the future. Signed-off-by: Hans de Goede Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-bgrt.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c index a2384184a7de..b07c17643210 100644 --- a/drivers/firmware/efi/efi-bgrt.c +++ b/drivers/firmware/efi/efi-bgrt.c @@ -47,11 +47,6 @@ void __init efi_bgrt_init(struct acpi_table_header *table) bgrt->version); goto out; } - if (bgrt->status & 0xfe) { - pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", - bgrt->status); - goto out; - } if (bgrt->image_type != 0) { pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt->image_type); From 2bc42bfba9b247abd93991195b71f35a484531d1 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 3 Jun 2019 09:31:19 +0300 Subject: [PATCH 019/207] ARC: build: Try to guess CROSS_COMPILE with cc-cross-prefix For a long time we used to hard-code CROSS_COMPILE prefix for ARC until it started to cause problems, so we decided to solely rely on CROSS_COMPILE externally set by a user: commit 40660f1fcee8 ("ARC: build: Don't set CROSS_COMPILE in arch's Makefile"). While it works perfectly fine for build-systems where the prefix gets defined anyways for us human beings it's quite an annoying requirement especially given most of time the same one prefix "arc-linux-" is all what we need. It looks like finally we're getting the best of both worlds: 1. W/o cross-toolchain we still may install headers, build .dtb etc 2. W/ cross-toolchain get the kerne built with only ARCH=arc Inspired by [1] & [2]. [1] http://lists.infradead.org/pipermail/linux-snps-arc/2019-May/005788.html [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fc2b47b55f17 A side note: even though "cc-cross-prefix" does its job it pollutes console with output of "which" for all the prefixes it didn't manage to find a matching cross-compiler for like that: | # ARCH=arc make defconfig | which: no arceb-linux-gcc in (~/.local/bin:~/bin:/usr/bin:/usr/sbin) | *** Default configuration is based on 'nsim_hs_defconfig' Suggested-by: Vineet Gupta Reviewed-by: Masahiro Yamada Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index e2b991f75bc5..9cfd2ba7a12d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -8,6 +8,10 @@ KBUILD_DEFCONFIG := nsim_hs_defconfig +ifeq ($(CROSS_COMPILE),) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +endif + cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 From ec9b4feb1e41587c15d43d237844193318389dc3 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 5 Jun 2019 20:32:50 +0300 Subject: [PATCH 020/207] ARC: [plat-hsdk]: unify memory apertures configuration HSDK SoC has memory bridge which allows to configure memory map for different AXI masters in runtime. As of today we adjust memory apertures configuration in U-boot so we have different configuration in case of loading kernel via U-boot and JTAG. It isn't really critical in case of existing platform configuration as configuration differs for unused address space regions or unused AXI masters. However we may face with this issue when we'll bringup new peripherals or touch their address space. Fix that by perform full configuration of memory bridge in HSDK platform code. Basically we simply copy memory bridge configuration code from U-boot. Acked-by: Alexey Brodkin Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/platform.c | 161 ++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 2588b842407c..0e70e47358c6 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -35,8 +35,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) -#define CREG_PAE (CREG_BASE + 0x180) -#define CREG_PAE_UPDATE (CREG_BASE + 0x194) #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) @@ -102,20 +100,167 @@ static void __init hsdk_enable_gpio_intc_wire(void) iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); } -static void __init hsdk_init_early(void) +enum hsdk_axi_masters { + M_HS_CORE = 0, + M_HS_RTT, + M_AXI_TUN, + M_HDMI_VIDEO, + M_HDMI_AUDIO, + M_USB_HOST, + M_ETHERNET, + M_SDIO, + M_GPU, + M_DMAC_0, + M_DMAC_1, + M_DVFS +}; + +#define UPDATE_VAL 1 + +/* + * This is modified configuration of AXI bridge. Default settings + * are specified in "Table 111 CREG Address Decoder register reset values". + * + * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'. + * Possible slaves are: + * - 0 => no slave selected + * - 1 => DDR controller port #1 + * - 2 => SRAM controller + * - 3 => AXI tunnel + * - 4 => EBI controller + * - 5 => ROM controller + * - 6 => AXI2APB bridge + * - 7 => DDR controller port #2 + * - 8 => DDR controller port #3 + * - 9 => HS38x4 IOC + * - 10 => HS38x4 DMI + * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm' + * + * Please read ARC HS Development IC Specification, section 17.2 for more + * information about apertures configuration. + * + * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1 + * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210 + * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210 + * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000 + */ + +#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m))) +#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04)) +#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08)) +#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C)) +#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14)) + +#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010)) + +#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) +#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) + +static void __init hsdk_init_memory_bridge(void) { + u32 reg; + + /* + * M_HS_CORE has one unique register - BOOT. + * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first + * aperture to be masked by 'boot mirror'. + */ + reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3); + writel(reg, CREG_AXI_M_HS_CORE_BOOT); + writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE)); + writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE)); + writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT)); + + writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN)); + writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN)); + writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST)); + writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET)); + writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO)); + writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_GPU)); + writel(0x77777777, CREG_AXI_M_SLV1(M_GPU)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU)); + writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); + + writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); + writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + /* * PAE remapping for DMA clients does not work due to an RTL bug, so * CREG_PAE register must be programmed to all zeroes, otherwise it * will cause problems with DMA to/from peripherals even if PAE40 is * not used. */ + writel(0x00000000, CREG_PAE); + writel(UPDATE_VAL, CREG_PAE_UPDT); +} - /* Default is 1, which means "PAE offset = 4GByte" */ - writel_relaxed(0, (void __iomem *) CREG_PAE); - - /* Really apply settings made above */ - writel(1, (void __iomem *) CREG_PAE_UPDATE); +static void __init hsdk_init_early(void) +{ + hsdk_init_memory_bridge(); /* * Switch SDIO external ciu clock divider from default div-by-8 to From 9c8434516bd96062266232c62343c32b99160d51 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Mar 2019 10:43:30 -0700 Subject: [PATCH 021/207] MAINTAINERS: BCM2835: Add internal Broadcom mailing list There is a patchwork instance behind bcm-kernel-feedback-list that is helpful to track submissions for the Broadcom ARM-SoC maintainers and make sure there are no patches missed, add this list for the Broadcom BCM2835 architecture. Signed-off-by: Florian Fainelli --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5cfbea4ce575..956f2911f5d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3110,6 +3110,7 @@ F: arch/arm/mach-bcm/ BROADCOM BCM2835 ARM ARCHITECTURE M: Eric Anholt M: Stefan Wahren +L: bcm-kernel-feedback-list@broadcom.com L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://github.com/anholt/linux From 64f35709d5735ddbf8ab52d60ab3d62550b544d7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Mar 2019 10:45:29 -0700 Subject: [PATCH 022/207] MAINTAINERS: BCM53573: Add internal Broadcom mailing list There is a patchwork instance behind bcm-kernel-feedback-list that is helpful to track submissions, add this list for the Broadcom BCM53573 architecture. Signed-off-by: Florian Fainelli --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 956f2911f5d8..4e5c2c5af9d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3140,6 +3140,7 @@ F: arch/arm/boot/dts/bcm953012* BROADCOM BCM53573 ARM ARCHITECTURE M: Rafał Miłecki +L: bcm-kernel-feedback-list@broadcom.com L: linux-arm-kernel@lists.infradead.org S: Maintained F: arch/arm/boot/dts/bcm53573* From 53f2ac9d3aa881ed419054076042898b77c27ee4 Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Fri, 17 May 2019 12:57:53 +0800 Subject: [PATCH 023/207] arm64: dts: ls1028a: Fix CPU idle fail. PSCI spec define 1st parameter's bit 16 of function CPU_SUSPEND to indicate CPU State Type: 0 for standby, 1 for power down. In this case, we want to select standby for CPU idle feature. But current setting wrongly select power down and cause CPU SUSPEND fail every time. Need this fix. Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Ran Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b04581249f0b..bf7f845447ed 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -28,7 +28,7 @@ cpu0: cpu@0 { enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; cpu1: cpu@1 { @@ -38,7 +38,7 @@ cpu1: cpu@1 { enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; l2: l2-cache { @@ -53,13 +53,13 @@ idle-states { */ entry-method = "arm,psci"; - CPU_PH20: cpu-ph20 { - compatible = "arm,idle-state"; - idle-state-name = "PH20"; - arm,psci-suspend-param = <0x00010000>; - entry-latency-us = <1000>; - exit-latency-us = <1000>; - min-residency-us = <3000>; + CPU_PW20: cpu-pw20 { + compatible = "arm,idle-state"; + idle-state-name = "PW20"; + arm,psci-suspend-param = <0x0>; + entry-latency-us = <2000>; + exit-latency-us = <2000>; + min-residency-us = <6000>; }; }; From ca72d88378b2f2444d3ec145dd442d449d3fefbc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 12 Jun 2019 23:35:07 +1000 Subject: [PATCH 024/207] powerpc/mm/64s/hash: Reallocate context ids on fork When using the Hash Page Table (HPT) MMU, userspace memory mappings are managed at two levels. Firstly in the Linux page tables, much like other architectures, and secondly in the SLB (Segment Lookaside Buffer) and HPT. It's the SLB and HPT that are actually used by the hardware to do translations. As part of the series adding support for 4PB user virtual address space using the hash MMU, we added support for allocating multiple "context ids" per process, one for each 512TB chunk of address space. These are tracked in an array called extended_id in the mm_context_t of a process that has done a mapping above 512TB. If such a process forks (ie. clone(2) without CLONE_VM set) it's mm is copied, including the mm_context_t, and then init_new_context() is called to reinitialise parts of the mm_context_t as appropriate to separate the address spaces of the two processes. The key step in ensuring the two processes have separate address spaces is to allocate a new context id for the process, this is done at the beginning of hash__init_new_context(). If we didn't allocate a new context id then the two processes would share mappings as far as the SLB and HPT are concerned, even though their Linux page tables would be separate. For mappings above 512TB, which use the extended_id array, we neglected to allocate new context ids on fork, meaning the parent and child use the same ids and therefore share those mappings even though they're supposed to be separate. This can lead to the parent seeing writes done by the child, which is essentially memory corruption. There is an additional exposure which is that if the child process exits, all its context ids are freed, including the context ids that are still in use by the parent for mappings above 512TB. One or more of those ids can then be reallocated to a third process, that process can then read/write to the parent's mappings above 512TB. Additionally if the freed id is used for the third process's primary context id, then the parent is able to read/write to the third process's mappings *below* 512TB. All of these are fundamental failures to enforce separation between processes. The only mitigating factor is that the bug only occurs if a process creates mappings above 512TB, and most applications still do not create such mappings. Only machines using the hash page table MMU are affected, eg. PowerPC 970 (G5), PA6T, Power5/6/7/8/9. By default Power9 bare metal machines (powernv) use the Radix MMU and are not affected, unless the machine has been explicitly booted in HPT mode (using disable_radix on the kernel command line). KVM guests on Power9 may be affected if the host or guest is configured to use the HPT MMU. LPARs under PowerVM on Power9 are affected as they always use the HPT MMU. Kernels built with PAGE_SIZE=4K are not affected. The fix is relatively simple, we need to reallocate context ids for all extended mappings on fork. Fixes: f384796c40dc ("powerpc/mm: Add support for handling > 512TB address in SLB miss") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_book3s64.c | 46 +++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index f720c5cc0b5e..8751ae2e2d04 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -55,14 +55,48 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. + */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been @@ -80,6 +114,10 @@ static int hash__init_new_context(struct mm_struct *mm) if (mm->context.id == 0) slice_init_new_context_exec(mm); + index = realloc_context_ids(&mm->context); + if (index < 0) + return index; + subpage_prot_init_new_context(mm); pkey_mm_init(mm); From 16391bfc862342f285195013b73c1394fab28b97 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jun 2019 12:07:59 +1000 Subject: [PATCH 025/207] selftests/powerpc: Add test of fork with mapping above 512TB This tests that when a process with a mapping above 512TB forks we correctly separate the parent and child address spaces. This exercises the bug in the context id handling fixed in the previous commit. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/.gitignore | 3 +- tools/testing/selftests/powerpc/mm/Makefile | 4 +- .../powerpc/mm/large_vm_fork_separation.c | 87 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index ba919308fe30..d503b8764a8e 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -3,4 +3,5 @@ subpage_prot tempfile prot_sao segv_errors -wild_bctr \ No newline at end of file +wild_bctr +large_vm_fork_separation \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 43d68420e363..f1fbc15800c4 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,8 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ + large_vm_fork_separation TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 +$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c new file mode 100644 index 000000000000..2363a7f3ab0d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that allocating memory beyond the memory limit and then forking is +// handled correctly, ie. the child is able to access the mappings beyond the +// memory limit and the child's writes are not visible to the parent. + +#include +#include +#include +#include +#include +#include + +#include "utils.h" + + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + + +static int test(void) +{ + int p2c[2], c2p[2], rc, status, c, *p; + unsigned long page_size; + pid_t pid; + + page_size = sysconf(_SC_PAGESIZE); + SKIP_IF(page_size != 65536); + + // Create a mapping at 512TB to allocate an extended_id + p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n"); + return 1; + } + + printf("parent writing %p = 1\n", p); + *p = 1; + + FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1); + + pid = fork(); + if (pid == 0) { + FAIL_IF(read(p2c[0], &c, 1) != 1); + + pid = getpid(); + printf("child writing %p = %d\n", p, pid); + *p = pid; + + FAIL_IF(write(c2p[1], &c, 1) != 1); + FAIL_IF(read(p2c[0], &c, 1) != 1); + exit(0); + } + + c = 0; + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(read(c2p[0], &c, 1) != 1); + + // Prevent compiler optimisation + barrier(); + + rc = 0; + printf("parent reading %p = %d\n", p, *p); + if (*p != 1) { + printf("Error: BUG! parent saw child's write! *p = %d\n", *p); + rc = 1; + } + + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(waitpid(pid, &status, 0) == -1); + FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status)); + + if (rc == 0) + printf("success: test completed OK\n"); + + return rc; +} + +int main(void) +{ + return test_harness(test, "large_vm_fork_separation"); +} From 9caec6620f25b6d15646bbdb93062c872ba3b56f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 5 Jun 2019 15:01:39 +0100 Subject: [PATCH 026/207] clk: tegra210: Fix default rates for HDA clocks Currently the default clock rates for the HDA and HDA2CODEC_2X clocks are both 19.2MHz. However, the default rates for these clocks should actually be 51MHz and 48MHz, respectively. The current clock settings results in a distorted output during audio playback. Correct the default clock rates for these clocks by specifying them in the clock init table for Tegra210. Cc: stable@vger.kernel.org Signed-off-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-tegra210.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index ed3c7df75d1e..8b3b3d771813 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -3377,6 +3377,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, + { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 }, + { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 }, /* This MUST be the last entry. */ { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 }, }; From d6ed083f5cc621e15c15b56c3b585fd524dbcb0f Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 17 Jun 2019 00:30:39 +0200 Subject: [PATCH 027/207] MIPS: Fix bounds check virt_addr_valid The bounds check used the uninitialized variable vaddr, it should use the given parameter kaddr instead. When using the uninitialized value the compiler assumed it to be 0 and optimized this function to just return 0 in all cases. This should make the function check the range of the given address and only do the page map check in case it is in the expected range of virtual addresses. Fixes: 074a1e1167af ("MIPS: Bounds check virt_addr_valid") Cc: stable@vger.kernel.org # v4.12+ Cc: Paul Burton Signed-off-by: Hauke Mehrtens Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: f4bug@amsat.org Cc: linux-mips@vger.kernel.org Cc: ysu@wavecomp.com Cc: jcristau@debian.org --- arch/mips/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 50ee7213b432..d79f2b432318 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) bool __virt_addr_valid(const volatile void *kaddr) { - unsigned long vaddr = (unsigned long)vaddr; + unsigned long vaddr = (unsigned long)kaddr; if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) return false; From cd49b84d61b2dfc0360c76d9e6be49f5116ba1a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Jun 2019 15:41:27 +0300 Subject: [PATCH 028/207] mfd: stmfx: Uninitialized variable in stmfx_irq_handler() The problem is that on 64bit systems then we don't clear the higher bits of the "pending" variable. So when we do: ack = pending & ~BIT(STMFX_REG_IRQ_SRC_EN_GPIO); if (ack) { the if (ack) condition relies on uninitialized data. The fix it that I've changed "pending" from an unsigned long to a u32. I changed "n" as well, because that's a number in the 0-10 range and it fits easily inside an int. We do need to add a cast to "pending" when we use it in the for_each_set_bit() loop, but that doesn't cause a problem, it's fine. Fixes: 06252ade9156 ("mfd: Add ST Multi-Function eXpander (STMFX) core driver") Signed-off-by: Dan Carpenter Acked-by: Amelie Delaunay Signed-off-by: Lee Jones --- drivers/mfd/stmfx.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index fe8efba2d45f..7c419c078688 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -204,12 +204,10 @@ static struct irq_chip stmfx_irq_chip = { static irqreturn_t stmfx_irq_handler(int irq, void *data) { struct stmfx *stmfx = data; - unsigned long n, pending; - u32 ack; - int ret; + u32 pending, ack; + int n, ret; - ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, - (u32 *)&pending); + ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, &pending); if (ret) return IRQ_NONE; @@ -224,7 +222,7 @@ static irqreturn_t stmfx_irq_handler(int irq, void *data) return IRQ_NONE; } - for_each_set_bit(n, &pending, STMFX_REG_IRQ_SRC_MAX) + for_each_set_bit(n, (unsigned long *)&pending, STMFX_REG_IRQ_SRC_MAX) handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n)); return IRQ_HANDLED; From 085ebfe937d7a7a5df1729f35a12d6d655fea68c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 May 2019 14:37:24 +0200 Subject: [PATCH 029/207] perf/core: Fix perf_sample_regs_user() mm check perf_sample_regs_user() uses 'current->mm' to test for the presence of userspace, but this is insufficient, consider use_mm(). A better test is: '!(current->flags & PF_KTHREAD)', exec() clears PF_KTHREAD after it sets the new ->mm but before it drops to userspace for the first time. Possibly obsoletes: bf05fc25f268 ("powerpc/perf: Fix oops when kthread execs user process") Reported-by: Ravi Bangoria Reported-by: Young Xiao <92siuyang@gmail.com> Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 4018994f3d87 ("perf: Add ability to attach user level registers dump to sample") Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index abbd4b3b96c2..2e32faac5511 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5923,7 +5923,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; - } else if (current->mm) { + } else if (!(current->flags & PF_KTHREAD)) { perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; From c8edb316b9bb6149193436dfbd240994733e27be Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 14 Jun 2019 10:46:06 -0700 Subject: [PATCH 030/207] clk: Do a DT parent lookup even when index < 0 We want to allow the parent lookup to happen even if the index is some value less than 0. This may be the case if a clk provider only specifies the .name member to match a string in the "clock-names" DT property. We shouldn't require that the index be >= 0 to make this use case work. Fixes: 601b6e93304a ("clk: Allow parents to be specified via clkspec index") Reported-by: Alexandre Mergnat Cc: Jerome Brunet Cc: Chen-Yu Tsai Reviewed-by: Chen-Yu Tsai Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index aa51756fd4d6..87b410d6e51d 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -368,7 +368,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) const char *dev_id = dev ? dev_name(dev) : NULL; struct device_node *np = core->of_node; - if (np && index >= 0) + if (np && (name || index >= 0)) hw = of_clk_get_hw(np, index, name); /* From a019ab40679715ea680cc8561a02888be70bc4e9 Mon Sep 17 00:00:00 2001 From: Li Yang Date: Mon, 22 Apr 2019 13:30:56 -0500 Subject: [PATCH 031/207] arm64: defconfig: Enable FSL_EDMA driver Enables the FSL EDMA driver by default. This also works around an issue that imx-i2c driver keeps deferring the probe because of the DMA is not ready. And currently the DMA engine framework can not correctly tell if the DMA channels will truly become available later (it will never be available if the DMA driver is not enabled). This will cause indefinite messages like below: [ 3.335829] imx-i2c 2180000.i2c: can't get pinctrl, bus recovery not supported [ 3.344455] ina2xx 0-0040: power monitor ina220 (Rshunt = 1000 uOhm) [ 3.350917] lm90 0-004c: 0-004c supply vcc not found, using dummy regulator [ 3.362089] imx-i2c 2180000.i2c: can't get pinctrl, bus recovery not supported [ 3.370741] ina2xx 0-0040: power monitor ina220 (Rshunt = 1000 uOhm) [ 3.377205] lm90 0-004c: 0-004c supply vcc not found, using dummy regulator [ 3.388455] imx-i2c 2180000.i2c: can't get pinctrl, bus recovery not supported ..... Signed-off-by: Li Yang Signed-off-by: Shawn Guo --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d583514258c..6bca5b082ea4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -613,6 +613,7 @@ CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_IMX_SC=m CONFIG_RTC_DRV_XGENE=y CONFIG_DMADEVICES=y +CONFIG_FSL_EDMA=y CONFIG_DMA_BCM2835=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y From 5423f5ce5ca410b3646f355279e4e937d452e622 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 18 Jun 2019 22:31:40 +0200 Subject: [PATCH 032/207] x86/microcode: Fix the microcode load on CPU hotplug for real A recent change moved the microcode loader hotplug callback into the early startup phase which is running with interrupts disabled. It missed that the callbacks invoke sysfs functions which might sleep causing nice 'might sleep' splats with proper debugging enabled. Split the callbacks and only load the microcode in the early startup phase and move the sysfs handling back into the later threaded and preemptible bringup phase where it was before. Fixes: 78f4e932f776 ("x86/microcode, cpuhotplug: Add a microcode loader CPU hotplug callback") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: stable@vger.kernel.org Cc: x86-ml Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906182228350.1766@nanos.tec.linutronix.de --- arch/x86/kernel/cpu/microcode/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index a813987b5552..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -872,7 +875,9 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); From cf18ea7593adbfe68b594e45467379d4ee8ca8ba Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Aug 2018 00:10:39 +0200 Subject: [PATCH 033/207] ARM: dts: Blank D-Link DIR-685 console Leaving this NAS with display and backlight on heats it up and dissipates power. Turn off the screen after 4 minutes, it comes back on when a user touches the keys. Signed-off-by: Linus Walleij --- arch/arm/boot/dts/gemini-dlink-dir-685.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index cfbfbc91a1e1..3613f05f8a80 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -20,7 +20,7 @@ memory@0 { }; chosen { - bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait"; + bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300"; stdout-path = "uart0:19200n8"; }; From 36558020128b1a48b7bddd5792ee70e3f64b04b0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 16 Jun 2019 23:40:13 +0200 Subject: [PATCH 034/207] ARM: dts: gemini Fix up DNS-313 compatible string It's a simple typo in the DNS file, which was pretty serious. No scripts were working properly. Fix it up. Signed-off-by: Linus Walleij --- arch/arm/boot/dts/gemini-dlink-dns-313.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index b12504e10f0b..360642a02a48 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -11,7 +11,7 @@ / { model = "D-Link DNS-313 1-Bay Network Storage Enclosure"; - compatible = "dlink,dir-313", "cortina,gemini"; + compatible = "dlink,dns-313", "cortina,gemini"; #address-cells = <1>; #size-cells = <1>; From 27e23d8975270df6999f8b5b3156fc0c04927451 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2019 15:04:54 +0200 Subject: [PATCH 035/207] ARM: omap2: remove incorrect __init annotation omap3xxx_prm_enable_io_wakeup() is marked __init, but its caller is not, so we get a warning with clang-8: WARNING: vmlinux.o(.text+0x343c8): Section mismatch in reference from the function omap3xxx_prm_late_init() to the function .init.text:omap3xxx_prm_enable_io_wakeup() The function omap3xxx_prm_late_init() references the function __init omap3xxx_prm_enable_io_wakeup(). This is often because omap3xxx_prm_late_init lacks a __init annotation or the annotation of omap3xxx_prm_enable_io_wakeup is wrong. When building with gcc, omap3xxx_prm_enable_io_wakeup() is always inlined, so we never noticed in the past. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Tony Lindgren Reviewed-by: Andrew Murray Signed-off-by: Olof Johansson --- arch/arm/mach-omap2/prm3xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 05858f966f7d..dfa65fc2c82b 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -433,7 +433,7 @@ static void omap3_prm_reconfigure_io_chain(void) * registers, and omap3xxx_prm_reconfigure_io_chain() must be called. * No return value. */ -static void __init omap3xxx_prm_enable_io_wakeup(void) +static void omap3xxx_prm_enable_io_wakeup(void) { if (prm_features & PRM_HAS_IO_WAKEUP) omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, From db13a5ba2732755cf13320f3987b77cf2a71e790 Mon Sep 17 00:00:00 2001 From: Stefan Hellermann Date: Mon, 17 Jun 2019 15:43:59 +0200 Subject: [PATCH 036/207] MIPS: ath79: fix ar933x uart parity mode While trying to get the uart with parity working I found setting even parity enabled odd parity insted. Fix the register settings to match the datasheet of AR9331. A similar patch was created by 8devices, but not sent upstream. https://github.com/8devices/openwrt-8devices/commit/77c5586ade3bb72cda010afad3f209ed0c98ea7c Signed-off-by: Stefan Hellermann Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/include/asm/mach-ath79/ar933x_uart.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h index c2917b39966b..bba2c8837951 100644 --- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h +++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h @@ -27,8 +27,8 @@ #define AR933X_UART_CS_PARITY_S 0 #define AR933X_UART_CS_PARITY_M 0x3 #define AR933X_UART_CS_PARITY_NONE 0 -#define AR933X_UART_CS_PARITY_ODD 1 -#define AR933X_UART_CS_PARITY_EVEN 2 +#define AR933X_UART_CS_PARITY_ODD 2 +#define AR933X_UART_CS_PARITY_EVEN 3 #define AR933X_UART_CS_IF_MODE_S 2 #define AR933X_UART_CS_IF_MODE_M 0x3 #define AR933X_UART_CS_IF_MODE_NONE 0 From 1e091c3bbf51d34d5d96337a59ce5ab2ac3ba2cc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 11 Jun 2019 11:01:16 -0400 Subject: [PATCH 037/207] svcrdma: Ignore source port when computing DRC hash The DRC appears to be effectively empty after an RPC/RDMA transport reconnect. The problem is that each connection uses a different source port, which defeats the DRC hash. Clients always have to disconnect before they send retransmissions to reset the connection's credit accounting, thus every retransmit on NFS/RDMA will miss the DRC. An NFS/RDMA client's IP source port is meaningless for RDMA transports. The transport layer typically sets the source port value on the connection to a random ephemeral port. The server already ignores it for the "secure port" check. See commit 16e4d93f6de7 ("NFSD: Ignore client's source port on RDMA transports"). The Linux NFS server's DRC resolves XID collisions from the same source IP address by using the checksum of the first 200 bytes of the RPC call header. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 027a3b07d329..0004535c0188 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -211,9 +211,14 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, /* Save client advertised inbound read limit for use later in accept. */ newxprt->sc_ord = param->initiator_depth; - /* Set the local and remote addresses in the transport */ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + /* The remote port is arbitrary and not under the control of the + * client ULP. Set it to a fixed value so that the DRC continues + * to be effective after a reconnect. + */ + rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0); + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); From 1196364f21ffe5d1e6d83cafd6a2edb89404a3ae Mon Sep 17 00:00:00 2001 From: Kevin Darbyshire-Bryant Date: Wed, 19 Jun 2019 15:08:18 +0100 Subject: [PATCH 038/207] MIPS: fix build on non-linux hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit calc_vmlinuz_load_addr.c requires SZ_64K to be defined for alignment purposes. It included "../../../../include/linux/sizes.h" to define that size, however "sizes.h" tries to include which assumes linux system headers. These may not exist eg. the following error was encountered when building Linux for OpenWrt under macOS: In file included from arch/mips/boot/compressed/calc_vmlinuz_load_addr.c:16: arch/mips/boot/compressed/../../../../include/linux/sizes.h:11:10: fatal error: 'linux/const.h' file not found ^~~~~~~~~~ Change makefile to force building on local linux headers instead of system headers. Also change eye-watering relative reference in include file spec. Thanks to Jo-Philip Wich & Petr Štetiar for assistance in tracking this down & fixing. Suggested-by: Jo-Philipp Wich Signed-off-by: Petr Štetiar Signed-off-by: Kevin Darbyshire-Bryant Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/boot/compressed/Makefile | 2 ++ arch/mips/boot/compressed/calc_vmlinuz_load_addr.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 3c453a1f1ff1..172801ed35b8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE $(call if_changed,objcopy) +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + # Calculate the load address of the compressed kernel image hostprogs-y := calc_vmlinuz_load_addr diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 240f1d12df75..080b926d2623 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -9,7 +9,7 @@ #include #include #include -#include "../../../../include/linux/sizes.h" +#include int main(int argc, char *argv[]) { From 461e274b2821901ca9c084f43074ad099848199f Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 26 May 2019 21:40:27 +0530 Subject: [PATCH 039/207] auxdisplay/cfag12864bfb.c: Convert to use vm_map_pages_zero() While using mmap, the incorrect values of length and vm_pgoff are ignored and this driver goes ahead with mapping cfag12864b_buffer to user vma. Convert vm_insert_pages() to use vm_map_pages_zero(). We could later "fix" these drivers to behave according to the normal vm_pgoff offsetting simply by removing the _zero suffix on the function name and if that causes regressions, it gives us an easy way to revert. Signed-off-by: Souptick Joarder Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/cfag12864bfb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c index 40c8a552a478..4074886b7bc8 100644 --- a/drivers/auxdisplay/cfag12864bfb.c +++ b/drivers/auxdisplay/cfag12864bfb.c @@ -52,8 +52,9 @@ static const struct fb_var_screeninfo cfag12864bfb_var = { static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { - return vm_insert_page(vma, vma->vm_start, - virt_to_page(cfag12864b_buffer)); + struct page *pages = virt_to_page(cfag12864b_buffer); + + return vm_map_pages_zero(vma, &pages, 1); } static struct fb_ops cfag12864bfb_ops = { From f4bb1f895aa07dfcb96169192ff7c9154620df87 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 26 May 2019 21:41:10 +0530 Subject: [PATCH 040/207] auxdisplay/ht16k33.c: Convert to use vm_map_pages_zero() While using mmap, the incorrect values of length and vm_pgoff are ignored and this driver goes ahead with mapping fbdev.buffer to user vma. Convert vm_insert_pages() to use vm_map_pages_zero(). We could later "fix" these drivers to behave according to the normal vm_pgoff offsetting simply by removing the _zero suffix on the function name and if that causes regressions, it gives us an easy way to revert. Signed-off-by: Souptick Joarder Acked-by: Robin van der Gracht Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/ht16k33.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index 21393ec3b9a4..9c0bb771751d 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -223,9 +223,9 @@ static const struct backlight_ops ht16k33_bl_ops = { static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct ht16k33_priv *priv = info->par; + struct page *pages = virt_to_page(priv->fbdev.buffer); - return vm_insert_page(vma, vma->vm_start, - virt_to_page(priv->fbdev.buffer)); + return vm_map_pages_zero(vma, &pages, 1); } static struct fb_ops ht16k33_fb_ops = { From 32f010deab575199df4ebe7b6aec20c17bb7eccd Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 19 Jun 2019 13:27:16 -0700 Subject: [PATCH 041/207] x86/resctrl: Prevent possible overrun during bitmap operations While the DOC at the beginning of lib/bitmap.c explicitly states that "The number of valid bits in a given bitmap does _not_ need to be an exact multiple of BITS_PER_LONG.", some of the bitmap operations do indeed access BITS_PER_LONG portions of the provided bitmap no matter the size of the provided bitmap. For example, if find_first_bit() is provided with an 8 bit bitmap the operation will access BITS_PER_LONG bits from the provided bitmap. While the operation ensures that these extra bits do not affect the result, the memory is still accessed. The capacity bitmasks (CBMs) are typically stored in u32 since they can never exceed 32 bits. A few instances exist where a bitmap_* operation is performed on a CBM by simply pointing the bitmap operation to the stored u32 value. The consequence of this pattern is that some bitmap_* operations will access out-of-bounds memory when interacting with the provided CBM. This same issue has previously been addressed with commit 49e00eee0061 ("x86/intel_rdt: Fix out-of-bounds memory access in CBM tests") but at that time not all instances of the issue were fixed. Fix this by using an unsigned long to store the capacity bitmask data that is passed to bitmap functions. Fixes: e651901187ab ("x86/intel_rdt: Introduce "bit_usage" to display cache allocations details") Fixes: f4e80d67a527 ("x86/intel_rdt: Resctrl files reflect pseudo-locked information") Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults") Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: stable Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/58c9b6081fd9bf599af0dfc01a6fdd335768efef.1560975645.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++-------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 869cbef5da81..f9d8ed6ab03b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, */ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; + unsigned long val = *_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - if (*val == 0) + if (val == 0) return; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + *_val = (u32)val; } /* From 3647e42b55dcbf3b93457eb750660676e8df5010 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Jun 2019 10:56:03 +0100 Subject: [PATCH 042/207] afs: Fix over zealous "vnode modified" warnings Occasionally, warnings like this: vnode modified 2af7 on {10000b:1} [exp 2af2] YFS.FetchStatus(vnode) are emitted into the kernel log. This indicates that when we were applying the updated vnode (file) status retrieved from the server to an inode we saw that the data version number wasn't what we were expecting (in this case it's 0x2af7 rather than 0x2af2). We've usually received a callback from the server prior to this point - or the callback promise has lapsed - so the warning is merely informative and the state is to be expected. Fix this by only emitting the warning if the we still think that we have a valid callback promise and haven't received a callback. Also change the format slightly so so that the new data version doesn't look like part of the text, the like is prefixed with "kAFS: " and the message is ranked as a warning. Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Ian Wienand Signed-off-by: David Howells --- fs/afs/inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..dd8931345a8d 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -207,11 +207,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) From a6853b9ce81a8f32f3c13c30ae951bb6830a896a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2019 16:49:35 +0100 Subject: [PATCH 043/207] afs: Fix vlserver record corruption Because I made the afs_call struct share pointers to an afs_server object and an afs_vlserver object to save space, afs_put_call() calls afs_put_server() on afs_vlserver object (which is only meant for the afs_server object) because it sees that call->server isn't NULL. This means that the afs_vlserver object gets unpredictably and randomly modified, depending on what config options are set (such as lockdep). Fix this by getting rid of the union and having two non-overlapping pointers in the afs_call struct. Fixes: ffba718e9354 ("afs: Get rid of afs_call::reply[]") Signed-off-by: David Howells --- fs/afs/internal.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2073c1a3ab4b..c9495c8dea93 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -113,10 +113,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ From 90fa9b64523a645a97edc0bdcf2d74759957eeee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2019 16:49:35 +0100 Subject: [PATCH 044/207] afs: Fix uninitialised spinlock afs_volume::cb_break_lock Fix the cb_break_lock spinlock in afs_volume struct by initialising it when the volume record is allocated. Also rename the lock to cb_v_break_lock to distinguish it from the lock of the same name in the afs_server struct. Without this, the following trace may be observed when a volume-break callback is received: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 2 PID: 50 Comm: kworker/2:1 Not tainted 5.2.0-rc1-fscache+ #3045 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Workqueue: afs SRXAFSCB_CallBack Call Trace: dump_stack+0x67/0x8e register_lock_class+0x23b/0x421 ? check_usage_forwards+0x13c/0x13c __lock_acquire+0x89/0xf73 lock_acquire+0x13b/0x166 ? afs_break_callbacks+0x1b2/0x3dd _raw_write_lock+0x2c/0x36 ? afs_break_callbacks+0x1b2/0x3dd afs_break_callbacks+0x1b2/0x3dd ? trace_event_raw_event_afs_server+0x61/0xac SRXAFSCB_CallBack+0x11f/0x16c process_one_work+0x2c5/0x4ee ? worker_thread+0x234/0x2ac worker_thread+0x1d8/0x2ac ? cancel_delayed_work_sync+0xf/0xf kthread+0x11f/0x127 ? kthread_park+0x76/0x76 ret_from_fork+0x24/0x30 Fixes: 68251f0a6818 ("afs: Fix whole-volume callback handling") Signed-off-by: David Howells --- fs/afs/callback.c | 4 ++-- fs/afs/internal.h | 2 +- fs/afs/volume.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..915010464572 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c9495c8dea93..8252d69bd3e4 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -618,7 +618,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index f6eba2def0a1..3e8dbee09f87 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -47,6 +47,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); From 2cd42d19cffa0ec3dfb57b1b3e1a07a9bf4ed80a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Jun 2019 18:12:02 +0100 Subject: [PATCH 045/207] afs: Fix setting of i_blocks The setting of i_blocks, which is calculated from i_size, has got accidentally misordered relative to the setting of i_size when initially setting up an inode. Further, i_blocks isn't updated by afs_apply_status() when the size is updated. To fix this, break the i_size/i_blocks setting out into a helper function and call it from both places. Fixes: a58823ac4589 ("afs: Fix application of status and callback to be under same lock") Signed-off-by: David Howells --- fs/afs/inode.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index dd8931345a8d..18a50d4febcf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -55,6 +55,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren dump_stack(); } +/* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + /* * Initialise an inode from the vnode status. */ @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -232,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } From 240b4cc8fd5db138b675297d4226ec46594d9b3b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Jun 2019 09:05:41 +0200 Subject: [PATCH 046/207] scsi: vmw_pscsi: Fix use-after-free in pvscsi_queue_lck() Once we unlock adapter->hw_lock in pvscsi_queue_lck() nothing prevents just queued scsi_cmnd from completing and freeing the request. Thus cmd->cmnd[0] dereference can dereference already freed request leading to kernel crashes or other issues (which one of our customers observed). Store cmd->cmnd[0] in a local variable before unlocking adapter->hw_lock to fix the issue. CC: Signed-off-by: Jan Kara Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ecee4b3ff073..377b07b2feeb 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd struct pvscsi_adapter *adapter = shost_priv(host); struct pvscsi_ctx *ctx; unsigned long flags; + unsigned char op; spin_lock_irqsave(&adapter->hw_lock, flags); @@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd } cmd->scsi_done = done; + op = cmd->cmnd[0]; dev_dbg(&cmd->device->sdev_gendev, - "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]); + "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op); spin_unlock_irqrestore(&adapter->hw_lock, flags); - pvscsi_kick_io(adapter, cmd->cmnd[0]); + pvscsi_kick_io(adapter, op); return 0; } From 637dfa0fad6d91a9a709dc70549a6d20fa77f615 Mon Sep 17 00:00:00 2001 From: Cedric Hombourger Date: Thu, 13 Jun 2019 10:52:50 +0200 Subject: [PATCH 047/207] MIPS: have "plain" make calls build dtbs for selected platforms scripts/package/builddeb calls "make dtbs_install" after executing a plain make (i.e. no build targets specified). It will fail if dtbs were not built beforehand. Match the arm64 architecture where DTBs get built by the "all" target. Signed-off-by: Cedric Hombourger [paul.burton@mips.com: s/builddep/builddeb] Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.1+ --- arch/mips/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 8f4486c4415b..eceff9b75b22 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -17,6 +17,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig +KBUILD_DTBS := dtbs # # Select the object file format to substitute into the linker script. @@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@ vmlinux.64: vmlinux $(call cmd,64) -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): $(vmlinux-32) FORCE From 919aef44d73d5d0c04213cb1bc31149cc074e65e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 19 Jun 2019 13:47:44 -0400 Subject: [PATCH 048/207] x86/efi: fix a -Wtype-limits compilation warning Compiling a kernel with W=1 generates this warning, arch/x86/platform/efi/quirks.c:731:16: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running ...") Signed-off-by: Qian Cai Acked-by: "Prakhya, Sai Praneeth" Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 632b83885867..3b9fd679cea9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so * page faulting on these addresses isn't expected. */ - if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) + if (phys_addr <= 0x0fff) return; /* From 60c112b0ada09826cc4ae6a4e55df677f76f1313 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 21 Jun 2019 10:20:18 -0600 Subject: [PATCH 049/207] io_uring: ensure req->file is cleared on allocation Stephen reports: I hit the following General Protection Fault when testing io_uring via the io_uring engine in fio. This was on a VM running 5.2-rc5 and the latest version of fio. The issue occurs for both null_blk and fake NVMe drives. I have not tested bare metal or real NVMe SSDs. The fio script used is given below. [io_uring] time_based=1 runtime=60 filename=/dev/nvme2n1 (note /dev/nullb0 also fails) ioengine=io_uring bs=4k rw=readwrite direct=1 fixedbufs=1 sqthread_poll=1 sqthread_poll_cpu=0 general protection fault: 0000 [#1] SMP PTI CPU: 0 PID: 872 Comm: io_uring-sq Not tainted 5.2.0-rc5-cpacket-io-uring #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 RIP: 0010:fput_many+0x7/0x90 Code: 01 48 85 ff 74 17 55 48 89 e5 53 48 8b 1f e8 a0 f9 ff ff 48 85 db 48 89 df 75 f0 5b 5d f3 c3 0f 1f 40 00 0f 1f 44 00 00 89 f6 48 29 77 38 74 01 c3 55 48 89 e5 53 48 89 fb 65 48 \ RSP: 0018:ffffadeb817ebc50 EFLAGS: 00010246 RAX: 0000000000000004 RBX: ffff8f46ad477480 RCX: 0000000000001805 RDX: 0000000000000000 RSI: 0000000000000001 RDI: f18b51b9a39552b5 RBP: ffffadeb817ebc58 R08: ffff8f46b7a318c0 R09: 000000000000015d R10: ffffadeb817ebce8 R11: 0000000000000020 R12: ffff8f46ad4cd000 R13: 00000000fffffff7 R14: ffffadeb817ebe30 R15: 0000000000000004 FS: 0000000000000000(0000) GS:ffff8f46b7a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055828f0bbbf0 CR3: 0000000232176004 CR4: 00000000003606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? fput+0x13/0x20 io_free_req+0x20/0x40 io_put_req+0x1b/0x20 io_submit_sqe+0x40a/0x680 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 io_submit_sqes+0xb9/0x160 ? io_submit_sqes+0xb9/0x160 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __schedule+0x3f2/0x6a0 ? __switch_to_asm+0x34/0x70 io_sq_thread+0x1af/0x470 ? __switch_to_asm+0x34/0x70 ? wait_woken+0x80/0x80 ? __switch_to+0x85/0x410 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __schedule+0x3f2/0x6a0 kthread+0x105/0x140 ? io_submit_sqes+0x160/0x160 ? kthread+0x105/0x140 ? io_submit_sqes+0x160/0x160 ? kthread_destroy_worker+0x50/0x50 ret_from_fork+0x35/0x40 which occurs because using a kernel side submission thread isn't valid without using fixed files (registered through io_uring_register()). This causes io_uring to put the request after logging an error, but before the file field is set in the request. If it happens to be non-zero, we attempt to fput() garbage. Fix this by ensuring that req->file is initialized when the request is allocated. Cc: stable@vger.kernel.org # 5.1+ Reported-by: Stephen Bates Tested-by: Stephen Bates Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 86a2bd721900..485832deb7ea 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -1801,10 +1802,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || From 975a6166a8584ee4a1b8bd93098e49dc101d7171 Mon Sep 17 00:00:00 2001 From: Tian Baofeng Date: Wed, 12 Jun 2019 16:18:10 +0800 Subject: [PATCH 050/207] efibc: Replace variable set function in notifier call Replace the variable set function from "efivar_entry_set" to "efivar_entry_set_safe" in efibc panic notifier. In safe function parameter "block" will set to false and will call "efivar_entry_set_nonblocking"to set efi variables. efivar_entry_set_nonblocking is guaranteed to not block and is suitable for calling from crash/panic handlers. In UEFI android platform, when warm reset happens, with this change, efibc will not block the reboot process. Otherwise, set variable will call queue work and send to other offlined cpus then cause another panic, finally will cause reboot failure. Signed-off-by: Tian Baofeng Signed-off-by: Luo XinanX Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efibc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c index 61e099826cbb..35dccc88ac0a 100644 --- a/drivers/firmware/efi/efibc.c +++ b/drivers/firmware/efi/efibc.c @@ -43,11 +43,13 @@ static int efibc_set_variable(const char *name, const char *value) efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data); memcpy(&entry->var.VendorGuid, &guid, sizeof(guid)); - ret = efivar_entry_set(entry, - EFI_VARIABLE_NON_VOLATILE - | EFI_VARIABLE_BOOTSERVICE_ACCESS - | EFI_VARIABLE_RUNTIME_ACCESS, - size, entry->var.Data, NULL); + ret = efivar_entry_set_safe(entry->var.VariableName, + entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE + | EFI_VARIABLE_BOOTSERVICE_ACCESS + | EFI_VARIABLE_RUNTIME_ACCESS, + false, size, entry->var.Data); + if (ret) pr_err("failed to set %s EFI variable: 0x%x\n", name, ret); From ea136a112d89bade596314a1ae49f748902f4727 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Jun 2019 19:14:46 +0100 Subject: [PATCH 051/207] x86/apic: Fix integer overflow on 10 bit left shift of cpu_khz The left shift of unsigned int cpu_khz will overflow for large values of cpu_khz, so cast it to a long long before shifting it to avoid overvlow. For example, this can happen when cpu_khz is 4194305, i.e. ~4.2 GHz. Addresses-Coverity: ("Unintentional integer overflow") Fixes: 8c3ba8d04924 ("x86, apic: ack all pending irqs when crashed/on kexec") Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20190619181446.13635-1-colin.king@canonical.com --- arch/x86/kernel/apic/apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 177aa8ef2afa..85be316665b4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void) if (queued) { if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); + max_loops = (long long)cpu_khz << 10; + max_loops -= ntsc - tsc; } else { max_loops--; } From 2e5db6eb3c23e5dc8171eb8f6af7a97ef9fcf3a9 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Wed, 19 Jun 2019 14:29:42 +0200 Subject: [PATCH 052/207] be2net: fix link failure after ethtool offline test Certain cards in conjunction with certain switches need a little more time for link setup that results in ethtool link test failure after offline test. Patch adds a loop that waits for a link setup finish. Changes in v2: - added fixes header Fixes: 4276e47e2d1c ("be2net: Add link test to list of ethtool self tests.") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller --- .../net/ethernet/emulex/benet/be_ethtool.c | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 8a6785173228..492f8769ac12 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -891,7 +891,7 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) { struct be_adapter *adapter = netdev_priv(netdev); - int status; + int status, cnt; u8 link_status = 0; if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { @@ -902,6 +902,9 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM); + /* check link status before offline tests */ + link_status = netif_carrier_ok(netdev); + if (test->flags & ETH_TEST_FL_OFFLINE) { if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0) test->flags |= ETH_TEST_FL_FAILED; @@ -922,13 +925,26 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, test->flags |= ETH_TEST_FL_FAILED; } - status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); - if (status) { - test->flags |= ETH_TEST_FL_FAILED; - data[4] = -1; - } else if (!link_status) { + /* link status was down prior to test */ + if (!link_status) { test->flags |= ETH_TEST_FL_FAILED; data[4] = 1; + return; + } + + for (cnt = 10; cnt; cnt--) { + status = be_cmd_link_status_query(adapter, NULL, &link_status, + 0); + if (status) { + test->flags |= ETH_TEST_FL_FAILED; + data[4] = -1; + break; + } + + if (link_status) + break; + + msleep_interruptible(500); } } From aad1dcc4f011ea409850e040363dff1e59aa4175 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Jun 2019 15:34:07 +0200 Subject: [PATCH 053/207] ppp: mppe: Add softdep to arc4 The arc4 crypto is mandatory at ppp_mppe probe time, so let's put a softdep line, so that the corresponding module gets prepared gracefully. Without this, a simple inclusion to initrd via dracut failed due to the missing dependency, for example. Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_mppe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index ff61dd8748de..66c8e65f6872 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -63,6 +63,7 @@ MODULE_AUTHOR("Frank Cusack "); MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); +MODULE_SOFTDEP("pre: arc4"); MODULE_VERSION("1.0.2"); static unsigned int From a1e5388b4d5fc78688e5e9ee6641f779721d6291 Mon Sep 17 00:00:00 2001 From: Roland Hii Date: Wed, 19 Jun 2019 22:13:48 +0800 Subject: [PATCH 054/207] net: stmmac: fixed new system time seconds value calculation When ADDSUB bit is set, the system time seconds field is calculated as the complement of the seconds part of the update value. For example, if 3.000000001 seconds need to be subtracted from the system time, this field is calculated as 2^32 - 3 = 4294967296 - 3 = 0x100000000 - 3 = 0xFFFFFFFD Previously, the 0x100000000 is mistakenly written as 100000000. This is further simplified from sec = (0x100000000ULL - sec); to sec = -sec; Fixes: ba1ffd74df74 ("stmmac: fix PTP support for GMAC4") Signed-off-by: Roland Hii Signed-off-by: Ong Boon Leong Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 2dcdf761d525..020159622559 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -112,7 +112,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, * programmed with (2^32 – ) */ if (gmac4) - sec = (100000000ULL - sec); + sec = -sec; value = readl(ioaddr + PTP_TCR); if (value & PTP_TCR_TSCTRLSSR) From d0bb82fd60183868f46c8ccc595a3d61c3334a18 Mon Sep 17 00:00:00 2001 From: Roland Hii Date: Wed, 19 Jun 2019 22:41:48 +0800 Subject: [PATCH 055/207] net: stmmac: set IC bit when transmitting frames with HW timestamp When transmitting certain PTP frames, e.g. SYNC and DELAY_REQ, the PTP daemon, e.g. ptp4l, is polling the driver for the frame transmit hardware timestamp. The polling will most likely timeout if the tx coalesce is enabled due to the Interrupt-on-Completion (IC) bit is not set in tx descriptor for those frames. This patch will ignore the tx coalesce parameter and set the IC bit when transmitting PTP frames which need to report out the frame transmit hardware timestamp to user space. Fixes: f748be531d70 ("net: stmmac: Rework coalesce timer and fix multi-queue races") Signed-off-by: Roland Hii Signed-off-by: Ong Boon Leong Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06dd51f47cfd..06358fe5b245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2947,12 +2947,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage tx mitigation */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); @@ -3166,12 +3169,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * element in case of no SG. */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); From 8ac8a01092b2added0749ef937037bf1912e13e3 Mon Sep 17 00:00:00 2001 From: Sergej Benilov Date: Thu, 20 Jun 2019 11:02:18 +0200 Subject: [PATCH 056/207] sis900: fix TX completion Since commit 605ad7f184b60cfaacbc038aa6c55ee68dee3c89 "tcp: refine TSO autosizing", outbound throughput is dramatically reduced for some connections, as sis900 is doing TX completion within idle states only. Make TX completion happen after every transmitted packet. Test: netperf before patch: > netperf -H remote -l -2000000 -- -s 1000000 MIGRATED TCP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 95.223.112.76 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 327680 327680 253.44 0.06 after patch: > netperf -H remote -l -10000000 -- -s 1000000 MIGRATED TCP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 95.223.112.76 () port 0 AF_INET : demo Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 327680 327680 5.38 14.89 Thx to Dave Miller and Eric Dumazet for helpful hints Signed-off-by: Sergej Benilov Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 67f9bb6e941b..9b036c857b1d 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); @@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) sw32(txdp, sis_priv->tx_ring_dma); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); } /** @@ -1618,7 +1618,7 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) spin_unlock_irqrestore(&sis_priv->lock, flags); return NETDEV_TX_OK; } - sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len); + sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len); sw32(cr, TxENA | sr32(cr)); sis_priv->cur_tx ++; @@ -1674,7 +1674,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) do { status = sr32(isr); - if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0) + if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0) /* nothing intresting happened */ break; handled = 1; @@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) /* Rx interrupt */ sis900_rx(net_dev); - if (status & (TxURN | TxERR | TxIDLE)) + if (status & (TxURN | TxERR | TxIDLE | TxDESC)) /* Tx interrupt */ sis900_finish_xmit(net_dev); @@ -1896,8 +1896,8 @@ static void sis900_finish_xmit (struct net_device *net_dev) if (tx_status & OWN) { /* The packet is not transmitted yet (owned by hardware) ! - * Note: the interrupt is generated only when Tx Machine - * is idle, so this is an almost impossible case */ + * Note: this is an almost impossible condition + * in case of TxDESC ('descriptor interrupt') */ break; } @@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); From c492d4c74dd3f87559883ffa0f94a8f1ae3fe5f5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 20 Jun 2019 18:39:28 +0800 Subject: [PATCH 057/207] tipc: change to use register_pernet_device This patch is to fix a dst defcnt leak, which can be reproduced by doing: # ip net a c; ip net a s; modprobe tipc # ip net e s ip l a n eth1 type veth peer n eth1 netns c # ip net e c ip l s lo up; ip net e c ip l s eth1 up # ip net e s ip l s lo up; ip net e s ip l s eth1 up # ip net e c ip a a 1.1.1.2/8 dev eth1 # ip net e s ip a a 1.1.1.1/8 dev eth1 # ip net e c tipc b e m udp n u1 localip 1.1.1.2 # ip net e s tipc b e m udp n u1 localip 1.1.1.1 # ip net d c; ip net d s; rmmod tipc and it will get stuck and keep logging the error: unregister_netdevice: waiting for lo to become free. Usage count = 1 The cause is that a dst is held by the udp sock's sk_rx_dst set on udp rx path with udp_early_demux == 1, and this dst (eventually holding lo dev) can't be released as bearer's removal in tipc pernet .exit happens after lo dev's removal, default_device pernet .exit. "There are two distinct types of pernet_operations recognized: subsys and device. At creation all subsys init functions are called before device init functions, and at destruction all device exit functions are called before subsys exit function." So by calling register_pernet_device instead to register tipc_net_ops, the pernet .exit() will be invoked earlier than loopback dev's removal when a netns is being destroyed, as fou/gue does. Note that vxlan and geneve udp tunnels don't have this issue, as the udp sock is released in their device ndo_stop(). This fix is also necessary for tipc dst_cache, which will hold dsts on tx path and I will introduce in my next patch. Reported-by: Li Shuang Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index ed536c05252a..c8370722f0bb 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -134,7 +134,7 @@ static int __init tipc_init(void) if (err) goto out_sysctl; - err = register_pernet_subsys(&tipc_net_ops); + err = register_pernet_device(&tipc_net_ops); if (err) goto out_pernet; @@ -142,7 +142,7 @@ static int __init tipc_init(void) if (err) goto out_socket; - err = register_pernet_subsys(&tipc_topsrv_net_ops); + err = register_pernet_device(&tipc_topsrv_net_ops); if (err) goto out_pernet_topsrv; @@ -153,11 +153,11 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop(); out_socket: - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: @@ -172,9 +172,9 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { tipc_bearer_cleanup(); - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); tipc_netlink_stop(); tipc_netlink_compat_stop(); tipc_unregister_sysctl(); From 191f5c2ed4b6fabacf1f3500242047bd844d0c3a Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 10 Jun 2019 06:24:04 +0000 Subject: [PATCH 058/207] mtd: spi-nor: use 16-bit WRR command when QE is set on spansion flashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPI memory devices from different manufacturers have widely different configurations for Status, Control and Configuration registers. JEDEC 216C defines a new map for these common register bits and their functions, and describes how the individual bits may be accessed for a specific device. For the JEDEC 216B compliant flashes, we can partially deduce Status and Configuration registers functions by inspecting the 16th DWORD of BFPT. Older flashes that don't declare the SFDP tables (SPANSION FL512SAIFG1 311QQ063 A ©11 SPANSION) let the software decide how to interact with these registers. The commit dcb4b22eeaf4 ("spi-nor: s25fl512s supports region locking") uncovered a probe error for s25fl512s, when the Quad Enable bit CR[1] was set to one in the bootloader. When this bit is one, only the Write Status (01h) command with two data byts may be used, the 01h command with one data byte is not recognized and hence the error when trying to clear the block protection bits. Fix the above by using the Write Status (01h) command with two data bytes when the Quad Enable bit is one. Backward compatibility should be fine. The newly introduced spi_nor_spansion_clear_sr_bp() is tightly coupled with the spansion_quad_enable() function. Both assume that the Write Register with 16 bits, together with the Read Configuration Register (35h) instructions are supported. Fixes: dcb4b22eeaf44f91 ("spi-nor: s25fl512s supports region locking") Reported-by: Geert Uytterhoeven Signed-off-by: Tudor Ambarus Tested-by: Jonas Bonn Tested-by: Geert Uytterhoeven Reviewed-by: Vignesh Raghavendra Tested-by: Vignesh Raghavendra Signed-off-by: Miquel Raynal --- drivers/mtd/spi-nor/spi-nor.c | 119 ++++++++++++++++++++++++++++++---- include/linux/mtd/spi-nor.h | 3 + 2 files changed, 111 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 73172d7f512b..0c2ec1c21434 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1636,6 +1636,95 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor) return 0; } +/** + * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits. + * @nor: pointer to a 'struct spi_nor' + * + * Read-modify-write function that clears the Block Protection bits from the + * Status Register without affecting other bits. + * + * Return: 0 on success, -errno otherwise. + */ +static int spi_nor_clear_sr_bp(struct spi_nor *nor) +{ + int ret; + u8 mask = SR_BP2 | SR_BP1 | SR_BP0; + + ret = read_sr(nor); + if (ret < 0) { + dev_err(nor->dev, "error while reading status register\n"); + return ret; + } + + write_enable(nor); + + ret = write_sr(nor, ret & ~mask); + if (ret) { + dev_err(nor->dev, "write to status register failed\n"); + return ret; + } + + ret = spi_nor_wait_till_ready(nor); + if (ret) + dev_err(nor->dev, "timeout while writing status register\n"); + return ret; +} + +/** + * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection + * bits on spansion flashes. + * @nor: pointer to a 'struct spi_nor' + * + * Read-modify-write function that clears the Block Protection bits from the + * Status Register without affecting other bits. The function is tightly + * coupled with the spansion_quad_enable() function. Both assume that the Write + * Register with 16 bits, together with the Read Configuration Register (35h) + * instructions are supported. + * + * Return: 0 on success, -errno otherwise. + */ +static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor) +{ + int ret; + u8 mask = SR_BP2 | SR_BP1 | SR_BP0; + u8 sr_cr[2] = {0}; + + /* Check current Quad Enable bit value. */ + ret = read_cr(nor); + if (ret < 0) { + dev_err(nor->dev, + "error while reading configuration register\n"); + return ret; + } + + /* + * When the configuration register Quad Enable bit is one, only the + * Write Status (01h) command with two data bytes may be used. + */ + if (ret & CR_QUAD_EN_SPAN) { + sr_cr[1] = ret; + + ret = read_sr(nor); + if (ret < 0) { + dev_err(nor->dev, + "error while reading status register\n"); + return ret; + } + sr_cr[0] = ret & ~mask; + + ret = write_sr_cr(nor, sr_cr); + if (ret) + dev_err(nor->dev, "16-bit write register failed\n"); + return ret; + } + + /* + * If the Quad Enable bit is zero, use the Write Status (01h) command + * with one data byte. + */ + return spi_nor_clear_sr_bp(nor); +} + /* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ .id = { \ @@ -3660,6 +3749,8 @@ static int spi_nor_init_params(struct spi_nor *nor, default: /* Kept only for backward compatibility purpose. */ params->quad_enable = spansion_quad_enable; + if (nor->clear_sr_bp) + nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp; break; } @@ -3912,17 +4003,13 @@ static int spi_nor_init(struct spi_nor *nor) { int err; - /* - * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up - * with the software protection bits set - */ - if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL || - JEDEC_MFR(nor->info) == SNOR_MFR_INTEL || - JEDEC_MFR(nor->info) == SNOR_MFR_SST || - nor->info->flags & SPI_NOR_HAS_LOCK) { - write_enable(nor); - write_sr(nor, 0); - spi_nor_wait_till_ready(nor); + if (nor->clear_sr_bp) { + err = nor->clear_sr_bp(nor); + if (err) { + dev_err(nor->dev, + "fail to clear block protection bits\n"); + return err; + } } if (nor->quad_enable) { @@ -4047,6 +4134,16 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, if (info->flags & SPI_S3AN) nor->flags |= SNOR_F_READY_XSR_RDY; + /* + * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up + * with the software protection bits set. + */ + if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL || + JEDEC_MFR(nor->info) == SNOR_MFR_INTEL || + JEDEC_MFR(nor->info) == SNOR_MFR_SST || + nor->info->flags & SPI_NOR_HAS_LOCK) + nor->clear_sr_bp = spi_nor_clear_sr_bp; + /* Parse the Serial Flash Discoverable Parameters table. */ ret = spi_nor_init_params(nor, ¶ms); if (ret) diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index b3d360b0ee3d..9f57cdfcc93d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -373,6 +373,8 @@ struct flash_info; * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is * @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode + * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from + * the SPI NOR Status Register. * completely locked * @priv: the private data */ @@ -410,6 +412,7 @@ struct spi_nor { int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*quad_enable)(struct spi_nor *nor); + int (*clear_sr_bp)(struct spi_nor *nor); void *priv; }; From 45d5cb137c3638b3a310f41b31d8e79daf647f14 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Jun 2019 21:44:37 +0800 Subject: [PATCH 059/207] net/sched: cbs: Fix error path of cbs_module_init If register_qdisc fails, we should unregister netdevice notifier. Reported-by: Hulk Robot Fixes: e0a7683d30e9 ("net/sched: cbs: fix port_rate miscalculation") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/sched/sch_cbs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e16a3d37d2bc..732e109c3055 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -549,12 +549,17 @@ static struct notifier_block cbs_device_notifier = { static int __init cbs_module_init(void) { - int err = register_netdevice_notifier(&cbs_device_notifier); + int err; + err = register_netdevice_notifier(&cbs_device_notifier); if (err) return err; - return register_qdisc(&cbs_qdisc_ops); + err = register_qdisc(&cbs_qdisc_ops); + if (err) + unregister_netdevice_notifier(&cbs_device_notifier); + + return err; } static void __exit cbs_module_exit(void) From 3cf10132ac8d536565f2c02f60a3aeb315863a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Tue, 18 Jun 2019 17:58:34 +0200 Subject: [PATCH 060/207] ARM: dts: imx6ul: fix PWM[1-4] interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the i.MX6UL/L RM, table 3.1 "ARM Cortex A7 domain interrupt summary", the interrupts for the PWM[1-4] go from 83 to 86. Fixes: b9901fe84f02 ("ARM: dts: imx6ul: add pwm[1-4] nodes") Signed-off-by: Sébastien Szymanski Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index bbf010c73336..a7f6d1d58e20 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -358,7 +358,7 @@ tsc: tsc@2040000 { pwm1: pwm@2080000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02080000 0x4000>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6UL_CLK_PWM1>, <&clks IMX6UL_CLK_PWM1>; clock-names = "ipg", "per"; @@ -369,7 +369,7 @@ pwm1: pwm@2080000 { pwm2: pwm@2084000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02084000 0x4000>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6UL_CLK_PWM2>, <&clks IMX6UL_CLK_PWM2>; clock-names = "ipg", "per"; @@ -380,7 +380,7 @@ pwm2: pwm@2084000 { pwm3: pwm@2088000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02088000 0x4000>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6UL_CLK_PWM3>, <&clks IMX6UL_CLK_PWM3>; clock-names = "ipg", "per"; @@ -391,7 +391,7 @@ pwm3: pwm@2088000 { pwm4: pwm@208c000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x0208c000 0x4000>; - interrupts = ; + interrupts = ; clocks = <&clks IMX6UL_CLK_PWM4>, <&clks IMX6UL_CLK_PWM4>; clock-names = "ipg", "per"; From 9014143bab2f3bc0b9e5db3bc8d00e2a43e50fbd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 23 Jun 2019 14:27:17 +0300 Subject: [PATCH 061/207] fork: don't check parent_tidptr with CLONE_PIDFD Give userspace a cheap and reliable way to tell whether CLONE_PIDFD is supported by the kernel or not. The easiest way is to pass an invalid file descriptor value in parent_tidptr, perform the syscall and verify that parent_tidptr has been changed to a valid file descriptor value. CLONE_PIDFD uses parent_tidptr to return pidfds. CLONE_PARENT_SETTID will use parent_tidptr to return the tid of the parent. The two flags cannot be used together. Old kernels that only support CLONE_PARENT_SETTID will not verify the value pointed to by parent_tidptr. This behavior is unchanged even with the introduction of CLONE_PIDFD. However, if CLONE_PIDFD is specified the kernel will currently check the value pointed to by parent_tidptr before placing the pidfd in the memory pointed to. EINVAL will be returned if the value in parent_tidptr is not 0. If CLONE_PIDFD is supported and fd 0 is closed, then the returned pidfd can and likely will be 0 and parent_tidptr will be unchanged. This means userspace must either check CLONE_PIDFD support beforehand or check that fd 0 is not closed when invoking CLONE_PIDFD. The check for pidfd == 0 was introduced during the v5.2 merge window by commit b3e583825266 ("clone: add CLONE_PIDFD") to ensure that CLONE_PIDFD could be potentially extended by passing in flags through the return argument. However, that extension would look horrible, and with the upcoming introduction of the clone3 syscall in v5.3 there is no need to extend legacy clone syscall this way. (Even if it would need to be extended, CLONE_DETACHED can be reused with CLONE_PIDFD.) So remove the pidfd == 0 check. Userspace that needs to be portable to kernels without CLONE_PIDFD support can then be advised to initialize pidfd to -1 and check the pidfd value returned by CLONE_PIDFD. Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Dmitry V. Levin Signed-off-by: Christian Brauner --- kernel/fork.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..39a3adaa4ad1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1822,8 +1822,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1834,16 +1832,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* From bee19cd8f241ab3cd1bf79e03884e5371f9ef514 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 23 Jun 2019 14:28:00 +0300 Subject: [PATCH 062/207] samples: make pidfd-metadata fail gracefully on older kernels Initialize pidfd to an invalid descriptor, to fail gracefully on those kernels that do not implement CLONE_PIDFD and leave pidfd unchanged. Signed-off-by: Dmitry V. Levin Signed-off-by: Christian Brauner --- samples/pidfd/pidfd-metadata.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c index 14b454448429..c459155daf9a 100644 --- a/samples/pidfd/pidfd-metadata.c +++ b/samples/pidfd/pidfd-metadata.c @@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd) int main(int argc, char *argv[]) { - int pidfd = 0, ret = EXIT_FAILURE; + int pidfd = -1, ret = EXIT_FAILURE; char buf[4096] = { 0 }; pid_t pid; int procfd, statusfd; @@ -91,7 +91,11 @@ int main(int argc, char *argv[]) pid = pidfd_clone(CLONE_PIDFD, &pidfd); if (pid < 0) - exit(ret); + err(ret, "CLONE_PIDFD"); + if (pidfd == -1) { + warnx("CLONE_PIDFD is not supported by the kernel"); + goto out; + } procfd = pidfd_metadata_fd(pid, pidfd); close(pidfd); From 63b2de12b7eeacfb2edbe005f5c3cff17a2a02e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Jun 2019 22:06:05 +0300 Subject: [PATCH 063/207] mfd: stmfx: Fix an endian bug in stmfx_irq_handler() It's not okay to cast a "u32 *" to "unsigned long *" when you are doing a for_each_set_bit() loop because that will break on big endian systems. Fixes: 386145601b82 ("mfd: stmfx: Uninitialized variable in stmfx_irq_handler()") Reported-by: Linus Torvalds Signed-off-by: Dan Carpenter Tested-by: Amelie Delaunay Signed-off-by: Lee Jones --- drivers/mfd/stmfx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index 7c419c078688..857991cb3cbb 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -204,6 +204,7 @@ static struct irq_chip stmfx_irq_chip = { static irqreturn_t stmfx_irq_handler(int irq, void *data) { struct stmfx *stmfx = data; + unsigned long bits; u32 pending, ack; int n, ret; @@ -222,7 +223,8 @@ static irqreturn_t stmfx_irq_handler(int irq, void *data) return IRQ_NONE; } - for_each_set_bit(n, (unsigned long *)&pending, STMFX_REG_IRQ_SRC_MAX) + bits = pending; + for_each_set_bit(n, &bits, STMFX_REG_IRQ_SRC_MAX) handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n)); return IRQ_HANDLED; From 9354544cbccf68da1b047f8fb7b47630e3c8a59d Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Sun, 23 Jun 2019 21:26:58 -0700 Subject: [PATCH 064/207] net/tls: fix page double free on TX cleanup With commit 94850257cf0f ("tls: Fix tls_device handling of partial records") a new path was introduced to cleanup partial records during sk_proto_close. This path does not handle the SW KTLS tx_list cleanup. This is unnecessary though since the free_resources calls for both SW and offload paths will cleanup a partial record. The visible effect is the following warning, but this bug also causes a page double free. WARNING: CPU: 7 PID: 4000 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 RIP: 0010:sk_stream_kill_queues+0x103/0x110 RSP: 0018:ffffb6df87e07bd0 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff8c21db4971c0 RCX: 0000000000000007 RDX: ffffffffffffffa0 RSI: 000000000000001d RDI: ffff8c21db497270 RBP: ffff8c21db497270 R08: ffff8c29f4748600 R09: 000000010020001a R10: ffffb6df87e07aa0 R11: ffffffff9a445600 R12: 0000000000000007 R13: 0000000000000000 R14: ffff8c21f03f2900 R15: ffff8c21f03b8df0 Call Trace: inet_csk_destroy_sock+0x55/0x100 tcp_close+0x25d/0x400 ? tcp_check_oom+0x120/0x120 tls_sk_proto_close+0x127/0x1c0 inet_release+0x3c/0x60 __sock_release+0x3d/0xb0 sock_close+0x11/0x20 __fput+0xd8/0x210 task_work_run+0x84/0xa0 do_exit+0x2dc/0xb90 ? release_sock+0x43/0x90 do_group_exit+0x3a/0xa0 get_signal+0x295/0x720 do_signal+0x36/0x610 ? SYSC_recvfrom+0x11d/0x130 exit_to_usermode_loop+0x69/0xb0 do_syscall_64+0x173/0x180 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fe9b9abc10d RSP: 002b:00007fe9b19a1d48 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: fffffffffffffe00 RBX: 0000000000000006 RCX: 00007fe9b9abc10d RDX: 0000000000000002 RSI: 0000000000000080 RDI: 00007fe948003430 RBP: 00007fe948003410 R08: 00007fe948003430 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00005603739d9080 R13: 00007fe9b9ab9f90 R14: 00007fe948003430 R15: 0000000000000000 Fixes: 94850257cf0f ("tls: Fix tls_device handling of partial records") Signed-off-by: Dirk van der Merwe Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/tls.h | 15 --------------- net/tls/tls_main.c | 3 ++- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 4a55ce6a303f..53d96bca220d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -373,21 +373,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx) return !!ctx->partially_sent_record; } -static inline int tls_complete_pending_work(struct sock *sk, - struct tls_context *ctx, - int flags, long *timeo) -{ - int rc = 0; - - if (unlikely(sk->sk_write_pending)) - rc = wait_on_pending_writer(sk, timeo); - - if (!rc && tls_is_partially_sent_record(ctx)) - rc = tls_push_partial_record(sk, ctx, flags); - - return rc; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fc81ae18cc44..e2b69e805d46 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) goto skip_tx_cleanup; } - if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + if (unlikely(sk->sk_write_pending) && + !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); /* We need these for tls_sw_fallback handling of other packets */ From 55655e3d1197fff16a7a05088fb0e5eba50eac55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jun 2019 02:38:20 -0700 Subject: [PATCH 065/207] net/packet: fix memory leak in packet_set_ring() syzbot found we can leak memory in packet_set_ring(), if user application provides buggy parameters. Fixes: 7f953ab2ba46 ("af_packet: TX_RING support for TPACKET_V3") Signed-off-by: Eric Dumazet Cc: Sowmini Varadhan Reported-by: syzbot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a29d66da7394..0b4cf94f0233 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4314,7 +4314,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; - goto out; + goto out_free_pg_vec; } } break; @@ -4378,6 +4378,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, prb_shutdown_retire_blk_timer(po, rb_queue); } +out_free_pg_vec: if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: From 2bf4ecbcc7d837903c5967c9274f41b1ad29d530 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 21 Jun 2019 17:26:35 +0200 Subject: [PATCH 066/207] net: macb: do not copy the mac address if NULL This patch fixes the MAC address setup in the probe. The MAC address retrieved using of_get_mac_address was checked for not containing an error, but it may also be NULL which wasn't tested. Fix it by replacing IS_ERR with IS_ERR_OR_NULL. Fixes: 541ddc66d665 ("net: macb: support of_get_mac_address new ERR_PTR error") Signed-off-by: Antoine Tenart Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2375a13bb446..262a28ff81fc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4180,7 +4180,7 @@ static int macb_probe(struct platform_device *pdev) if (PTR_ERR(mac) == -EPROBE_DEFER) { err = -EPROBE_DEFER; goto err_out_free_netdev; - } else if (!IS_ERR(mac)) { + } else if (!IS_ERR_OR_NULL(mac)) { ether_addr_copy(bp->dev->dev_addr, mac); } else { macb_get_hwaddr(bp); From 4f07b80c973348a99b5d2a32476a2e7877e94a05 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 25 Jun 2019 00:28:19 +0800 Subject: [PATCH 067/207] tipc: check msg->req data len in tipc_nl_compat_bearer_disable This patch is to fix an uninit-value issue, reported by syzbot: BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:981 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310 memchr+0xce/0x110 lib/string.c:981 string_is_valid net/tipc/netlink_compat.c:176 [inline] tipc_nl_compat_bearer_disable+0x2a1/0x480 net/tipc/netlink_compat.c:449 __tipc_nl_compat_doit net/tipc/netlink_compat.c:327 [inline] tipc_nl_compat_doit+0x3ac/0xb00 net/tipc/netlink_compat.c:360 tipc_nl_compat_handle net/tipc/netlink_compat.c:1178 [inline] tipc_nl_compat_recv+0x1b1b/0x27b0 net/tipc/netlink_compat.c:1281 TLV_GET_DATA_LEN() may return a negtive int value, which will be used as size_t (becoming a big unsigned long) passed into memchr, cause this issue. Similar to what it does in tipc_nl_compat_bearer_enable(), this fix is to return -EINVAL when TLV_GET_DATA_LEN() is negtive in tipc_nl_compat_bearer_disable(), as well as in tipc_nl_compat_link_stat_dump() and tipc_nl_compat_link_reset_stats(). v1->v2: - add the missing Fixes tags per Eric's request. Fixes: 0762216c0ad2 ("tipc: fix uninit-value in tipc_nl_compat_bearer_enable") Fixes: 8b66fee7f8ee ("tipc: fix uninit-value in tipc_nl_compat_link_reset_stats") Reported-by: syzbot+30eaa8bf392f7fafffaf@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c6a04c09d075..cf155061c472 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -539,7 +543,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, name = (char *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -817,7 +825,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; From 904d88d743b0c94092c5117955eab695df8109e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 24 Jun 2019 18:45:11 +0200 Subject: [PATCH 068/207] qmi_wwan: Fix out-of-bounds read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The syzbot reported Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 print_address_description+0x67/0x231 mm/kasan/report.c:188 __kasan_report.cold+0x1a/0x32 mm/kasan/report.c:317 kasan_report+0xe/0x20 mm/kasan/common.c:614 qmi_wwan_probe+0x342/0x360 drivers/net/usb/qmi_wwan.c:1417 usb_probe_interface+0x305/0x7a0 drivers/usb/core/driver.c:361 really_probe+0x281/0x660 drivers/base/dd.c:509 driver_probe_device+0x104/0x210 drivers/base/dd.c:670 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:777 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 Caused by too many confusing indirections and casts. id->driver_info is a pointer stored in a long. We want the pointer here, not the address of it. Thanks-to: Hillf Danton Reported-by: syzbot+b68605d7fadd21510de1@syzkaller.appspotmail.com Cc: Kristian Evensen Fixes: e4bf63482c30 ("qmi_wwan: Add quirk for Quectel dynamic config") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d080f8048e52..8b4ad10cf940 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1482,7 +1482,7 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)&id->driver_info; + info = (void *)id->driver_info; if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { if (desc->bNumEndpoints == 2) From 913a90bc5a3a06b1f04c337320e9aeee2328dd77 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 4 Jun 2019 09:59:53 +0530 Subject: [PATCH 069/207] perf/ioctl: Add check for the sample_period value perf_event_open() limits the sample_period to 63 bits. See: 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period to 63 bits") Make ioctl() consistent with it. Also on PowerPC, negative sample_period could cause a recursive PMIs leading to a hang (reported when running perf-fuzzer). Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: maddy@linux.vnet.ibm.com Cc: mpe@ellerman.id.au Fixes: 0819b2e30ccb ("perf: Limit perf_event_attr::sample_period to 63 bits") Link: https://lkml.kernel.org/r/20190604042953.914-1-ravi.bangoria@linux.ibm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2e32faac5511..8d1c62df20a7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; From e321d02db87af7840da29ef833a2a71fc0eab198 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 May 2019 15:08:30 -0700 Subject: [PATCH 070/207] perf/x86: Disable extended registers for non-supported PMUs The perf fuzzer caused Skylake machine to crash: [ 9680.085831] Call Trace: [ 9680.088301] [ 9680.090363] perf_output_sample_regs+0x43/0xa0 [ 9680.094928] perf_output_sample+0x3aa/0x7a0 [ 9680.099181] perf_event_output_forward+0x53/0x80 [ 9680.103917] __perf_event_overflow+0x52/0xf0 [ 9680.108266] ? perf_trace_run_bpf_submit+0xc0/0xc0 [ 9680.113108] perf_swevent_hrtimer+0xe2/0x150 [ 9680.117475] ? check_preempt_wakeup+0x181/0x230 [ 9680.122091] ? check_preempt_curr+0x62/0x90 [ 9680.126361] ? ttwu_do_wakeup+0x19/0x140 [ 9680.130355] ? try_to_wake_up+0x54/0x460 [ 9680.134366] ? reweight_entity+0x15b/0x1a0 [ 9680.138559] ? __queue_work+0x103/0x3f0 [ 9680.142472] ? update_dl_rq_load_avg+0x1cd/0x270 [ 9680.147194] ? timerqueue_del+0x1e/0x40 [ 9680.151092] ? __remove_hrtimer+0x35/0x70 [ 9680.155191] __hrtimer_run_queues+0x100/0x280 [ 9680.159658] hrtimer_interrupt+0x100/0x220 [ 9680.163835] smp_apic_timer_interrupt+0x6a/0x140 [ 9680.168555] apic_timer_interrupt+0xf/0x20 [ 9680.172756] The XMM registers can only be collected by PEBS hardware events on the platforms with PEBS baseline support, e.g. Icelake, not software/probe events. Add capabilities flag PERF_PMU_CAP_EXTENDED_REGS to indicate the PMU which support extended registers. For X86, the extended registers are XMM registers. Add has_extended_regs() to check if extended registers are applied. The generic code define the mask of extended registers as 0 if arch headers haven't overridden it. Originally-by: Peter Zijlstra (Intel) Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 878068ea270e ("perf/x86: Support outputting XMM registers") Link: https://lkml.kernel.org/r/1559081314-9714-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 1 + arch/x86/include/uapi/asm/perf_regs.h | 3 +++ include/linux/perf_event.h | 1 + include/linux/perf_regs.h | 8 ++++++++ kernel/events/core.c | 18 ++++++++++++++---- 5 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7acc526b4ad2..6cb38ab02c8a 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2020,6 +2020,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; + x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ x86_pmu.pebs_no_xmm_regs = 1; diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0ab99c7b652d..2bca72f3028b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -241,6 +241,7 @@ struct perf_event; #define PERF_PMU_CAP_NO_INTERRUPT 0x01 #define PERF_PMU_CAP_NO_NMI 0x02 #define PERF_PMU_CAP_AUX_NO_SG 0x04 +#define PERF_PMU_CAP_EXTENDED_REGS 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 476747456bca..2d12e97d5e7b 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -11,6 +11,11 @@ struct perf_regs { #ifdef CONFIG_HAVE_PERF_REGS #include + +#ifndef PERF_REG_EXTENDED_MASK +#define PERF_REG_EXTENDED_MASK 0 +#endif + u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); @@ -18,6 +23,9 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs, struct pt_regs *regs_user_copy); #else + +#define PERF_REG_EXTENDED_MASK 0 + static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { return 0; diff --git a/kernel/events/core.c b/kernel/events/core.c index 8d1c62df20a7..f85929ce13be 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10036,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static inline bool has_extended_regs(struct perf_event *event) +{ + return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || + (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); +} + static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { struct perf_event_context *ctx = NULL; @@ -10067,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) perf_event_ctx_unlock(event->group_leader, ctx); if (!ret) { + if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && + has_extended_regs(event)) + ret = -EOPNOTSUPP; + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && - event_has_any_exclude_flag(event)) { - if (event->destroy) - event->destroy(event); + event_has_any_exclude_flag(event)) ret = -EINVAL; - } + + if (ret && event->destroy) + event->destroy(event); } if (ret) From 90d424915ab6550826d297fd62df8ee255345b95 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 May 2019 15:08:31 -0700 Subject: [PATCH 071/207] perf/x86/regs: Check reserved bits The perf fuzzer triggers a warning which map to: if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; The bits between XMM registers and generic registers are reserved. But perf_reg_validate() doesn't check these bits. Add PERF_REG_X86_RESERVED for reserved bits on X86. Check the reserved bits in perf_reg_validate(). Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 878068ea270e ("perf/x86: Support outputting XMM registers") Link: https://lkml.kernel.org/r/1559081314-9714-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/perf_regs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 07c30ee17425..bb7e1132290b 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return regs_get_register(regs, pt_regs_offset[idx]); } +#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \ + ~((1ULL << PERF_REG_X86_MAX) - 1)) + #ifdef CONFIG_X86_32 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ @@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; @@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; From dce86ac75d772047e9bc606154704aa73bfd4c83 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 May 2019 15:08:32 -0700 Subject: [PATCH 072/207] perf/x86: Clean up PEBS_XMM_REGS Use generic macro PERF_REG_EXTENDED_MASK to replace PEBS_XMM_REGS to avoid duplication. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/1559081314-9714-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++-- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 18 ------------------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..7708a6fb5f4a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -561,13 +561,13 @@ int x86_pmu_hw_config(struct perf_event *event) } /* sample_regs_user never support XMM registers */ - if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) return -EINVAL; /* * Besides the general purpose registers, XMM registers may * be collected in PEBS on some platforms, e.g. Icelake */ - if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { if (x86_pmu.pebs_no_xmm_regs) return -EINVAL; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6cb38ab02c8a..955b2c688f23 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) pebs_data_cfg |= PEBS_DATACFG_GP; if ((sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_XMM_REGS)) + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) pebs_data_cfg |= PEBS_DATACFG_XMMS; if (sample_type & PERF_SAMPLE_BRANCH_STACK) { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a6ac2f4f76fc..d3b6e90c80d3 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -121,24 +121,6 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) -#define PEBS_XMM_REGS \ - ((1ULL << PERF_REG_X86_XMM0) | \ - (1ULL << PERF_REG_X86_XMM1) | \ - (1ULL << PERF_REG_X86_XMM2) | \ - (1ULL << PERF_REG_X86_XMM3) | \ - (1ULL << PERF_REG_X86_XMM4) | \ - (1ULL << PERF_REG_X86_XMM5) | \ - (1ULL << PERF_REG_X86_XMM6) | \ - (1ULL << PERF_REG_X86_XMM7) | \ - (1ULL << PERF_REG_X86_XMM8) | \ - (1ULL << PERF_REG_X86_XMM9) | \ - (1ULL << PERF_REG_X86_XMM10) | \ - (1ULL << PERF_REG_X86_XMM11) | \ - (1ULL << PERF_REG_X86_XMM12) | \ - (1ULL << PERF_REG_X86_XMM13) | \ - (1ULL << PERF_REG_X86_XMM14) | \ - (1ULL << PERF_REG_X86_XMM15)) - /* * Per register state. */ From cd6b984f6d8cd615755b5404a51b7efe45215f28 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 May 2019 15:08:33 -0700 Subject: [PATCH 073/207] perf/x86: Remove pmu->pebs_no_xmm_regs We don't need pmu->pebs_no_xmm_regs anymore, the capabilities PERF_PMU_CAP_EXTENDED_REGS can be used to check if XMM registers collection is supported. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/1559081314-9714-4-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/events/intel/ds.c | 6 ++---- arch/x86/events/perf_event.h | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 7708a6fb5f4a..52a97463cb24 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -568,7 +568,7 @@ int x86_pmu_hw_config(struct perf_event *event) * be collected in PEBS on some platforms, e.g. Icelake */ if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { - if (x86_pmu.pebs_no_xmm_regs) + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) return -EINVAL; if (!event->attr.precise_ip) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 955b2c688f23..505c73dc6a73 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1964,10 +1964,9 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) { + if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - x86_pmu.pebs_no_xmm_regs = 1; - } + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; @@ -2023,7 +2022,6 @@ void __init intel_ds_init(void) x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ - x86_pmu.pebs_no_xmm_regs = 1; x86_pmu.large_pebs_flags &= ~(PERF_SAMPLE_ADDR | PERF_SAMPLE_TIME | diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index d3b6e90c80d3..4e346856ee19 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -650,8 +650,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1, - pebs_no_xmm_regs :1; + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; From 8b12b812f5367c2469fb937da7e28dd321ad8d7b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 May 2019 15:08:34 -0700 Subject: [PATCH 074/207] perf/x86/regs: Use PERF_REG_EXTENDED_MASK Use the macro defined in kernel ABI header to replace the local name. No functional change. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/1559081314-9714-5-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- tools/arch/x86/include/uapi/asm/perf_regs.h | 3 +++ tools/perf/arch/x86/include/perf_regs.h | 1 - tools/perf/arch/x86/util/perf_regs.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/tools/arch/x86/include/uapi/asm/perf_regs.h +++ b/tools/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index b7cd91a9014f..b7321337d100 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -9,7 +9,6 @@ void perf_regs_load(u64 *regs); #define PERF_REGS_MAX PERF_REG_X86_XMM_MAX -#define PERF_XMM_REGS_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) #ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 7886ca5263e3..3666c0076df9 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -277,7 +277,7 @@ uint64_t arch__intr_reg_mask(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .sample_type = PERF_SAMPLE_REGS_INTR, - .sample_regs_intr = PERF_XMM_REGS_MASK, + .sample_regs_intr = PERF_REG_EXTENDED_MASK, .precise_ip = 1, .disabled = 1, .exclude_kernel = 1, @@ -293,7 +293,7 @@ uint64_t arch__intr_reg_mask(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd != -1) { close(fd); - return (PERF_XMM_REGS_MASK | PERF_REGS_MASK); + return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK); } return PERF_REGS_MASK; From 0b24cae4d535045f4c9e177aa228d4e97bad212c Mon Sep 17 00:00:00 2001 From: Dmitry Korotin Date: Mon, 24 Jun 2019 19:05:27 +0000 Subject: [PATCH 075/207] MIPS: Add missing EHB in mtc0 -> mfc0 sequence. Add a missing EHB (Execution Hazard Barrier) in mtc0 -> mfc0 sequence. Without this execution hazard barrier it's possible for the value read back from the KScratch register to be the value from before the mtc0. Reproducible on P5600 & P6600. The hazard is documented in the MIPS Architecture Reference Manual Vol. III: MIPS32/microMIPS32 Privileged Resource Architecture (MD00088), rev 6.03 table 8.1 which includes: Producer | Consumer | Hazard ----------|----------|---------------------------- mtc0 | mfc0 | any coprocessor 0 register Signed-off-by: Dmitry Korotin [paul.burton@mips.com: - Commit message tweaks. - Add Fixes tags. - Mark for stable back to v3.15 where P5600 support was introduced.] Signed-off-by: Paul Burton Fixes: 3d8bfdd03072 ("MIPS: Use C0_KScratch (if present) to hold PGD pointer.") Fixes: 829dcc0a956a ("MIPS: Add MIPS P5600 probe support") Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v3.15+ --- arch/mips/mm/tlbex.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 65b6e85447b1..144ceb0fba88 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p) static void build_restore_work_registers(u32 **p) { if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } @@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { uasm_i_nop(p); } @@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, tlb_random); uasm_l_leave(l, *p); @@ -1603,15 +1609,17 @@ static void build_setup_pgd(void) uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); uasm_l_tlbl_goaround1(&l, p); UASM_i_SLL(&p, a0, a0, 11); - uasm_i_jr(&p, 31); UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } else { /* PGD in c0_KScratch */ - uasm_i_jr(&p, 31); if (cpu_has_ldpte) UASM_i_MTC0(&p, a0, C0_PWBASE); else UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } #else #ifdef CONFIG_SMP @@ -1625,13 +1633,16 @@ static void build_setup_pgd(void) UASM_i_LA_mostly(&p, a2, pgdc); UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); #endif /* SMP */ - uasm_i_jr(&p, 31); /* if pgd_reg is allocated, save PGD also to scratch register */ - if (pgd_reg != -1) + if (pgd_reg != -1) { UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); - else + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); uasm_i_nop(&p); + } #endif if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); From f2ff671f894151a611eae246a1f25b61d6c0354b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 24 Jun 2019 23:27:51 +0200 Subject: [PATCH 076/207] MAINTAINERS: Correct path to moved files The driver was moved in commit 1838a7b31fcb ("mtd: rawnand: Move drivers for Ingenic SoCs to subfolder"). Signed-off-by: Paul Cercueil Signed-off-by: Paul Burton Cc: Greg Kroah-Hartman Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 57f496cff999..0636bdf31511 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7801,7 +7801,7 @@ INGENIC JZ4780 NAND DRIVER M: Harvey Hunt L: linux-mtd@lists.infradead.org S: Maintained -F: drivers/mtd/nand/raw/jz4780_* +F: drivers/mtd/nand/raw/ingenic/ INOTIFY M: Jan Kara From e13e7cd4c0c1cc9984d9b6a8663e10d76b53f2aa Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 22 Jun 2019 08:55:54 +1000 Subject: [PATCH 077/207] powerpc/64s/exception: Fix machine check early corrupting AMR The early machine check runs in real mode, so locking is unnecessary. Worse, the windup does not restore AMR, so this can result in a false KUAP fault after a recoverable machine check hits inside a user copy operation. Fix this similarly to HMI by just avoiding the kuap lock in the early machine check handler (it will be set by the late handler that runs in virtual mode if that runs). If the virtual mode handler is reached, it will lock and restore the AMR. Fixes: 890274c2dc4c0 ("powerpc/64s: Implement KUAP for Radix MMU") Cc: Russell Currey Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6b86055e5251..73ba246ca11d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ From 503d90b30602a3295978e46d844ccc8167400fe6 Mon Sep 17 00:00:00 2001 From: Richard Sailer Date: Wed, 19 Jun 2019 13:33:11 +0200 Subject: [PATCH 078/207] ALSA: hda/realtek: Add quirks for several Clevo notebook barebones This adds 4 SND_PCI_QUIRK(...) lines for several barebone models of the ODM Clevo. The model names are written in regex syntax to describe/match all clevo models that are similar enough and use the same PCI SSID that this fixup works for them. Additionally the lines regarding SSID 0x96e1 and 0x97e1 didn't fix audio for the all our Clevo notebooks using these SSIDs (models Clevo P960* and P970*) since ALC1220_FIXP_CLEVO_PB51ED_PINS swapped pins that are not necesarry to be swapped. This patch initiates ALC1220_FIXUP_CLEVO_P950 instead for these model and fixes the audio. Fixes: 80690a276f44 ("ALSA: hda/realtek - Add quirk for Tuxedo XC 1509") Signed-off-by: Richard Sailer Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 179e4be1f747..35f01f5102da 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2461,9 +2461,10 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), From 6dbc6e6f58556369bf999cd7d9793586f1b0e4b4 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 13 Jun 2019 12:10:23 +0800 Subject: [PATCH 079/207] pinctrl: mcp23s08: Fix add_data and irqchip_add_nested call order Currently probing of the mcp23s08 results in an error message "detected irqchip that is shared with multiple gpiochips: please fix the driver" This is due to the following: Call to mcp23s08_irqchip_setup() with call hierarchy: mcp23s08_irqchip_setup() gpiochip_irqchip_add_nested() gpiochip_irqchip_add_key() gpiochip_set_irq_hooks() Call to devm_gpiochip_add_data() with call hierarchy: devm_gpiochip_add_data() gpiochip_add_data_with_key() gpiochip_add_irqchip() gpiochip_set_irq_hooks() The gpiochip_add_irqchip() returns immediately if there isn't a irqchip but we added a irqchip due to the previous mcp23s08_irqchip_setup() call. So it calls gpiochip_set_irq_hooks() a second time. Fix this by moving the call to devm_gpiochip_add_data before the call to mcp23s08_irqchip_setup Fixes: 02e389e63e35 ("pinctrl: mcp23s08: fix irq setup order") Suggested-by: Marco Felsch Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 568ca96cdb6d..3a235487e38d 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -771,6 +771,10 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (ret < 0) goto fail; + ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); + if (ret < 0) + goto fail; + mcp->irq_controller = device_property_read_bool(dev, "interrupt-controller"); if (mcp->irq && mcp->irq_controller) { @@ -812,10 +816,6 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } - ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); - if (ret < 0) - goto fail; - if (one_regmap_config) { mcp->pinctrl_desc.name = devm_kasprintf(dev, GFP_KERNEL, "mcp23xxx-pinctrl.%d", raw_chip_address); From f2818ba3a0125670cb9999bb5a65ebb631a8da2f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 20 Jun 2019 20:30:36 +0200 Subject: [PATCH 080/207] pinctrl: ocelot: fix gpio direction for pins after 31 The third argument passed to REG is not the correct one and ocelot_gpio_set_direction is not working for pins after 31. Fix that by passing the pin number instead of the modulo 32 value. Fixes: da801ab56ad8 pinctrl: ocelot: add MSCC Jaguar2 support Signed-off-by: Alexandre Belloni Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 3b4ca52d2456..d2478db975bd 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -432,7 +432,7 @@ static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev, struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); unsigned int p = pin % 32; - regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, p), BIT(p), + regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, pin), BIT(p), input ? 0 : BIT(p)); return 0; From 4b36082e2e09c2769710756390d54cfca563ed96 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 20 Jun 2019 20:30:37 +0200 Subject: [PATCH 081/207] pinctrl: ocelot: fix pinmuxing for pins after 31 The actual layout for OCELOT_GPIO_ALT[01] when there are more than 32 pins is interleaved, i.e. OCELOT_GPIO_ALT0[0], OCELOT_GPIO_ALT1[0], OCELOT_GPIO_ALT0[1], OCELOT_GPIO_ALT1[1]. Introduce a new REG_ALT macro to facilitate the register offset calculation and use it where necessary. Fixes: da801ab56ad8 pinctrl: ocelot: add MSCC Jaguar2 support Signed-off-by: Alexandre Belloni Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ocelot.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index d2478db975bd..fb76fb2e9ea5 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -396,7 +396,7 @@ static int ocelot_pin_function_idx(struct ocelot_pinctrl *info, return -1; } -#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32))) +#define REG_ALT(msb, info, p) (OCELOT_GPIO_ALT0 * (info)->stride + 4 * ((msb) + ((info)->stride * ((p) / 32)))) static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int selector, unsigned int group) @@ -412,19 +412,21 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, /* * f is encoded on two bits. - * bit 0 of f goes in BIT(pin) of ALT0, bit 1 of f goes in BIT(pin) of - * ALT1 + * bit 0 of f goes in BIT(pin) of ALT[0], bit 1 of f goes in BIT(pin) of + * ALT[1] * This is racy because both registers can't be updated at the same time * but it doesn't matter much for now. */ - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, pin->pin), + regmap_update_bits(info->map, REG_ALT(0, info, pin->pin), BIT(p), f << p); - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, pin->pin), + regmap_update_bits(info->map, REG_ALT(1, info, pin->pin), BIT(p), f << (p - 1)); return 0; } +#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32))) + static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned int pin, bool input) @@ -445,9 +447,9 @@ static int ocelot_gpio_request_enable(struct pinctrl_dev *pctldev, struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); unsigned int p = offset % 32; - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, offset), + regmap_update_bits(info->map, REG_ALT(0, info, offset), BIT(p), 0); - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, offset), + regmap_update_bits(info->map, REG_ALT(1, info, offset), BIT(p), 0); return 0; From dec7e6494e1aea6bf676223da3429cd17ce0af79 Mon Sep 17 00:00:00 2001 From: Gen Zhang Date: Wed, 29 May 2019 09:33:20 +0800 Subject: [PATCH 082/207] dm init: fix incorrect uses of kstrndup() Fix 2 kstrndup() calls with incorrect argument order. Fixes: 6bbc923dfcf5 ("dm: add support to directly boot to a mapped device") Cc: stable@vger.kernel.org # v5.1 Signed-off-by: Gen Zhang Signed-off-by: Mike Snitzer --- drivers/md/dm-init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 352e803f566e..64611633e77c 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) return ERR_PTR(-EINVAL); } /* target_args */ - dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, - DM_MAX_STR_SIZE); + dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE, + GFP_KERNEL); if (!dev->target_args_array[n]) return ERR_PTR(-ENOMEM); @@ -275,7 +275,7 @@ static int __init dm_init_init(void) DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); return -EINVAL; } - str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL); if (!str) return -ENOMEM; From e6feaf215f07dd98d03ee783c9dd4c7f7e55b74d Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 22 Jun 2019 22:44:16 +0200 Subject: [PATCH 083/207] block, bfq: fix operator in BFQQ_TOTALLY_SEEKY By mistake, there is a '&' instead of a '==' in the definition of the macro BFQQ_TOTALLY_SEEKY. This commit replaces the wrong operator with the correct one. Fixes: 7074f076ff15 ("block, bfq: do not tag totally seeky queues as soft rt") Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f8d430f88d25..f9269ae6da9c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -240,7 +240,7 @@ static struct kmem_cache *bfq_pool; * containing only random (seeky) I/O are prevented from being tagged * as soft real-time. */ -#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1) +#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 From 10c9c8e7c09b4d32b31df1bd14673bd6dbfc50be Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 4 Jun 2019 18:27:29 -0700 Subject: [PATCH 084/207] dm init: remove trailing newline from calls to DMERR() and DMINFO() These printing macros already add a trailing newline, so having another one here just makes for blank lines when these prints are enabled. Remove these needless newlines. Fixes: 6bbc923dfcf5 ("dm: add support to directly boot to a mapped device") Signed-off-by: Stephen Boyd Signed-off-by: Mike Snitzer --- drivers/md/dm-init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 64611633e77c..728733a514c7 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -272,7 +272,7 @@ static int __init dm_init_init(void) return 0; if (strlen(create) >= DM_MAX_STR_SIZE) { - DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); + DMERR("Argument is too big. Limit is %d", DM_MAX_STR_SIZE); return -EINVAL; } str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL); @@ -283,7 +283,7 @@ static int __init dm_init_init(void) if (r) goto out; - DMINFO("waiting for all devices to be available before creating mapped devices\n"); + DMINFO("waiting for all devices to be available before creating mapped devices"); wait_for_device_probe(); list_for_each_entry(dev, &devices, list) { From 211ad4b733037f66f9be0a79eade3da7ab11cbb8 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Wed, 5 Jun 2019 21:27:08 +0800 Subject: [PATCH 085/207] dm log writes: make sure super sector log updates are written in order Currently, although we submit super bios in order (and super.nr_entries is incremented by each logged entry), submit_bio() is async so each super sector may not be written to log device in order and then the final nr_entries may be smaller than it should be. This problem can be reproduced by the xfstests generic/455 with ext4: QA output created by 455 -Silence is golden +mark 'end' does not exist Fix this by serializing submission of super sectors to make sure each is written to the log disk in order. Fixes: 0e9cebe724597 ("dm: add log writes target") Cc: stable@vger.kernel.org Signed-off-by: zhangyi (F) Suggested-by: Josef Bacik Reviewed-by: Josef Bacik Signed-off-by: Mike Snitzer --- drivers/md/dm-log-writes.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 9ea2b0291f20..e549392e0ea5 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -60,6 +60,7 @@ #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL +#define WRITE_LOG_SUPER_SECTOR 0 /* * The disk format for this is braindead simple. @@ -115,6 +116,7 @@ struct log_writes_c { struct list_head logging_blocks; wait_queue_head_t wait; struct task_struct *log_kthread; + struct completion super_done; }; struct pending_block { @@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio) bio_put(bio); } +static void log_end_super(struct bio *bio) +{ + struct log_writes_c *lc = bio->bi_private; + + complete(&lc->super_done); + log_end_io(bio); +} + /* * Meant to be called if there is an error, it will free all the pages * associated with the block. @@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); - bio->bi_end_io = log_end_io; + bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ? + log_end_super : log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc) super.nr_entries = cpu_to_le64(lc->logged_entries); super.sectorsize = cpu_to_le32(lc->sectorsize); - if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { + if (write_metadata(lc, &super, sizeof(super), NULL, 0, + WRITE_LOG_SUPER_SECTOR)) { DMERR("Couldn't write super"); return -1; } + /* + * Super sector should be writen in-order, otherwise the + * nr_entries could be rewritten incorrectly by an old bio. + */ + wait_for_completion_io(&lc->super_done); + return 0; } @@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); + init_completion(&lc->super_done); atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 0); From a0651926553cfe7992166432e418987760882652 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 12 Jun 2019 18:22:26 +0200 Subject: [PATCH 086/207] dm table: don't copy from a NULL pointer in realloc_argv() For the first call to realloc_argv() in dm_split_args(), old_argv is NULL and size is zero. Then memcpy is called, with the NULL old_argv as the source argument and a zero size argument. AFAIK, this is undefined behavior and generates the following warning when compiled with UBSAN on ppc64le: In file included from ./arch/powerpc/include/asm/paca.h:19, from ./arch/powerpc/include/asm/current.h:16, from ./include/linux/sched.h:12, from ./include/linux/kthread.h:6, from drivers/md/dm-core.h:12, from drivers/md/dm-table.c:8: In function 'memcpy', inlined from 'realloc_argv' at drivers/md/dm-table.c:565:3, inlined from 'dm_split_args' at drivers/md/dm-table.c:588:9: ./include/linux/string.h:345:9: error: argument 2 null where non-null expected [-Werror=nonnull] return __builtin_memcpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/md/dm-table.c: In function 'dm_split_args': ./include/linux/string.h:345:9: note: in a call to built-in function '__builtin_memcpy' Signed-off-by: Jerome Marchand Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 350cf0451456..ec8b27e20de3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -561,7 +561,7 @@ static char **realloc_argv(unsigned *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv) { + if (argv && old_argv) { memcpy(argv, old_argv, *size * sizeof(*argv)); *size = new_size; } From 2eba4e640b2c4161e31ae20090a53ee02a518657 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 20 Jun 2019 13:00:19 +0200 Subject: [PATCH 087/207] dm verity: use message limit for data block corruption message DM verity should also use DMERR_LIMIT to limit repeat data block corruption messages. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 720d06531aa3..ea24ff0612e3 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -235,8 +235,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, BUG(); } - DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, - block); + DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name, + type_str, block); if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) DMERR("%s: reached maximum errors", v->data_dev->name); From 38c73529de13e1e10914de7030b659a2f8b01c3b Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Mon, 24 Jun 2019 20:14:06 -0400 Subject: [PATCH 088/207] ipv4: Use return value of inet_iif() for __raw_v4_lookup in the while loop In commit 19e4e768064a8 ("ipv4: Fix raw socket lookup for local traffic"), the dif argument to __raw_v4_lookup() is coming from the returned value of inet_iif() but the change was done only for the first lookup. Subsequent lookups in the while loop still use skb->dev->ifIndex. Fixes: 19e4e768064a8 ("ipv4: Fix raw socket lookup for local traffic") Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0b8e06ca75d6..40a6abbc9cf6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -197,7 +197,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + dif, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); From b8e8a86337c25941cb06e9a1c8ee01ab9aab0cc2 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Tue, 25 Jun 2019 12:08:01 +0900 Subject: [PATCH 089/207] net/ipv6: Fix misuse of proc_dointvec "skip_notify_on_dev_down" /proc/sys/net/ipv6/route/skip_notify_on_dev_down assumes given value to be 0 or 1. Use proc_dointvec_minmax instead of proc_dointvec. Fixes: 7c6bb7d2faaf ("net/ipv6: Add knob to skip DELROUTE message ondevice down") Signed-off-by: Eiichi Tsukata Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11ad62effd56..aade636c6be6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5281,7 +5281,7 @@ static struct ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, From 74684cce5ebd567b01e9bc0e9a1945c70a32f32f Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 7 Jun 2019 10:12:46 -0500 Subject: [PATCH 090/207] clk: socfpga: stratix10: fix divider entry for the emac clocks The fixed dividers for the emac clocks should be 2 not 4. Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Signed-off-by: Stephen Boyd --- drivers/clk/socfpga/clk-s10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 8281dfbf38c2..5bed36e12951 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = { { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux), 0, 0, 0, 0x3C, 1}, { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux), - 0, 0, 4, 0xB0, 0}, + 0, 0, 2, 0xB0, 0}, { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), - 0, 0, 4, 0xB0, 1}, + 0, 0, 2, 0xB0, 1}, { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, From 81c7ed296dcd02bc0b4488246d040e03e633737a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 20 Jun 2019 14:23:45 +0300 Subject: [PATCH 091/207] x86/boot/64: Fix crash if kernel image crosses page table boundary A kernel which boots in 5-level paging mode crashes in a small percentage of cases if KASLR is enabled. This issue was tracked down to the case when the kernel image unpacks in a way that it crosses an 1G boundary. The crash is caused by an overrun of the PMD page table in __startup_64() and corruption of P4D page table allocated next to it. This particular issue is not visible with 4-level paging as P4D page tables are not used. But the P4D and the PUD calculation have similar problems. The PMD index calculation is wrong due to operator precedence, which fails to confine the PMDs in the PMD array on wrap around. The P4D calculation for 5-level paging and the PUD calculation calculate the first index correctly, but then blindly increment it which causes the same issue when a kernel image is located across a 512G and for 5-level paging across a 46T boundary. This wrap around mishandling was introduced when these parts moved from assembly to C. Restore it to the correct behaviour. Fixes: c88d71508e36 ("x86/boot/64: Rewrite startup_64() in C") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20190620112345.28833-1-kirill.shutemov@linux.intel.com --- arch/x86/kernel/head64.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 16b1cbd3a61e..7df5bce4e1be 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -190,18 +190,18 @@ unsigned long __head __startup_64(unsigned long physaddr, pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; - i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; - p4d[i + 0] = (pgdval_t)pud + pgtable_flags; - p4d[i + 1] = (pgdval_t)pud + pgtable_flags; + i = physaddr >> P4D_SHIFT; + p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; + p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; } else { i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)pud + pgtable_flags; pgd[i + 1] = (pgdval_t)pud + pgtable_flags; } - i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; - pud[i + 0] = (pudval_t)pmd + pgtable_flags; - pud[i + 1] = (pudval_t)pmd + pgtable_flags; + i = physaddr >> PUD_SHIFT; + pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; + pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ @@ -211,8 +211,9 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry += physaddr; for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { - int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; - pmd[idx] = pmd_entry + i * PMD_SIZE; + int idx = i + (physaddr >> PMD_SHIFT); + + pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE; } /* From c1887159eb48ba40e775584cfb2a443962cf1a05 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 20 Jun 2019 14:24:22 +0300 Subject: [PATCH 092/207] x86/boot/64: Add missing fixup_pointer() for next_early_pgt access __startup_64() uses fixup_pointer() to access global variables in a position-independent fashion. Access to next_early_pgt was wrapped into the helper, but one instance in the 5-level paging branch was missed. GCC generates a R_X86_64_PC32 PC-relative relocation for the access which doesn't trigger the issue, but Clang emmits a R_X86_64_32S which leads to an invalid memory access and system reboot. Fixes: 187e91fe5e91 ("x86/boot/64/clang: Use fixup_pointer() to access 'next_early_pgt'") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Alexander Potapenko Link: https://lkml.kernel.org/r/20190620112422.29264-1-kirill.shutemov@linux.intel.com --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7df5bce4e1be..29ffa495bd1c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -184,7 +184,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], + physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; From 432c833218dd0f75e7b56bd5e8658b72073158d2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 24 Jun 2019 15:31:50 +0300 Subject: [PATCH 093/207] x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init() Kyle has reported occasional crashes when booting a kernel in 5-level paging mode with KASLR enabled: WARNING: CPU: 0 PID: 0 at arch/x86/mm/init_64.c:87 phys_p4d_init+0x1d4/0x1ea RIP: 0010:phys_p4d_init+0x1d4/0x1ea Call Trace: __kernel_physical_mapping_init+0x10a/0x35c kernel_physical_mapping_init+0xe/0x10 init_memory_mapping+0x1aa/0x3b0 init_range_memory_mapping+0xc8/0x116 init_mem_mapping+0x225/0x2eb setup_arch+0x6ff/0xcf5 start_kernel+0x64/0x53b ? copy_bootdata+0x1f/0xce x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x8a/0x8d secondary_startup_64+0xb6/0xc0 which causes later: BUG: unable to handle page fault for address: ff484d019580eff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page BAD Oops: 0000 [#1] SMP NOPTI RIP: 0010:fill_pud+0x13/0x130 Call Trace: set_pte_vaddr_p4d+0x2e/0x50 set_pte_vaddr+0x6f/0xb0 __native_set_fixmap+0x28/0x40 native_set_fixmap+0x39/0x70 register_lapic_address+0x49/0xb6 early_acpi_boot_init+0xa5/0xde setup_arch+0x944/0xcf5 start_kernel+0x64/0x53b Kyle bisected the issue to commit b569c1843498 ("x86/mm/KASLR: Reduce randomization granularity for 5-level paging to 1GB") Before this commit PAGE_OFFSET was always aligned to P4D_SIZE when booting 5-level paging mode. But now only PUD_SIZE alignment is guaranteed. In the case I was able to reproduce the following vaddr/paddr values were observed in phys_p4d_init(): Iteration vaddr paddr 1 0xff4228027fe00000 0x033fe00000 2 0xff42287f40000000 0x8000000000 'vaddr' in both cases belongs to the same p4d entry. But due to the original assumption that PAGE_OFFSET is aligned to P4D_SIZE this overlap cannot be handled correctly. The code assumes strictly aligned entries and unconditionally increments the index into the P4D table, which creates false duplicate entries. Once the index reaches the end, the last entry in the page table is missing. Aside of that the 'paddr >= paddr_end' condition can evaluate wrong which causes an P4D entry to be cleared incorrectly. Change the loop in phys_p4d_init() to walk purely based on virtual addresses like __kernel_physical_mapping_init() does. This makes it work correctly with unaligned virtual addresses. Fixes: b569c1843498 ("x86/mm/KASLR: Reduce randomization granularity for 5-level paging to 1GB") Reported-by: Kyle Pelton Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Tested-by: Kyle Pelton Acked-by: Baoquan He Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20190624123150.920-1-kirill.shutemov@linux.intel.com --- arch/x86/mm/init_64.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 693aaf28d5fe..0f01c7b1d217 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -671,23 +671,25 @@ static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, bool init) { - unsigned long paddr_next, paddr_last = paddr_end; - unsigned long vaddr = (unsigned long)__va(paddr); - int i = p4d_index(vaddr); + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; + + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr); + vaddr_end = (unsigned long)__va(paddr_end); if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask, init); - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + p4d_t *p4d = p4d_page + p4d_index(vaddr); pud_t *pud; - vaddr = (unsigned long)__va(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; + paddr = __pa(vaddr); if (paddr >= paddr_end) { + paddr_next = __pa(vaddr_next); if (!after_bootmem && !e820__mapped_any(paddr & P4D_MASK, paddr_next, E820_TYPE_RAM) && @@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); - paddr_last = phys_pud_init(pud, paddr, paddr_end, - page_size_mask, init); + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), + page_size_mask, init); continue; } pud = alloc_low_page(); - paddr_last = phys_pud_init(pud, paddr, paddr_end, + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), page_size_mask, init); spin_lock(&init_mm.page_table_lock); From 19e5e2ae9c883f5651eaaeab2f258e2c4b78fda3 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 26 Jun 2019 11:27:03 +0800 Subject: [PATCH 094/207] csky: Fixup libgcc unwind error The struct rt_sigframe is also defined in libgcc/config/csky/linux-unwind.h of gcc. Although there is no use for the first three word space, we must keep them the same with linux-unwind.h for member position. The BUG is found in glibc test with the tst-cancel02. The BUG is from commit:bf2416829362 of linux-5.2-rc1 merge window. Signed-off-by: Guo Ren Signed-off-by: Mao Han Cc: Arnd Bergmann --- arch/csky/kernel/signal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 04a43cfd4e09..d47a3381aad8 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc) #endif struct rt_sigframe { + /* + * pad[3] is compatible with the same struct defined in + * gcc/libgcc/config/csky/linux-unwind.h + */ + int pad[3]; struct siginfo info; struct ucontext uc; }; From 41de4be6f6efa4132b29af51158cd672d93f2543 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 5 Apr 2019 06:46:02 +0200 Subject: [PATCH 095/207] drm/virtio: move drm_connector_update_edid_property() call drm_connector_update_edid_property can sleep, we must not call it while holding a spinlock. Move the callsite. Fixes: b4b01b4995fb ("drm/virtio: add edid support") Reported-by: Max Filippov Signed-off-by: Gerd Hoffmann Tested-by: Max Filippov Tested-by: Cornelia Huck Acked-by: Cornelia Huck Link: http://patchwork.freedesktop.org/patch/msgid/20190405044602.2334-1-kraxel@redhat.com --- drivers/gpu/drm/virtio/virtgpu_vq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index e62fe24b1a2e..5bb0f0a084e9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -619,11 +619,11 @@ static void virtio_gpu_cmd_get_edid_cb(struct virtio_gpu_device *vgdev, output = vgdev->outputs + scanout; new_edid = drm_do_get_edid(&output->conn, virtio_get_edid_block, resp); + drm_connector_update_edid_property(&output->conn, new_edid); spin_lock(&vgdev->display_info_lock); old_edid = output->edid; output->edid = new_edid; - drm_connector_update_edid_property(&output->conn, output->edid); spin_unlock(&vgdev->display_info_lock); kfree(old_edid); From 6f496a555d93db7a11d4860b9220d904822f586a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 25 Jun 2019 19:08:54 +0200 Subject: [PATCH 096/207] arm64: kaslr: keep modules inside module region when KASAN is enabled When KASLR and KASAN are both enabled, we keep the modules where they are, and randomize the placement of the kernel so it is within 2 GB of the module region. The reason for this is that putting modules in the vmalloc region (like we normally do when KASLR is enabled) is not possible in this case, given that the entire vmalloc region is already backed by KASAN zero shadow pages, and so allocating dedicated KASAN shadow space as required by loaded modules is not possible. The default module allocation window is set to [_etext - 128MB, _etext] in kaslr.c, which is appropriate for KASLR kernels booted without a seed or with 'nokaslr' on the command line. However, as it turns out, it is not quite correct for the KASAN case, since it still intersects the vmalloc region at the top, where attempts to allocate shadow pages will collide with the KASAN zero shadow pages, causing a WARN() and all kinds of other trouble. So cap the top end to MODULES_END explicitly when running with KASAN. Cc: # 4.9+ Acked-by: Catalin Marinas Tested-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/module.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index dd080837e6a9..ed3706d6b3a0 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -32,6 +32,7 @@ void *module_alloc(unsigned long size) { + u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; gfp_t gfp_mask = GFP_KERNEL; void *p; @@ -39,9 +40,12 @@ void *module_alloc(unsigned long size) if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) gfp_mask |= __GFP_NOWARN; + if (IS_ENABLED(CONFIG_KASAN)) + /* don't exceed the static module region - see below */ + module_alloc_end = MODULES_END; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + MODULES_VSIZE, - gfp_mask, PAGE_KERNEL_EXEC, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && From aa69fb62bea15126e744af2e02acc0d6cf3ed4da Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Jun 2019 21:20:17 -0700 Subject: [PATCH 097/207] arm64/efi: Mark __efistub_stext_offset as an absolute symbol explicitly After r363059 and r363928 in LLVM, a build using ld.lld as the linker with CONFIG_RANDOMIZE_BASE enabled fails like so: ld.lld: error: relocation R_AARCH64_ABS32 cannot be used against symbol __efistub_stext_offset; recompile with -fPIC Fangrui and Peter figured out that ld.lld is incorrectly considering __efistub_stext_offset as a relative symbol because of the order in which symbols are evaluated. _text is treated as an absolute symbol and stext is a relative symbol, making __efistub_stext_offset a relative symbol. Adding ABSOLUTE will force ld.lld to evalute this expression in the right context and does not change ld.bfd's behavior. ld.lld will need to be fixed but the developers do not see a quick or simple fix without some research (see the linked issue for further explanation). Add this simple workaround so that ld.lld can continue to link kernels. Link: https://github.com/ClangBuiltLinux/linux/issues/561 Link: https://github.com/llvm/llvm-project/commit/025a815d75d2356f2944136269aa5874721ec236 Link: https://github.com/llvm/llvm-project/commit/249fde85832c33f8b06c6b4ac65d1c4b96d23b83 Acked-by: Ard Biesheuvel Debugged-by: Fangrui Song Debugged-by: Peter Smith Suggested-by: Fangrui Song Signed-off-by: Nathan Chancellor [will: add comment] Signed-off-by: Will Deacon --- arch/arm64/kernel/image.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 33f14e484040..b22e8ad071b1 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -78,7 +78,11 @@ #ifdef CONFIG_EFI -__efistub_stext_offset = stext - _text; +/* + * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol: + * https://github.com/ClangBuiltLinux/linux/issues/561 + */ +__efistub_stext_offset = ABSOLUTE(stext - _text); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to From 21acee4ecf9c3d9eff545f50e79c321a0c35d9b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Jun 2019 13:10:41 +0300 Subject: [PATCH 098/207] HID: intel-ish-hid: Fix a use after free in load_fw_from_host() We have to print the filename first before we can kfree it. Fixes: 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader client driver") Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index 22ba21457035..aa2dbed30fc3 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -816,9 +816,9 @@ static int load_fw_from_host(struct ishtp_cl_data *client_data) goto end_err_fw_release; release_firmware(fw); - kfree(filename); dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n", filename); + kfree(filename); return 0; end_err_fw_release: From dcf768b0ac868630e7bdb6f2f1c9fe72788012fa Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Fri, 21 Jun 2019 11:17:36 +0200 Subject: [PATCH 099/207] HID: chicony: add another quirk for PixArt mouse I've spotted another Chicony PixArt mouse in the wild, which requires HID_QUIRK_ALWAYS_POLL quirk, otherwise it disconnects each minute. USB ID of this device is 0x04f2:0x0939. We've introduced quirks like this for other models before, so lets add this mouse too. Link: https://github.com/sriemer/fix-linux-mouse#usb-mouse-disconnectsreconnects-every-minute-on-linux Signed-off-by: Oleksandr Natalenko Acked-by: Sebastian Parschauer Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 22456586034a..27ac81109e3e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -272,6 +272,7 @@ #define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d #define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618 #define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053 +#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2 0x0939 #define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123 #define USB_DEVICE_ID_ASUS_AK1D 0x1125 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index fea7f7ff5ab1..4fbba6a7fb66 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -45,6 +45,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET }, From 315ffcc9a1e054bb460f9203058b52dc26b1173d Mon Sep 17 00:00:00 2001 From: Kyle Godbey Date: Sat, 15 Jun 2019 18:15:06 -0500 Subject: [PATCH 100/207] HID: uclogic: Add support for Huion HS64 tablet Add support for Huion HS64 drawing tablet to hid-uclogic Signed-off-by: Kyle Godbey Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-uclogic-core.c | 2 ++ drivers/hid/hid-uclogic-params.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 27ac81109e3e..1c40b436c431 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -573,6 +573,7 @@ #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e +#define USB_DEVICE_ID_HUION_HS64 0x006d #define USB_VENDOR_ID_IBM 0x04b3 #define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100 diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 8fe02d81265d..914fb527ae7a 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -369,6 +369,8 @@ static const struct hid_device_id uclogic_devices[] = { USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_HUION, + USB_DEVICE_ID_HUION_HS64) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 0187c9f8fc22..273d784fff66 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -977,6 +977,8 @@ int uclogic_params_init(struct uclogic_params *params, /* FALL THROUGH */ case VID_PID(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET): + case VID_PID(USB_VENDOR_ID_HUION, + USB_DEVICE_ID_HUION_HS64): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET): case VID_PID(USB_VENDOR_ID_UCLOGIC, From 3a9a2c86ce6e06dda23fb1e7f7745acaf6d5d0eb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 14 Jun 2019 10:20:27 +0200 Subject: [PATCH 101/207] HID: logitech-dj: Fix forwarding of very long HID++ reports The HID++ spec also defines very long HID++ reports, with a reportid of 0x12. The MX5000 and MX5500 keyboards use 0x12 output reports for sending messages to display on their buildin LCD. Userspace (libmx5000) supports this, in order for this to work when talking to the HID devices instantiated for the keyboard by hid-logitech-dj, we need to properly forward these reports to the device. This commit fixes logi_dj_ll_raw_request not forwarding these reports. Fixes: f2113c3020ef ("HID: logitech-dj: add support for Logitech Bluetooth Mini-Receiver") Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index ce00dc299404..963f48b76bff 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -45,6 +45,7 @@ #define REPORT_ID_HIDPP_SHORT 0x10 #define REPORT_ID_HIDPP_LONG 0x11 +#define REPORT_ID_HIDPP_VERY_LONG 0x12 #define HIDPP_REPORT_SHORT_LENGTH 7 #define HIDPP_REPORT_LONG_LENGTH 20 @@ -1257,7 +1258,8 @@ static int logi_dj_ll_raw_request(struct hid_device *hid, int ret; if ((buf[0] == REPORT_ID_HIDPP_SHORT) || - (buf[0] == REPORT_ID_HIDPP_LONG)) { + (buf[0] == REPORT_ID_HIDPP_LONG) || + (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) { if (count < 2) return -EINVAL; From 0a95fc733da375de0688d0f1fd3a2869a1c1d499 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 14 Jun 2019 16:56:55 +0800 Subject: [PATCH 102/207] HID: multitouch: Add pointstick support for ALPS Touchpad There's a new ALPS touchpad/pointstick combo device that requires MT_CLS_WIN_8_DUAL to make its pointsitck work as a mouse. The device can be found on HP ZBook 17 G5. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1c40b436c431..134686012d75 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -83,6 +83,7 @@ #define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220 #define HID_DEVICE_ID_ALPS_U1 0x1215 #define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C +#define HID_DEVICE_ID_ALPS_1222 0x1222 #define USB_VENDOR_ID_AMI 0x046b diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 1565a307170a..42bb635895cf 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1780,6 +1780,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ALPS_JP, + HID_DEVICE_ID_ALPS_1222) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_DUAL, From b12bbdc5dd883f6575f57e529af26cd2c521b320 Mon Sep 17 00:00:00 2001 From: Hyungwoo Yang Date: Wed, 5 Jun 2019 21:52:27 -0700 Subject: [PATCH 103/207] HID: intel-ish-hid: fix wrong driver_data usage Currently, in suspend() and resume(), ishtp client drivers are using driver_data to get "struct ishtp_cl_device" object which is set by bus driver. It's wrong since the driver_data should not be owned bus. driver_data should be owned by the corresponding ishtp client driver. Due to this, some ishtp client driver like cros_ec_ishtp which uses its driver_data to transfer its data to its child doesn't work correctly. So this patch removes setting driver_data in bus drier and instead of using driver_data to get "struct ishtp_cl_device", since "struct device" is embedded in "struct ishtp_cl_device", we introduce a helper function that returns "struct ishtp_cl_device" from "struct device". Signed-off-by: Hyungwoo Yang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 4 ++-- drivers/hid/intel-ish-hid/ishtp/bus.c | 15 ++++++++++++++- include/linux/intel-ish-client-if.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 56777a43e69c..19102a3be4ca 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -899,7 +899,7 @@ static int hid_ishtp_cl_reset(struct ishtp_cl_device *cl_device) */ static int hid_ishtp_cl_suspend(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); @@ -920,7 +920,7 @@ static int hid_ishtp_cl_suspend(struct device *device) */ static int hid_ishtp_cl_resume(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index fb8ca12955b4..4b4a6047dc72 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -479,7 +479,6 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev, } ishtp_device_ready = true; - dev_set_drvdata(&device->dev, device); return device; } @@ -647,6 +646,20 @@ void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device) } EXPORT_SYMBOL(ishtp_get_drvdata); +/** + * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance + * @device: device instance + * + * Get ish_cl_device instance which embeds device instance in it. + * + * Return: pointer to ishtp_cl_device instance + */ +struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device) +{ + return to_ishtp_cl_device(device); +} +EXPORT_SYMBOL(ishtp_dev_to_cl_device); + /** * ishtp_bus_new_client() - Create a new client * @dev: ISHTP device instance diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 16255c2ca2f4..0d6b4bc191c5 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -103,6 +103,7 @@ void ishtp_put_device(struct ishtp_cl_device *cl_dev); void ishtp_get_device(struct ishtp_cl_device *cl_dev); void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data); void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device); +struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev); int ishtp_register_event_cb(struct ishtp_cl_device *device, void (*read_cb)(struct ishtp_cl_device *)); struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev, From 35594bc7cecf3a78504b590e350570e8f4d7779e Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 29 Apr 2019 11:55:14 +0800 Subject: [PATCH 104/207] pinctrl: mediatek: Ignore interrupts that are wake only during resume Before suspending, mtk-eint would set the interrupt mask to the one in wake_mask. However, some of these interrupts may not have a corresponding interrupt handler, or the interrupt may be disabled. On resume, the eint irq handler would trigger nevertheless, and irq/pm.c:irq_pm_check_wakeup would be called, which would try to call irq_disable. However, if the interrupt is not enabled (irqd_irq_disabled(&desc->irq_data) is true), the call does nothing, and the interrupt is left enabled in the eint driver. Especially for level-sensitive interrupts, this will lead to an interrupt storm on resume. If we detect that an interrupt is only in wake_mask, but not in cur_mask, we can just mask it out immediately (as mtk_eint_resume would do anyway at a later stage in the resume sequence, when restoring cur_mask). Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls") Signed-off-by: Nicolas Boichat Acked-by: Sean Wang Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index f464f8cd274b..737385e86beb 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -318,7 +318,7 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); struct mtk_eint *eint = irq_desc_get_handler_data(desc); unsigned int status, eint_num; - int offset, index, virq; + int offset, mask_offset, index, virq; void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat); int dual_edge, start_level, curr_level; @@ -328,10 +328,24 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) status = readl(reg); while (status) { offset = __ffs(status); + mask_offset = eint_num >> 5; index = eint_num + offset; virq = irq_find_mapping(eint->domain, index); status &= ~BIT(offset); + /* + * If we get an interrupt on pin that was only required + * for wake (but no real interrupt requested), mask the + * interrupt (as would mtk_eint_resume do anyway later + * in the resume sequence). + */ + if (eint->wake_mask[mask_offset] & BIT(offset) && + !(eint->cur_mask[mask_offset] & BIT(offset))) { + writel_relaxed(BIT(offset), reg - + eint->regs->stat + + eint->regs->mask_set); + } + dual_edge = eint->dual_edge[index]; if (dual_edge) { /* From c1f7fec1eb6a2c86d01bc22afce772c743451d88 Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Mon, 10 Jun 2019 13:20:10 -0400 Subject: [PATCH 105/207] x86/speculation: Allow guests to use SSBD even if host does not The bits set in x86_spec_ctrl_mask are used to calculate the guest's value of SPEC_CTRL that is written to the MSR before VMENTRY, and control which mitigations the guest can enable. In the case of SSBD, unless the host has enabled SSBD always on mode (by passing "spec_store_bypass_disable=on" in the kernel parameters), the SSBD bit is not set in the mask and the guest can not properly enable the SSBD always on mitigation mode. This has been confirmed by running the SSBD PoC on a guest using the SSBD always on mitigation mode (booted with kernel parameter "spec_store_bypass_disable=on"), and verifying that the guest is vulnerable unless the host is also using SSBD always on mode. In addition, the guest OS incorrectly reports the SSB vulnerability as mitigated. Always set the SSBD bit in x86_spec_ctrl_mask when the host CPU supports it, allowing the guest to use SSBD whether or not the host has chosen to enable the mitigation in any of its modes. Fixes: be6fcb5478e9 ("x86/bugs: Rework spec_ctrl base and mask logic") Signed-off-by: Alejandro Jimenez Signed-off-by: Thomas Gleixner Reviewed-by: Liam Merwick Reviewed-by: Mark Kanda Reviewed-by: Paolo Bonzini Cc: bp@alien8.de Cc: rkrcmar@redhat.com Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1560187210-11054-1-git-send-email-alejandro.j.jimenez@oracle.com --- arch/x86/kernel/cpu/bugs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } + /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } From 1bf72720281770162c87990697eae1ba2f1d917a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 May 2019 09:09:35 +0200 Subject: [PATCH 106/207] cpu/speculation: Warn on unsupported mitigations= parameter Currently, if the user specifies an unsupported mitigation strategy on the kernel command line, it will be ignored silently. The code will fall back to the default strategy, possibly leaving the system more vulnerable than expected. This may happen due to e.g. a simple typo, or, for a stable kernel release, because not all mitigation strategies have been backported. Inform the user by printing a message. Fixes: 98af8452945c5565 ("cpu/speculation: Add 'mitigations=' cmdline option") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Greg Kroah-Hartman Cc: Ben Hutchings Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190516070935.22546-1-geert@linux-m68k.org --- kernel/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 077fde6fb953..551db494f153 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2339,6 +2339,9 @@ static int __init mitigations_parse_cmdline(char *arg) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + else + pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", + arg); return 0; } From 30d8177e8ac776d89d387fad547af6a0f599210e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Jun 2019 16:08:44 +0800 Subject: [PATCH 107/207] bonding: Always enable vlan tx offload We build vlan on top of bonding interface, which vlan offload is off, bond mode is 802.3ad (LACP) and xmit_hash_policy is BOND_XMIT_POLICY_ENCAP34. Because vlan tx offload is off, vlan tci is cleared and skb push the vlan header in validate_xmit_vlan() while sending from vlan devices. Then in bond_xmit_hash, __skb_flow_dissect() fails to get information from protocol headers encapsulated within vlan, because 'nhoff' is points to IP header, so bond hashing is based on layer 2 info, which fails to distribute packets across slaves. This patch always enable bonding's vlan tx offload, pass the vlan packets to the slave devices with vlan tci, let them to handle vlan implementation. Fixes: 278339a42a1b ("bonding: propogate vlan_features to bonding master") Suggested-by: Jiri Pirko Signed-off-by: YueHaibing Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 407f4095a37a..799fc38c5c34 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4320,12 +4320,12 @@ void bond_setup(struct net_device *bond_dev) bond_dev->features |= NETIF_F_NETNS_LOCAL; bond_dev->hw_features = BOND_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; bond_dev->features |= bond_dev->hw_features; + bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } /* Destroy a bonding device. From 4480879251fb89b1c5585112b1ccc8c3333b41af Mon Sep 17 00:00:00 2001 From: Huaping Zhou Date: Wed, 26 Jun 2019 17:47:49 +0200 Subject: [PATCH 108/207] net/smc: hold conns_lock before calling smc_lgr_register_conn() After smc_lgr_create(), the newly created link group is added to smc_lgr_list, thus is accessible from other context. Although link group creation is serialized by smc_create_lgr_pending, the new link group may still be accessed concurrently. For example, if ib_device is no longer active, smc_ib_port_event_work() will call smc_port_terminate(), which in turn will call __smc_lgr_terminate() on every link group of this device. So conns_lock is required here. Signed-off-by: Huaping Zhou Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2d2850adc2a3..4ca50ddf8d16 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -652,7 +652,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) rc = smc_lgr_create(smc, ini); if (rc) goto out; + lgr = conn->lgr; + write_lock_bh(&lgr->conns_lock); smc_lgr_register_conn(conn); /* add smc conn to lgr */ + write_unlock_bh(&lgr->conns_lock); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; From 8c33bf1b0a9663d1742cb19ee71da46a1d8670dd Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 26 Jun 2019 17:47:50 +0200 Subject: [PATCH 109/207] net/smc: Fix error path in smc_init If register_pernet_subsys success in smc_init, we should cleanup it in case any other error. Fixes: 64e28b52c7a6 (net/smc: add pnet table namespace support") Signed-off-by: YueHaibing Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0c874e996f85..7621ec2f539c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2029,7 +2029,7 @@ static int __init smc_init(void) rc = smc_pnet_init(); if (rc) - return rc; + goto out_pernet_subsys; rc = smc_llc_init(); if (rc) { @@ -2080,6 +2080,9 @@ static int __init smc_init(void) proto_unregister(&smc_proto); out_pnet: smc_pnet_exit(); +out_pernet_subsys: + unregister_pernet_subsys(&smc_net_ops); + return rc; } From ee4297420d56a0033a8593e80b33fcc93fda8509 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 27 Jun 2019 00:03:39 +0800 Subject: [PATCH 110/207] team: Always enable vlan tx offload We should rather have vlan_tci filled all the way down to the transmitting netdevice and let it do the hw/sw vlan implementation. Suggested-by: Jiri Pirko Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b48006e7fa2f..36916bf51ee6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2128,12 +2128,12 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->hw_features = TEAM_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; dev->features |= dev->hw_features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } static int team_newlink(struct net *src_net, struct net_device *dev, From ff8391e1b7d2f04aa3e520b3e839fd202de7d9b3 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 25 Jun 2019 15:56:36 -0700 Subject: [PATCH 111/207] RISC-V: defconfig: enable MMC & SPI for RISC-V Currently, riscv upstream defconfig doesn't let you boot through userspace if rootfs is on the SD card. Let's enable MMC & SPI drivers as well so that one can boot to the user space using default config in upstream kernel. While here, enable automatic mounting of devtmpfs to simplify kernel testing with minimal root filesystems. (pjw) Signed-off-by: Atish Patra Reviewed-by: Palmer Dabbelt [paul.walmsley@sifive.com: mention the DEVTMPFS_MOUNT change in the patch description] Signed-off-by: Paul Walmsley --- arch/riscv/configs/defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4f02967e55de..04944fb4fa7a 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -69,6 +69,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_CLK_SIFIVE=y CONFIG_CLK_SIFIVE_FU540_PRCI=y CONFIG_SIFIVE_PLIC=y +CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -84,4 +85,8 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_SPI=y +CONFIG_MMC_SPI=y +CONFIG_MMC=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_RCU_TRACE is not set From 45b03df2864aa4c67f6a648f0a7951116e1ef069 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 25 Jun 2019 15:01:31 +0530 Subject: [PATCH 112/207] riscv: dts: Re-organize the DT nodes As per the convention for any SOC device with external connection, define only device DT node in SOC DTSi file with status = "disabled" and enable device in Board DTS file with status = "okay" Reported-by: Anup Patel Signed-off-by: Yash Shah Signed-off-by: Paul Walmsley --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 6 ++++++ arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 3c06ee4b2b29..40983491b95f 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -163,6 +163,7 @@ uart0: serial@10010000 { interrupt-parent = <&plic0>; interrupts = <4>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; uart1: serial@10011000 { compatible = "sifive,fu540-c000-uart", "sifive,uart0"; @@ -170,6 +171,7 @@ uart1: serial@10011000 { interrupt-parent = <&plic0>; interrupts = <5>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; i2c0: i2c@10030000 { compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; @@ -181,6 +183,7 @@ i2c0: i2c@10030000 { reg-io-width = <1>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi0: spi@10040000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -191,6 +194,7 @@ qspi0: spi@10040000 { clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi1: spi@10041000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -201,6 +205,7 @@ qspi1: spi@10041000 { clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi2: spi@10050000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -210,6 +215,7 @@ qspi2: spi@10050000 { clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4da88707e28f..0b55c53c08c7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -42,7 +42,20 @@ rtcclk: rtcclk { }; }; +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + &qspi0 { + status = "okay"; flash@0 { compatible = "issi,is25wp256", "jedec,spi-nor"; reg = <0>; From 3cdb0157884344c7b40998f31ce43c4ded593cdd Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 26 Jun 2019 08:19:29 -0700 Subject: [PATCH 113/207] dt-bindings: riscv: resolve 'make dt_binding_check' warnings Rob pointed out that one of the examples in the RISC-V 'cpus' YAML schema results in warnings from 'make dt_binding_check'. Fix these. While here, make the whitespace in the second example consistent with the first example. Signed-off-by: Paul Walmsley Cc: Rob Herring Reviewed-by: Rob Herring # for fixing the dtc warnings --- .../devicetree/bindings/riscv/cpus.yaml | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 27f02ec4bb45..f97a4ecd7b91 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -152,17 +152,19 @@ examples: - | // Example 2: Spike ISA Simulator with 1 Hart cpus { - cpu@0 { - device_type = "cpu"; - reg = <0>; - compatible = "riscv"; - riscv,isa = "rv64imafdc"; - mmu-type = "riscv,sv48"; - interrupt-controller { - #interrupt-cells = <1>; - interrupt-controller; - compatible = "riscv,cpu-intc"; - }; - }; + #address-cells = <1>; + #size-cells = <0>; + cpu@0 { + device_type = "cpu"; + reg = <0>; + compatible = "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "riscv,sv48"; + interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; }; ... From 5b18f1289808fee5d04a7e6ecf200189f41a4db6 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Wed, 26 Jun 2019 02:21:16 -0400 Subject: [PATCH 114/207] ipv4: reset rt_iif for recirculated mcast/bcast out pkts Multicast or broadcast egress packets have rt_iif set to the oif. These packets might be recirculated back as input and lookup to the raw sockets may fail because they are bound to the incoming interface (skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function returns rt_iif instead of skb_iif. Hence, the lookup fails. v2: Make it non vrf specific (David Ahern). Reword the changelog to reflect it. Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/route.h | 1 + net/ipv4/ip_output.c | 12 ++++++++++++ net/ipv4/route.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/include/net/route.h b/include/net/route.h index 065b47754f05..55ff71ffb796 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool nopolicy, bool noxfrm, bool will_cache); +struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 16f9159234a2..8c2ec35b6512 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk static int ip_mc_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct rtable *new_rt; int ret; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); @@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, return ret; } + /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting + * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten, + * see ipv4_pktinfo_prepare(). + */ + new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb)); + if (new_rt) { + new_rt->rt_iif = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &new_rt->dst); + } + return dev_loopback_xmit(net, sk, skb); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6cb7cff22db9..8ea0735a6754 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev, } EXPORT_SYMBOL(rt_dst_alloc); +struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) +{ + struct rtable *new_rt; + + new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt->dst.flags); + + if (new_rt) { + new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); + new_rt->rt_flags = rt->rt_flags; + new_rt->rt_type = rt->rt_type; + new_rt->rt_is_input = rt->rt_is_input; + new_rt->rt_iif = rt->rt_iif; + new_rt->rt_pmtu = rt->rt_pmtu; + new_rt->rt_mtu_locked = rt->rt_mtu_locked; + new_rt->rt_gw_family = rt->rt_gw_family; + if (rt->rt_gw_family == AF_INET) + new_rt->rt_gw4 = rt->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + new_rt->rt_gw6 = rt->rt_gw6; + INIT_LIST_HEAD(&new_rt->rt_uncached); + + new_rt->dst.flags |= DST_HOST; + new_rt->dst.input = rt->dst.input; + new_rt->dst.output = rt->dst.output; + new_rt->dst.error = rt->dst.error; + new_rt->dst.lastuse = jiffies; + new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); + } + return new_rt; +} +EXPORT_SYMBOL(rt_dst_clone); + /* called in rcu_read_lock() section */ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, From 48dd73d08d4dda47ee31cc8611fb16840fc16803 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Sat, 22 Jun 2019 08:46:37 +0000 Subject: [PATCH 115/207] net: aquantia: fix vlans not working over bridged network In configuration of vlan over bridge over aquantia device it was found that vlan tagged traffic is dropped on chip. The reason is that bridge device enables promisc mode, but in atlantic chip vlan filters will still apply. So we have to corellate promisc settings with vlan configuration. The solution is to track in a separate state variable the need of vlan forced promisc. And also consider generic promisc configuration when doing vlan filter config. Fixes: 7975d2aff5af ("net: aquantia: add support of rx-vlan-filter offload") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/aq_filters.c | 10 ++++++++-- .../net/ethernet/aquantia/atlantic/aq_nic.c | 1 + .../net/ethernet/aquantia/atlantic/aq_nic.h | 1 + .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 19 +++++++++++++------ 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 18bc035da850..1fff462a4175 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) return err; if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (hweight < AQ_VLAN_MAX_FILTERS) - err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true); + if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) { + err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, + !(aq_nic->packet_filter & IFF_PROMISC)); + aq_nic->aq_nic_cfg.is_vlan_force_promisc = false; + } else { /* otherwise left in promiscue mode */ + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; + } } return err; @@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic) if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl)) return -EOPNOTSUPP; + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); if (err) return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0da5e161ec5d..41172fbebddd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -126,6 +126,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; cfg->features = cfg->aq_hw_caps->hw_features; + cfg->is_vlan_force_promisc = true; } static int aq_nic_update_link_status(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index eb2e3c7c36f9..0f22f5d5691b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -35,6 +35,7 @@ struct aq_nic_cfg_s { u32 flow_control; u32 link_speed_msk; u32 wol; + bool is_vlan_force_promisc; u16 is_mc_list_enabled; u16 mc_list_count; bool is_autoneg; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 1c7593d54035..13ac2661a473 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -778,8 +778,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, unsigned int packet_filter) { unsigned int i = 0U; + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + + hw_atl_rpfl2promiscuous_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC)); + + hw_atl_rpf_vlan_prom_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC) || + cfg->is_vlan_force_promisc); - hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI), 0); @@ -788,13 +795,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); - self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); + cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) hw_atl_rpfl2_uc_flr_en_set(self, - (self->aq_nic_cfg->is_mc_list_enabled && - (i <= self->aq_nic_cfg->mc_list_count)) ? - 1U : 0U, i); + (cfg->is_mc_list_enabled && + (i <= cfg->mc_list_count)) ? + 1U : 0U, i); return aq_hw_err_from_flags(self); } @@ -1086,7 +1093,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self, static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable) { /* set promisc in case of disabing the vland filter */ - hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable); + hw_atl_rpf_vlan_prom_mode_en_set(self, !enable); return aq_hw_err_from_flags(self); } From 22e72b5e049b95789b34a4cef316c791e7c2fed5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Jun 2019 17:12:57 +0200 Subject: [PATCH 116/207] net: dsa: microchip: Use gpiod_set_value_cansleep() Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly fine for the kernel to sleep for a bit in ksz_switch_register(). Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Linus Walleij Cc: Tristram Ha Cc: Woojung Huh Reviewed-by: Andrew Lunn Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f46086fa9064..db91b213eae1 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev, return PTR_ERR(dev->reset_gpio); if (dev->reset_gpio) { - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); mdelay(10); - gpiod_set_value(dev->reset_gpio, 0); + gpiod_set_value_cansleep(dev->reset_gpio, 0); } mutex_init(&dev->dev_mutex); @@ -487,7 +487,7 @@ void ksz_switch_remove(struct ksz_device *dev) dsa_unregister_switch(dev->ds); if (dev->reset_gpio) - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); } EXPORT_SYMBOL(ksz_switch_remove); From 9b1c1ef13b35fa35051b635ca9fbda39fe6bbc70 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 24 Jun 2019 16:01:08 +0200 Subject: [PATCH 117/207] ipv6: constify rt6_nexthop() There is no functional change in this patch, it only prepares the next one. rt6_nexthop() will be used by ip6_dst_lookup_neigh(), which uses const variables. Signed-off-by: Nicolas Dichtel Reported-by: kbuild test robot Acked-by: Nick Desaulniers Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 +- include/net/ip6_route.h | 4 ++-- net/bluetooth/6lowpan.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/netfilter/nf_flow_table_ip.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 11b9525dff27..311b0cc6eb98 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -350,8 +350,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + const struct in6_addr *nexthop; struct neighbour *neigh; - struct in6_addr *nexthop; int ret; nf_reset(skb); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4790beaa86e0..ee7405e759ba 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -262,8 +262,8 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, - struct in6_addr *daddr) +static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, + const struct in6_addr *daddr) { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 19d27bee285e..1555b0c6f7ec 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -160,10 +160,10 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { - struct lowpan_peer *peer; - struct in6_addr *nexthop; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int count = atomic_read(&dev->peer_count); + const struct in6_addr *nexthop; + struct lowpan_peer *peer; BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 834475717110..21efcd02f337 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -59,8 +59,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + const struct in6_addr *nexthop; struct neighbour *neigh; - struct in6_addr *nexthop; int ret; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 241317473114..cdfc33517e85 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -439,9 +439,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, struct nf_flowtable *flow_table = priv; struct flow_offload_tuple tuple = {}; enum flow_offload_tuple_dir dir; + const struct in6_addr *nexthop; struct flow_offload *flow; struct net_device *outdev; - struct in6_addr *nexthop; struct ipv6hdr *ip6h; struct rt6_info *rt; From 2c6b55f45d53420d8310d41310e0e2cd41fe073f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 24 Jun 2019 16:01:09 +0200 Subject: [PATCH 118/207] ipv6: fix neighbour resolution with raw socket The scenario is the following: the user uses a raw socket to send an ipv6 packet, destinated to a not-connected network, and specify a connected nh. Here is the corresponding python script to reproduce this scenario: import socket IPPROTO_RAW = 255 send_s = socket.socket(socket.AF_INET6, socket.SOCK_RAW, IPPROTO_RAW) # scapy # p = IPv6(src='fd00:100::1', dst='fd00:200::fa')/ICMPv6EchoRequest() # str(p) req = b'`\x00\x00\x00\x00\x08:@\xfd\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xfd\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfa\x80\x00\x81\xc0\x00\x00\x00\x00' send_s.sendto(req, ('fd00:175::2', 0, 0, 0)) fd00:175::/64 is a connected route and fd00:200::fa is not a connected host. With this scenario, the kernel starts by sending a NS to resolve fd00:175::2. When it receives the NA, it flushes its queue and try to send the initial packet. But instead of sending it, it sends another NS to resolve fd00:200::fa, which obvioulsy fails, thus the packet is dropped. If the user sends again the packet, it now uses the right nh (fd00:175::2). The problem is that ip6_dst_lookup_neigh() uses the rt6i_gateway, which is :: because the associated route is a connected route, thus it uses the dst addr of the packet. Let's use rt6_nexthop() to choose the right nh. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aade636c6be6..97a843cf164c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -218,7 +218,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, { const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); - return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); + return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), + dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) From 471a739a47aa7d582f0cdf9d392957d04632bae2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Jun 2019 00:20:23 +0200 Subject: [PATCH 119/207] PCI: PM: Avoid skipping bus-level PM on platforms without ACPI There are platforms that do not call pm_set_suspend_via_firmware(), so pm_suspend_via_firmware() returns 'false' on them, but the power states of PCI devices (PCIe ports in particular) are changed as a result of powering down core platform components during system-wide suspend. Thus the pm_suspend_via_firmware() checks in pci_pm_suspend_noirq() and pci_pm_resume_noirq() introduced by commit 3e26c5feed2a ("PCI: PM: Skip devices in D0 for suspend-to- idle") are not sufficient to determine that devices left in D0 during suspend will remain in D0 during resume and so the bus-level power management can be skipped for them. For this reason, introduce a new global suspend flag, PM_SUSPEND_FLAG_NO_PLATFORM, set it for suspend-to-idle only and replace the pm_suspend_via_firmware() checks mentioned above with checks against this flag. Fixes: 3e26c5feed2a ("PCI: PM: Skip devices in D0 for suspend-to-idle") Reported-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg Reviewed-by: Mika Westerberg --- drivers/pci/pci-driver.c | 8 ++++---- include/linux/suspend.h | 26 ++++++++++++++++++++++++-- kernel/power/suspend.c | 3 +++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 98af9ecd4a90..ca3793002e2f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -859,7 +859,7 @@ static int pci_pm_suspend_noirq(struct device *dev) pci_dev->bus->self->skip_bus_pm = true; } - if (pci_dev->skip_bus_pm && !pm_suspend_via_firmware()) { + if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) { dev_dbg(dev, "PCI PM: Skipped\n"); goto Fixup; } @@ -914,10 +914,10 @@ static int pci_pm_resume_noirq(struct device *dev) /* * In the suspend-to-idle case, devices left in D0 during suspend will * stay in D0, so it is not necessary to restore or update their - * configuration here and attempting to put them into D0 again may - * confuse some firmware, so avoid doing that. + * configuration here and attempting to put them into D0 again is + * pointless, so avoid doing that. */ - if (!pci_dev->skip_bus_pm || pm_suspend_via_firmware()) + if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform())) pci_pm_default_resume_early(pci_dev); pci_fixup_device(pci_fixup_resume_early, pci_dev); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8594001e8be8..f0d262ad7b78 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -209,8 +209,9 @@ extern int suspend_valid_only_mem(suspend_state_t state); extern unsigned int pm_suspend_global_flags; -#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0) -#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1) +#define PM_SUSPEND_FLAG_FW_SUSPEND BIT(0) +#define PM_SUSPEND_FLAG_FW_RESUME BIT(1) +#define PM_SUSPEND_FLAG_NO_PLATFORM BIT(2) static inline void pm_suspend_clear_flags(void) { @@ -227,6 +228,11 @@ static inline void pm_set_resume_via_firmware(void) pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; } +static inline void pm_set_suspend_no_platform(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_NO_PLATFORM; +} + /** * pm_suspend_via_firmware - Check if platform firmware will suspend the system. * @@ -268,6 +274,22 @@ static inline bool pm_resume_via_firmware(void) return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); } +/** + * pm_suspend_no_platform - Check if platform may change device power states. + * + * To be called during system-wide power management transitions to sleep states + * or during the subsequent system-wide transitions back to the working state. + * + * Return 'true' if the power states of devices remain under full control of the + * kernel throughout the system-wide suspend and resume cycle in progress (that + * is, if a device is put into a certain power state during suspend, it can be + * expected to remain in that state during resume). + */ +static inline bool pm_suspend_no_platform(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_NO_PLATFORM); +} + /* Suspend-to-idle state machnine. */ enum s2idle_states { S2IDLE_STATE_NONE, /* Not suspended/suspending. */ diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..096211299c07 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state) pm_suspend_target_state = state; + if (state == PM_SUSPEND_TO_IDLE) + pm_set_suspend_no_platform(); + error = platform_suspend_begin(state); if (error) goto Close; From e3f9dada0abe1864bf467e84d201144eddb9ef88 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 21 Jun 2019 13:45:42 -0700 Subject: [PATCH 120/207] dt-bindings: clock: sifive: add MIT license as an option for the header file At Bin Meng's request, add the MIT license as an option for the SiFive FU540 PRCI header file. Signed-off-by: Paul Walmsley Cc: Bin Meng --- include/dt-bindings/clock/sifive-fu540-prci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h index 6a0b70a37d78..3b21d0522c91 100644 --- a/include/dt-bindings/clock/sifive-fu540-prci.h +++ b/include/dt-bindings/clock/sifive-fu540-prci.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Copyright (C) 2018-2019 SiFive, Inc. * Wesley Terpstra From 0db7f5cd4aeba4cc63d0068598b3350eba8bb4cd Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 18 Jun 2019 17:39:15 +0800 Subject: [PATCH 121/207] riscv: mm: Fix code comment Fix the comment since vmalloc_fault doesn't reach flush_tlb_fix_spurious_fault. Signed-off-by: ShihPo Hung Cc: Palmer Dabbelt Cc: Albert Ou Cc: Paul Walmsley Cc: linux-riscv@lists.infradead.org Reviewed-by: Palmer Dabbelt Signed-off-by: Paul Walmsley --- arch/riscv/mm/fault.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3e2708c626a8..f960c3f4ce47 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -272,9 +272,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * entries, but in RISC-V, SFENCE.VMA specifies an * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. - * Relying on flush_tlb_fix_spurious_fault would - * suffice, but the extra traps reduce - * performance. So, eagerly SFENCE.VMA. */ local_flush_tlb_page(addr); From 25bff6d5478b2a02368097015b7d8eb727c87e16 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 25 Jun 2019 00:21:45 +0800 Subject: [PATCH 122/207] sctp: change to hold sk after auth shkey is created successfully Now in sctp_endpoint_init(), it holds the sk then creates auth shkey. But when the creation fails, it doesn't release the sk, which causes a sk defcnf leak, Here to fix it by only holding the sk when auth shkey is created successfully. Fixes: a29a5bd4f5c3 ("[SCTP]: Implement SCTP-AUTH initializations.") Reported-by: syzbot+afabda3890cc2f765041@syzkaller.appspotmail.com Reported-by: syzbot+276ca1c77a19977c0130@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e358437ba29b..69cebb2c998b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -118,10 +118,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - /* Remember who we are attached to. */ - ep->base.sk = sk; - sock_hold(ep->base.sk); - /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); @@ -154,6 +150,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + /* Remember who we are attached to. */ + ep->base.sk = sk; + sock_hold(ep->base.sk); + return ep; nomem_shkey: From 89ed5b519004a7706f50b70f611edbd3aaacff2c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 25 Jun 2019 17:57:49 -0400 Subject: [PATCH 123/207] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET When an application is run that: a) Sets its scheduler to be SCHED_FIFO and b) Opens a memory mapped AF_PACKET socket, and sends frames with the MSG_DONTWAIT flag cleared, its possible for the application to hang forever in the kernel. This occurs because when waiting, the code in tpacket_snd calls schedule, which under normal circumstances allows other tasks to run, including ksoftirqd, which in some cases is responsible for freeing the transmitted skb (which in AF_PACKET calls a destructor that flips the status bit of the transmitted frame back to available, allowing the transmitting task to complete). However, when the calling application is SCHED_FIFO, its priority is such that the schedule call immediately places the task back on the cpu, preventing ksoftirqd from freeing the skb, which in turn prevents the transmitting task from detecting that the transmission is complete. We can fix this by converting the schedule call to a completion mechanism. By using a completion queue, we force the calling task, when it detects there are no more frames to send, to schedule itself off the cpu until such time as the last transmitted skb is freed, allowing forward progress to be made. Tested by myself and the reporter, with good results Change Notes: V1->V2: Enhance the sleep logic to support being interruptible and allowing for honoring to SK_SNDTIMEO (Willem de Bruijn) V2->V3: Rearrage the point at which we wait for the completion queue, to avoid needing to check for ph/skb being null at the end of the loop. Also move the complete call to the skb destructor to avoid needing to modify __packet_set_status. Also gate calling complete on packet_read_pending returning zero to avoid multiple calls to complete. (Willem de Bruijn) Move timeo computation within loop, to re-fetch the socket timeout since we also use the timeo variable to record the return code from the wait_for_complete call (Neil Horman) V3->V4: Willem has requested that the control flow be restored to the previous state. Doing so lets us eliminate the need for the po->wait_on_complete flag variable, and lets us get rid of the packet_next_frame function, but introduces another complexity. Specifically, but using the packet pending count, we can, if an applications calls sendmsg multiple times with MSG_DONTWAIT set, each set of transmitted frames, when complete, will cause tpacket_destruct_skb to issue a complete call, for which there will never be a wait_on_completion call. This imbalance will lead to any future call to wait_for_completion here to return early, when the frames they sent may not have completed. To correct this, we need to re-init the completion queue on every call to tpacket_snd before we enter the loop so as to ensure we wait properly for the frames we send in this iteration. Change the timeout and interrupted gotos to out_put rather than out_status so that we don't try to free a non-existant skb Clean up some extra newlines (Willem de Bruijn) Reviewed-by: Willem de Bruijn Signed-off-by: Neil Horman Reported-by: Matteo Croce Signed-off-by: David S. Miller --- net/packet/af_packet.c | 20 +++++++++++++++++--- net/packet/internal.h | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0b4cf94f0233..5f78df080573 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2401,6 +2401,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); + + if (!packet_read_pending(&po->tx_ring)) + complete(&po->skb_completion); } sock_wfree(skb); @@ -2585,7 +2588,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; @@ -2600,6 +2603,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; + long timeo = 0; mutex_lock(&po->pg_vec_lock); @@ -2646,12 +2650,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; + reinit_completion(&po->skb_completion); + do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && need_resched()) - schedule(); + if (need_wait && skb) { + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); + if (timeo <= 0) { + err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; + goto out_put; + } + } + /* check for additional frames */ continue; } @@ -3207,6 +3220,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); po = pkt_sk(sk); + init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bb7c5fb3bff..c70a2794456f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -128,6 +128,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; + struct completion skb_completion; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; From 33d4a5a7a5b4d02915d765064b2319e90a11cbde Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Thu, 27 Jun 2019 11:47:32 +0900 Subject: [PATCH 124/207] cpu/hotplug: Fix out-of-bounds read when setting fail state Setting invalid value to /sys/devices/system/cpu/cpuX/hotplug/fail can control `struct cpuhp_step *sp` address, results in the following global-out-of-bounds read. Reproducer: # echo -2 > /sys/devices/system/cpu/cpu0/hotplug/fail KASAN report: BUG: KASAN: global-out-of-bounds in write_cpuhp_fail+0x2cd/0x2e0 Read of size 8 at addr ffffffff89734438 by task bash/1941 CPU: 0 PID: 1941 Comm: bash Not tainted 5.2.0-rc6+ #31 Call Trace: write_cpuhp_fail+0x2cd/0x2e0 dev_attr_store+0x58/0x80 sysfs_kf_write+0x13d/0x1a0 kernfs_fop_write+0x2bc/0x460 vfs_write+0x1e1/0x560 ksys_write+0x126/0x250 do_syscall_64+0xc1/0x390 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f05e4f4c970 The buggy address belongs to the variable: cpu_hotplug_lock+0x98/0xa0 Memory state around the buggy address: ffffffff89734300: fa fa fa fa 00 00 00 00 00 00 00 00 00 00 00 00 ffffffff89734380: fa fa fa fa 00 00 00 00 00 00 00 00 00 00 00 00 >ffffffff89734400: 00 00 00 00 fa fa fa fa 00 00 00 00 fa fa fa fa ^ ffffffff89734480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffff89734500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Add a sanity check for the value written from user space. Fixes: 1db49484f21ed ("smp/hotplug: Hotplug state fail injection") Signed-off-by: Eiichi Tsukata Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Link: https://lkml.kernel.org/r/20190627024732.31672-1-devel@etsukata.com --- kernel/cpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 551db494f153..ef1c565edc5d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1964,6 +1964,9 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) + return -EINVAL; + /* * Cannot fail STARTING/DYING callbacks. */ From 6fd2fe494b17bf2dec37b610d23a43a72b16923a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Jun 2019 22:22:09 -0400 Subject: [PATCH 125/207] copy_process(): don't use ksys_close() on cleanups anon_inode_getfd() should be used *ONLY* in situations when we are guaranteed to be past the last failure point (including copying the descriptor number to userland, at that). And ksys_close() should not be used for cleanups at all. anon_inode_getfile() is there for all nontrivial cases like that. Just use that... Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Al Viro Reviewed-by: Jann Horn Signed-off-by: Christian Brauner --- kernel/fork.c | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 39a3adaa4ad1..399aca51ff75 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1712,31 +1712,6 @@ const struct file_operations pidfd_fops = { #endif }; -/** - * pidfd_create() - Create a new pid file descriptor. - * - * @pid: struct pid that the pidfd will reference - * - * This creates a new pid file descriptor with the O_CLOEXEC flag set. - * - * Note, that this function can only be called after the fd table has - * been unshared to avoid leaking the pidfd to the new process. - * - * Return: On success, a cloexec pidfd is returned. - * On error, a negative errno number will be returned. - */ -static int pidfd_create(struct pid *pid) -{ - int fd; - - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); - if (fd < 0) - put_pid(pid); - - return fd; -} - static void __delayed_free_task(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); @@ -1774,6 +1749,7 @@ static __latent_entropy struct task_struct *copy_process( int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; + struct file *pidfile = NULL; /* * Don't allow sharing the root directory with processes in a different @@ -2046,11 +2022,20 @@ static __latent_entropy struct task_struct *copy_process( * if the fd table isn't shared). */ if (clone_flags & CLONE_PIDFD) { - retval = pidfd_create(pid); + retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (retval < 0) goto bad_fork_free_pid; pidfd = retval; + + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, + O_RDWR | O_CLOEXEC); + if (IS_ERR(pidfile)) { + put_unused_fd(pidfd); + goto bad_fork_free_pid; + } + get_pid(pid); /* held by pidfile now */ + retval = put_user(pidfd, parent_tidptr); if (retval) goto bad_fork_put_pidfd; @@ -2168,6 +2153,9 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + /* past the last point of failure */ + if (pidfile) + fd_install(pidfd, pidfile); init_task_pid_links(p); if (likely(p->pid)) { @@ -2234,8 +2222,10 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cgroup_threadgroup_change_end: cgroup_threadgroup_change_end(current); bad_fork_put_pidfd: - if (clone_flags & CLONE_PIDFD) - ksys_close(pidfd); + if (clone_flags & CLONE_PIDFD) { + fput(pidfile); + put_unused_fd(pidfd); + } bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); From 30d158b143b6575261ab610ae7b1b4f7fe3830b3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2019 11:35:14 +0200 Subject: [PATCH 126/207] proc: remove useless d_is_dir() check Remove the d_is_dir() check from tgid_pidfd_to_pid(). It is pointless since you should never get &proc_tgid_base_operations for f_op on a non-directory. Suggested-by: Al Viro Signed-off-by: Christian Brauner --- fs/proc/base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..255f6754c70d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); From 9d957a959bc8c3dfe37572ac8e99affb5a885965 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 26 Jun 2019 11:54:45 +0800 Subject: [PATCH 127/207] pinctrl: mediatek: Update cur_mask in mask/mask ops During suspend/resume, mtk_eint_mask may be called while wake_mask is active. For example, this happens if a wake-source with an active interrupt handler wakes the system: irq/pm.c:irq_pm_check_wakeup would disable the interrupt, so that it can be handled later on in the resume flow. However, this may happen before mtk_eint_do_resume is called: in this case, wake_mask is loaded, and cur_mask is restored from an older copy, re-enabling the interrupt, and causing an interrupt storm (especially for level interrupts). Step by step, for a line that has both wake and interrupt enabled: 1. cur_mask[irq] = 1; wake_mask[irq] = 1; EINT_EN[irq] = 1 (interrupt enabled at hardware level) 2. System suspends, resumes due to that line (at this stage EINT_EN == wake_mask) 3. irq_pm_check_wakeup is called, and disables the interrupt => EINT_EN[irq] = 0, but we still have cur_mask[irq] = 1 4. mtk_eint_do_resume is called, and restores EINT_EN = cur_mask, so it reenables EINT_EN[irq] = 1 => interrupt storm as the driver is not yet ready to handle the interrupt. This patch fixes the issue in step 3, by recording all mask/unmask changes in cur_mask. This also avoids the need to read the current mask in eint_do_suspend, and we can remove mtk_eint_chip_read_mask function. The interrupt will be re-enabled properly later on, sometimes after mtk_eint_do_resume, when the driver is ready to handle it. Fixes: 58a5e1b64bb0 ("pinctrl: mediatek: Implement wake handler and suspend resume") Signed-off-by: Nicolas Boichat Acked-by: Sean Wang Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 737385e86beb..7e526bcf5e0b 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -113,6 +113,8 @@ static void mtk_eint_mask(struct irq_data *d) void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, eint->regs->mask_set); + eint->cur_mask[d->hwirq >> 5] &= ~mask; + writel(mask, reg); } @@ -123,6 +125,8 @@ static void mtk_eint_unmask(struct irq_data *d) void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, eint->regs->mask_clr); + eint->cur_mask[d->hwirq >> 5] |= mask; + writel(mask, reg); if (eint->dual_edge[d->hwirq]) @@ -217,19 +221,6 @@ static void mtk_eint_chip_write_mask(const struct mtk_eint *eint, } } -static void mtk_eint_chip_read_mask(const struct mtk_eint *eint, - void __iomem *base, u32 *buf) -{ - int port; - void __iomem *reg; - - for (port = 0; port < eint->hw->ports; port++) { - reg = base + eint->regs->mask + (port << 2); - buf[port] = ~readl_relaxed(reg); - /* Mask is 0 when irq is enabled, and 1 when disabled. */ - } -} - static int mtk_eint_irq_request_resources(struct irq_data *d) { struct mtk_eint *eint = irq_data_get_irq_chip_data(d); @@ -384,7 +375,6 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) int mtk_eint_do_suspend(struct mtk_eint *eint) { - mtk_eint_chip_read_mask(eint, eint->base, eint->cur_mask); mtk_eint_chip_write_mask(eint, eint->base, eint->wake_mask); return 0; From 80031361747aec92163464f2ee08870fec33bcb0 Mon Sep 17 00:00:00 2001 From: Joshua Scott Date: Wed, 26 Jun 2019 10:11:08 +1200 Subject: [PATCH 128/207] ARM: dts: armada-xp-98dx3236: Switch to armada-38x-uart serial node Switch to the "marvell,armada-38x-uart" driver variant to empty the UART buffer before writing to the UART_LCR register. Signed-off-by: Joshua Scott Tested-by: Andrew Lunn Acked-by: Gregory CLEMENT . Cc: stable@vger.kernel.org Fixes: 43e28ba87708 ("ARM: dts: Use armada-370-xp as a base for armada-xp-98dx3236") Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-xp-98dx3236.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi index 59753470cd34..267d0c178e55 100644 --- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi +++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi @@ -336,3 +336,11 @@ &sdio { status = "disabled"; }; +&uart0 { + compatible = "marvell,armada-38x-uart"; +}; + +&uart1 { + compatible = "marvell,armada-38x-uart"; +}; + From d6b8bd679c9c8856fa04b80490765c43a4cb613b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 May 2019 07:58:38 -0400 Subject: [PATCH 129/207] ceph: fix ceph_mdsc_build_path to not stop on first component When ceph_mdsc_build_path is handed a positive dentry, it will return a zero-length path string with the base set to that dentry. This is not what we want. Always include at least one path component in the string. ceph_mdsc_build_path has behaved this way for a long time but it didn't matter until recent d_name handling rework. Fixes: 964fff7491e4 ("ceph: use ceph_mdsc_build_path instead of clone_dentry_name") Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6af2d0d4a87a..c8a9b89b922d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2121,9 +2121,10 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; From 83f44ae0f8afcc9da659799db8693f74847e66b3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 26 Jun 2019 19:33:52 -0500 Subject: [PATCH 130/207] perf/x86: Always store regs->ip in perf_callchain_kernel() The stacktrace_map_raw_tp BPF selftest is failing because the RIP saved by perf_arch_fetch_caller_regs() isn't getting saved by perf_callchain_kernel(). This was broken by the following commit: d15d356887e7 ("perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER") With that change, when starting with non-HW regs, the unwinder starts with the current stack frame and unwinds until it passes up the frame which called perf_arch_fetch_caller_regs(). So regs->ip needs to be saved deliberately. Fixes: d15d356887e7 ("perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER") Signed-off-by: Song Liu Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Kairui Song Cc: Steven Rostedt Cc: Borislav Petkov Link: https://lkml.kernel.org/r/3975a298fa52b506fea32666d8ff6a13467eee6d.1561595111.git.jpoimboe@redhat.com --- arch/x86/events/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..4fb3ca1e699d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_hw_regs(regs)) { - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) unwind_start(&state, current, regs, NULL); - } else { + else unwind_start(&state, current, NULL, (void *)regs->sp); - } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); From ae6a45a0868986f69039a2150d3b2b9ca294c378 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 26 Jun 2019 19:33:55 -0500 Subject: [PATCH 131/207] x86/unwind/orc: Fall back to using frame pointers for generated code The ORC unwinder can't unwind through BPF JIT generated code because there are no ORC entries associated with the code. If an ORC entry isn't available, try to fall back to frame pointers. If BPF and other generated code always do frame pointer setup (even with CONFIG_FRAME_POINTERS=n) then this will allow ORC to unwind through most generated code despite there being no corresponding ORC entries. Fixes: d15d356887e7 ("perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER") Reported-by: Song Liu Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Kairui Song Cc: Steven Rostedt Cc: Borislav Petkov Link: https://lkml.kernel.org/r/b6f69208ddff4343d56b7bfac1fc7cfcd62689e8.1561595111.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b66b5c5aec..72b997eaa1fc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip); * But they are copies of the ftrace entries that are static and * defined in ftrace_*.S, which do have orc entries. * - * If the undwinder comes across a ftrace trampoline, then find the + * If the unwinder comes across a ftrace trampoline, then find the * ftrace function that was used to create it, and use that ftrace - * function's orc entrie, as the placement of the return code in + * function's orc entry, as the placement of the return code in * the stack will be identical. */ static struct orc_entry *orc_ftrace_find(unsigned long ip) @@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, + .end = 0, +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state) * calls and calls to noreturn functions. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); - if (!orc) - goto err; + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } /* End-of-stack check for kernel threads: */ if (orc->sp_reg == ORC_REG_UNDEFINED) { From 5de254dca87ab614b9c058246ee94c58a840e358 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 27 Jun 2019 14:57:02 +1000 Subject: [PATCH 132/207] cifs: fix crash querying symlinks stored as reparse-points We never parsed/returned any data from .get_link() when the object is a windows reparse-point containing a symlink. This results in the VFS layer oopsing accessing an uninitialized buffer: ... [ 171.407172] Call Trace: [ 171.408039] readlink_copy+0x29/0x70 [ 171.408872] vfs_readlink+0xc1/0x1f0 [ 171.409709] ? readlink_copy+0x70/0x70 [ 171.410565] ? simple_attr_release+0x30/0x30 [ 171.411446] ? getname_flags+0x105/0x2a0 [ 171.412231] do_readlinkat+0x1b7/0x1e0 [ 171.412938] ? __ia32_compat_sys_newfstat+0x30/0x30 ... Fix this by adding code to handle these buffers and make sure we do return a valid buffer to .get_link() CC: Stable Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 64 ++++++++++++++++++++++++++++++++++++++++++++--- fs/cifs/smb2pdu.h | 14 ++++++++++- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3fdc6a41b304..9fd56b0acd7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2372,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, kfree(dfs_rsp); return rc; } + +static int +parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int sub_len; + unsigned int sub_offset; + + /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */ + if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) { + cifs_dbg(VFS, "srv returned invalid symlink buffer\n"); + return -EIO; + } + + sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); + sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + if (sub_offset + 20 > plen || + sub_offset + sub_len + 20 > plen) { + cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); + return -EIO; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2401,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct kvec close_iov[1]; struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; - char *ioctl_buf; + struct reparse_data_buffer *reparse_buf; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + *target_path = NULL; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2483,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == 0) && (is_reparse_point)) { /* See MS-FSCC 2.3.23 */ - ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); plen = le32_to_cpu(ioctl_rsp->OutputCount); if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > rsp_iov[1].iov_len) { - cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); + cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n", + plen); rc = -EIO; goto querty_exit; } - /* Do stuff with ioctl_buf/plen */ + if (plen < 8) { + cifs_dbg(VFS, "reparse buffer is too small. Must be " + "at least 8 bytes but was %d\n", plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) { + cifs_dbg(VFS, "srv returned invalid reparse buf " + "length: %d\n", plen); + rc = -EIO; + goto querty_exit; + } + + rc = parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)reparse_buf, + plen, target_path, cifs_sb); goto querty_exit; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7d5813bebd8..858353d20c39 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer { __u8 PathBuffer[0]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + +struct reparse_symlink_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; /* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ From 2acf5a3e6e9371e63c9e4ff54d84d08f630467a0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Jun 2019 17:43:08 +0100 Subject: [PATCH 133/207] ALSA: usb-audio: fix sign unintended sign extension on left shifts There are a couple of left shifts of unsigned 8 bit values that first get promoted to signed ints and hence get sign extended on the shift if the top bit of the 8 bit values are set. Fix this by casting the 8 bit values to unsigned ints to stop the unintentional sign extension. Addresses-Coverity: ("Unintended sign extension") Signed-off-by: Colin Ian King Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index a751a18ca4c2..5783329a3237 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -754,7 +754,7 @@ static int snd_ni_control_init_val(struct usb_mixer_interface *mixer, return err; } - kctl->private_value |= (value << 24); + kctl->private_value |= ((unsigned int)value << 24); return 0; } @@ -915,7 +915,7 @@ static int snd_ftu_eff_switch_init(struct usb_mixer_interface *mixer, if (err < 0) return err; - kctl->private_value |= value[0] << 24; + kctl->private_value |= (unsigned int)value[0] << 24; return 0; } From be132e1375c1fffe48801296279079f8a59a9ed3 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 27 Jun 2019 16:42:00 +0200 Subject: [PATCH 134/207] drm/etnaviv: add missing failure path to destroy suballoc When something goes wrong in the GPU init after the cmdbuf suballocator has been constructed, we fail to destroy it properly. This causes havok later when the GPU is unbound due to a module unload or similar. Fixes: e66774dd6f6a (drm/etnaviv: add cmdbuf suballocator) Signed-off-by: Lucas Stach Tested-by: Russell King --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 72d01e873160..5418a1a87b2c 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -760,7 +760,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) if (IS_ERR(gpu->cmdbuf_suballoc)) { dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n"); ret = PTR_ERR(gpu->cmdbuf_suballoc); - goto fail; + goto destroy_iommu; } /* Create buffer: */ @@ -768,7 +768,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) PAGE_SIZE); if (ret) { dev_err(gpu->dev, "could not create command buffer\n"); - goto destroy_iommu; + goto destroy_suballoc; } if (gpu->mmu->version == ETNAVIV_IOMMU_V1 && @@ -800,6 +800,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) free_buffer: etnaviv_cmdbuf_free(&gpu->buffer); gpu->buffer.suballoc = NULL; +destroy_suballoc: + etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc); + gpu->cmdbuf_suballoc = NULL; destroy_iommu: etnaviv_iommu_destroy(gpu->mmu); gpu->mmu = NULL; From bef33e19203dde434bcdf21c449e3fb4f06c2618 Mon Sep 17 00:00:00 2001 From: Dennis Wassenberg Date: Fri, 28 Jun 2019 10:54:53 +0200 Subject: [PATCH 135/207] ALSA: hda/realtek - Change front mic location for Lenovo M710q On M710q Lenovo ThinkCentre machine, there are two front mics, we change the location for one of them to avoid conflicts. Signed-off-by: Dennis Wassenberg Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 35f01f5102da..48f3c5b8d6e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7088,6 +7088,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x3111, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), From c3ea60c231446663afd6ea1054da6b7f830855ca Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Jun 2019 10:54:29 +0100 Subject: [PATCH 136/207] ALSA: seq: fix incorrect order of dest_client/dest_ports arguments There are two occurrances of a call to snd_seq_oss_fill_addr where the dest_client and dest_port arguments are in the wrong order. Fix this by swapping them around. Addresses-Coverity: ("Arguments in wrong order") Signed-off-by: Colin Ian King Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_ioctl.c | 2 +- sound/core/seq/oss/seq_oss_rw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_ioctl.c b/sound/core/seq/oss/seq_oss_ioctl.c index 5b8520177b0e..7d72e3d48ad5 100644 --- a/sound/core/seq/oss/seq_oss_ioctl.c +++ b/sound/core/seq/oss/seq_oss_ioctl.c @@ -62,7 +62,7 @@ static int snd_seq_oss_oob_user(struct seq_oss_devinfo *dp, void __user *arg) if (copy_from_user(ev, arg, 8)) return -EFAULT; memset(&tmpev, 0, sizeof(tmpev)); - snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.port, dp->addr.client); + snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port); tmpev.time.tick = 0; if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) { snd_seq_oss_dispatch(dp, &tmpev, 0, 0); diff --git a/sound/core/seq/oss/seq_oss_rw.c b/sound/core/seq/oss/seq_oss_rw.c index eb1ef12181f3..1063e1b16ea0 100644 --- a/sound/core/seq/oss/seq_oss_rw.c +++ b/sound/core/seq/oss/seq_oss_rw.c @@ -174,7 +174,7 @@ insert_queue(struct seq_oss_devinfo *dp, union evrec *rec, struct file *opt) memset(&event, 0, sizeof(event)); /* set dummy -- to be sure */ event.type = SNDRV_SEQ_EVENT_NOTEOFF; - snd_seq_oss_fill_addr(dp, &event, dp->addr.port, dp->addr.client); + snd_seq_oss_fill_addr(dp, &event, dp->addr.client, dp->addr.port); if (snd_seq_oss_process_event(dp, rec, &event)) return 0; /* invalid event - no need to insert queue */ From 7e3d3620974b743b91b1f9d0660061b1de20174c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Jun 2019 19:15:44 -0400 Subject: [PATCH 137/207] SUNRPC: Fix up calculation of client message length In the case where a record marker was used, xs_sendpages() needs to return the length of the payload + record marker so that we operate correctly in the case of a partial transmission. When the callers check return value, they therefore need to take into account the record marker length. Fixes: 06b5fc3ad94e ("Merge tag 'nfs-rdma-for-5.1-1'...") Cc: stable@vger.kernel.org # 5.1+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c69951ed2ebc..36652352a38c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -950,6 +950,8 @@ static int xs_local_send_request(struct rpc_rqst *req) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; int status; int sent = 0; @@ -964,9 +966,7 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@ -976,7 +976,7 @@ static int xs_local_send_request(struct rpc_rqst *req) if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; @@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; bool vm_wait = false; int status; int sent; @@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) while (1) { sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status); @@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) * reset the count of bytes sent. */ transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; From 68f461593f76bd5f17e87cdd0bea28f4278c7268 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jun 2019 16:41:16 -0400 Subject: [PATCH 138/207] NFS/flexfiles: Use the correct TCP timeout for flexfiles I/O Fix a typo where we're confusing the default TCP retrans value (NFS_DEF_TCP_RETRANS) for the default TCP timeout value. Fixes: 15d03055cf39f ("pNFS/flexfiles: Set reasonable default ...") Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); From d5b844a2cf507fc7642c9ae80a9d585db3065c28 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 27 Jun 2019 10:13:34 +0200 Subject: [PATCH 139/207] ftrace/x86: Remove possible deadlock between register_kprobe() and ftrace_run_update_code() The commit 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module text permissions race") causes a possible deadlock between register_kprobe() and ftrace_run_update_code() when ftrace is using stop_machine(). The existing dependency chain (in reverse order) is: -> #1 (text_mutex){+.+.}: validate_chain.isra.21+0xb32/0xd70 __lock_acquire+0x4b8/0x928 lock_acquire+0x102/0x230 __mutex_lock+0x88/0x908 mutex_lock_nested+0x32/0x40 register_kprobe+0x254/0x658 init_kprobes+0x11a/0x168 do_one_initcall+0x70/0x318 kernel_init_freeable+0x456/0x508 kernel_init+0x22/0x150 ret_from_fork+0x30/0x34 kernel_thread_starter+0x0/0xc -> #0 (cpu_hotplug_lock.rw_sem){++++}: check_prev_add+0x90c/0xde0 validate_chain.isra.21+0xb32/0xd70 __lock_acquire+0x4b8/0x928 lock_acquire+0x102/0x230 cpus_read_lock+0x62/0xd0 stop_machine+0x2e/0x60 arch_ftrace_update_code+0x2e/0x40 ftrace_run_update_code+0x40/0xa0 ftrace_startup+0xb2/0x168 register_ftrace_function+0x64/0x88 klp_patch_object+0x1a2/0x290 klp_enable_patch+0x554/0x980 do_one_initcall+0x70/0x318 do_init_module+0x6e/0x250 load_module+0x1782/0x1990 __s390x_sys_finit_module+0xaa/0xf0 system_call+0xd8/0x2d0 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(text_mutex); lock(cpu_hotplug_lock.rw_sem); lock(text_mutex); lock(cpu_hotplug_lock.rw_sem); It is similar problem that has been solved by the commit 2d1e38f56622b9b ("kprobes: Cure hotplug lock ordering issues"). Many locks are involved. To be on the safe side, text_mutex must become a low level lock taken after cpu_hotplug_lock.rw_sem. This can't be achieved easily with the current ftrace design. For example, arm calls set_all_modules_text_rw() already in ftrace_arch_code_modify_prepare(), see arch/arm/kernel/ftrace.c. This functions is called: + outside stop_machine() from ftrace_run_update_code() + without stop_machine() from ftrace_module_enable() Fortunately, the problematic fix is needed only on x86_64. It is the only architecture that calls set_all_modules_text_rw() in ftrace path and supports livepatching at the same time. Therefore it is enough to move text_mutex handling from the generic kernel/trace/ftrace.c into arch/x86/kernel/ftrace.c: ftrace_arch_code_modify_prepare() ftrace_arch_code_modify_post_process() This patch basically reverts the ftrace part of the problematic commit 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module text permissions race"). And provides x86_64 specific-fix. Some refactoring of the ftrace code will be needed when livepatching is implemented for arm or nds32. These architectures call set_all_modules_text_rw() and use stop_machine() at the same time. Link: http://lkml.kernel.org/r/20190627081334.12793-1-pmladek@suse.com Fixes: 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module text permissions race") Acked-by: Thomas Gleixner Reported-by: Miroslav Benes Reviewed-by: Miroslav Benes Reviewed-by: Josh Poimboeuf Signed-off-by: Petr Mladek [ As reviewed by Miroslav Benes , removed return value of ftrace_run_update_code() as it is a void function. ] Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 3 +++ kernel/trace/ftrace.c | 10 +--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..33786044d5ac 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -35,6 +36,7 @@ int ftrace_arch_code_modify_prepare(void) { + mutex_lock(&text_mutex); set_kernel_text_rw(); set_all_modules_text_rw(); return 0; @@ -44,6 +46,7 @@ int ftrace_arch_code_modify_post_process(void) { set_all_modules_text_ro(); set_kernel_text_ro(); + mutex_unlock(&text_mutex); return 0; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 38277af44f5c..576c41644e77 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,7 +34,6 @@ #include #include #include -#include #include @@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command) { int ret; - mutex_lock(&text_mutex); - ret = ftrace_arch_code_modify_prepare(); FTRACE_WARN_ON(ret); if (ret) - goto out_unlock; + return; /* * By default we use stop_machine() to modify the code. @@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - -out_unlock: - mutex_unlock(&text_mutex); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, @@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod) struct ftrace_page *pg; mutex_lock(&ftrace_lock); - mutex_lock(&text_mutex); if (ftrace_disabled) goto out_unlock; @@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod) ftrace_arch_code_modify_post_process(); out_unlock: - mutex_unlock(&text_mutex); mutex_unlock(&ftrace_lock); process_cached_mods(mod->name); From 39611265edc1af3d574178202e19c31e187e7cf2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 27 Jun 2019 21:18:19 -0400 Subject: [PATCH 140/207] ftrace/x86: Add a comment to why we take text_mutex in ftrace_arch_code_modify_prepare() Taking the text_mutex in ftrace_arch_code_modify_prepare() is to fix a race against module loading and live kernel patching that might try to change the text permissions while ftrace has it as read/write. This really needs to be documented in the code. Add a comment that does such. Link: http://lkml.kernel.org/r/20190627211819.5a591f52@gandalf.local.home Suggested-by: Josh Poimboeuf Reviewed-by: Josh Poimboeuf Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 33786044d5ac..d7e93b2783fd 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -36,6 +36,11 @@ int ftrace_arch_code_modify_prepare(void) { + /* + * Need to grab text_mutex to prevent a race from module loading + * and live kernel patching from changing the text permissions while + * ftrace has it set to "read/write". + */ mutex_lock(&text_mutex); set_kernel_text_rw(); set_all_modules_text_rw(); From d122ed6288d9821b405b0f84a3937955b9df545f Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Fri, 28 Jun 2019 19:56:40 +0900 Subject: [PATCH 141/207] tracing: Fix memory leak in tracing_err_log_open() When tracing_err_log_open() calls seq_open(), allocated memory is not freed. kmemleak report: unreferenced object 0xffff92c0781d1100 (size 128): comm "tail", pid 15116, jiffies 4295163855 (age 22.704s) hex dump (first 32 bytes): 00 f0 08 e5 c0 92 ff ff 00 10 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000000d0687d5>] kmem_cache_alloc+0x11f/0x1e0 [<000000003e3039a8>] seq_open+0x2f/0x90 [<000000008dd36b7d>] tracing_err_log_open+0x67/0x140 [<000000005a431ae2>] do_dentry_open+0x1df/0x3a0 [<00000000a2910603>] vfs_open+0x2f/0x40 [<0000000038b0a383>] path_openat+0x2e8/0x1690 [<00000000fe025bda>] do_filp_open+0x9b/0x110 [<00000000483a5091>] do_sys_open+0x1ba/0x260 [<00000000c558b5fd>] __x64_sys_openat+0x20/0x30 [<000000006881ec07>] do_syscall_64+0x5a/0x130 [<00000000571c2e94>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by calling seq_release() in tracing_err_log_fops.release(). Link: http://lkml.kernel.org/r/20190628105640.GA1863@DESKTOP Fixes: 8a062902be725 ("tracing: Add tracing error log") Reviewed-by: Tom Zanussi Signed-off-by: Takeshi Misawa Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83e08b78dbee..4122ccde6ec2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7126,12 +7126,24 @@ static ssize_t tracing_err_log_write(struct file *file, return count; } +static int tracing_err_log_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + return 0; +} + static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, - .release = tracing_release_generic_tr, + .release = tracing_err_log_release, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) From 46cc0b44428d0f0e81f11ea98217fc0edfbeab07 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Tue, 25 Jun 2019 10:29:10 +0900 Subject: [PATCH 142/207] tracing/snapshot: Resize spare buffer if size changed Current snapshot implementation swaps two ring_buffers even though their sizes are different from each other, that can cause an inconsistency between the contents of buffer_size_kb file and the current buffer size. For example: # cat buffer_size_kb 7 (expanded: 1408) # echo 1 > events/enable # grep bytes per_cpu/cpu0/stats bytes: 1441020 # echo 1 > snapshot // current:1408, spare:1408 # echo 123 > buffer_size_kb // current:123, spare:1408 # echo 1 > snapshot // current:1408, spare:123 # grep bytes per_cpu/cpu0/stats bytes: 1443700 # cat buffer_size_kb 123 // != current:1408 And also, a similar per-cpu case hits the following WARNING: Reproducer: # echo 1 > per_cpu/cpu0/snapshot # echo 123 > buffer_size_kb # echo 1 > per_cpu/cpu0/snapshot WARNING: WARNING: CPU: 0 PID: 1946 at kernel/trace/trace.c:1607 update_max_tr_single.part.0+0x2b8/0x380 Modules linked in: CPU: 0 PID: 1946 Comm: bash Not tainted 5.2.0-rc6 #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 RIP: 0010:update_max_tr_single.part.0+0x2b8/0x380 Code: ff e8 dc da f9 ff 0f 0b e9 88 fe ff ff e8 d0 da f9 ff 44 89 ee bf f5 ff ff ff e8 33 dc f9 ff 41 83 fd f5 74 96 e8 b8 da f9 ff <0f> 0b eb 8d e8 af da f9 ff 0f 0b e9 bf fd ff ff e8 a3 da f9 ff 48 RSP: 0018:ffff888063e4fca0 EFLAGS: 00010093 RAX: ffff888066214380 RBX: ffffffff99850fe0 RCX: ffffffff964298a8 RDX: 0000000000000000 RSI: 00000000fffffff5 RDI: 0000000000000005 RBP: 1ffff1100c7c9f96 R08: ffff888066214380 R09: ffffed100c7c9f9b R10: ffffed100c7c9f9a R11: 0000000000000003 R12: 0000000000000000 R13: 00000000ffffffea R14: ffff888066214380 R15: ffffffff99851060 FS: 00007f9f8173c700(0000) GS:ffff88806d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000714dc0 CR3: 0000000066fa6000 CR4: 00000000000006f0 Call Trace: ? trace_array_printk_buf+0x140/0x140 ? __mutex_lock_slowpath+0x10/0x10 tracing_snapshot_write+0x4c8/0x7f0 ? trace_printk_init_buffers+0x60/0x60 ? selinux_file_permission+0x3b/0x540 ? tracer_preempt_off+0x38/0x506 ? trace_printk_init_buffers+0x60/0x60 __vfs_write+0x81/0x100 vfs_write+0x1e1/0x560 ksys_write+0x126/0x250 ? __ia32_sys_read+0xb0/0xb0 ? do_syscall_64+0x1f/0x390 do_syscall_64+0xc1/0x390 entry_SYSCALL_64_after_hwframe+0x49/0xbe This patch adds resize_buffer_duplicate_size() to check if there is a difference between current/spare buffer sizes and resize a spare buffer if necessary. Link: http://lkml.kernel.org/r/20190625012910.13109-1-devel@etsukata.com Cc: stable@vger.kernel.org Fixes: ad909e21bbe69 ("tracing: Add internal tracing_snapshot() functions") Signed-off-by: Eiichi Tsukata Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4122ccde6ec2..c3aabb576fe5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, break; } #endif - if (!tr->allocated_snapshot) { + if (tr->allocated_snapshot) + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, iter->cpu_file); + else ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - break; - } + if (ret < 0) + break; local_irq_disable(); /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) From 2f040d27080ddfffecff2be1a77107c494d0e4f4 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 27 Jun 2019 19:24:14 +0200 Subject: [PATCH 143/207] drm/panfrost: Fix a double-free error drm_gem_shmem_create_with_handle() returns a GEM object and attach a handle to it. When the user closes the DRM FD, the core releases all GEM handles along with their backing GEM objs, which can lead to a double-free issue if panfrost_ioctl_create_bo() failed and went through the err_free path where drm_gem_object_put_unlocked() is called without deleting the associate handle. Replace this drm_gem_object_put_unlocked() call by a drm_gem_handle_delete() one to fix that. Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") Cc: Signed-off-by: Boris Brezillon Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190627172414.27231-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d11e2281dde6..7e43b25785f7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -63,7 +63,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, return 0; err_free: - drm_gem_object_put_unlocked(&shmem->base); + drm_gem_handle_delete(file, args->handle); return ret; } From 36d6cb73d5e60cdb2724c21ccba93c964e6a16f9 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 28 Jun 2019 12:06:37 -0700 Subject: [PATCH 144/207] mm/dev_pfn: exclude MEMORY_DEVICE_PRIVATE while computing virtual address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The presence of struct page does not guarantee linear mapping for the pfn physical range. Device private memory which is non-coherent is excluded from linear mapping during devm_memremap_pages() though they will still have struct page coverage. Change pfn_t_to_virt() to just check for device private memory before giving out virtual address for a given pfn. pfn_t_to_virt() actually has no callers. Let's fix it for the 5.2 kernel and remove it in 5.3. Link: http://lkml.kernel.org/r/1558089514-25067-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Dan Williams Cc: Jérôme Glisse Cc: Laurent Dufour Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pfn_t.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 7bb77850c65a..3c202a11a79e 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -68,7 +68,7 @@ static inline phys_addr_t pfn_t_to_phys(pfn_t pfn) static inline void *pfn_t_to_virt(pfn_t pfn) { - if (pfn_t_has_page(pfn)) + if (pfn_t_has_page(pfn) && !is_device_private_page(pfn_t_to_page(pfn))) return __va(pfn_t_to_phys(pfn)); return NULL; } From cb8f381f1613cafe3aec30809991cd56e7135d92 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 28 Jun 2019 12:06:40 -0700 Subject: [PATCH 145/207] fs/proc/array.c: allow reporting eip/esp for all coredumping threads 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") stopped reporting eip/esp and fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") reintroduced the feature to fix a regression with userspace core dump handlers (such as minicoredumper). Because PF_DUMPCORE is only set for the primary thread, this didn't fix the original problem for secondary threads. Allow reporting the eip/esp for all threads by checking for PF_EXITING as well. This is set for all the other threads when they are killed. coredump_wait() waits for all the tasks to become inactive before proceeding to invoke a core dumper. Link: http://lkml.kernel.org/r/87y32p7i7a.fsf@linutronix.de Link: http://lkml.kernel.org/r/20190522161614.628-1-jlu@pengutronix.de Fixes: fd7d56270b526ca3 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") Signed-off-by: John Ogness Reported-by: Jan Luebbe Tested-by: Jan Luebbe Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..55180501b915 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); From 29b190fa774dd1b72a1a6f19687d55dc72ea83be Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 28 Jun 2019 12:06:43 -0700 Subject: [PATCH 146/207] mm/mempolicy.c: fix an incorrect rebind node in mpol_rebind_nodemask mpol_rebind_nodemask() is called for MPOL_BIND and MPOL_INTERLEAVE mempoclicies when the tasks's cpuset's mems_allowed changes. For policies created without MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES, it works by remapping the policy's allowed nodes (stored in v.nodes) using the previous value of mems_allowed (stored in w.cpuset_mems_allowed) as the domain of map and the new mems_allowed (passed as nodes) as the range of the map (see the comment of bitmap_remap() for details). The result of remapping is stored back as policy's nodemask in v.nodes, and the new value of mems_allowed should be stored in w.cpuset_mems_allowed to facilitate the next rebind, if it happens. However, 213980c0f23b ("mm, mempolicy: simplify rebinding mempolicies when updating cpusets") introduced a bug where the result of remapping is stored in w.cpuset_mems_allowed instead. Thus, a mempolicy's allowed nodes can evolve in an unexpected way after a series of rebinding due to cpuset mems_allowed changes, possibly binding to a wrong node or a smaller number of nodes which may e.g. overload them. This patch fixes the bug so rebinding again works as intended. [vbabka@suse.cz: new changlog] Link: http://lkml.kernel.org/r/ef6a69c6-c052-b067-8f2c-9d615c619bb9@suse.cz Link: http://lkml.kernel.org/r/1558768043-23184-1-git-send-email-zhongjiang@huawei.com Fixes: 213980c0f23b ("mm, mempolicy: simplify rebinding mempolicies when updating cpusets") Signed-off-by: zhong jiang Reviewed-by: Vlastimil Babka Cc: Oscar Salvador Cc: Anshuman Khandual Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Ralph Campbell Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 01600d80ae01..fdcb73536319 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) else { nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed, *nodes); - pol->w.cpuset_mems_allowed = tmp; + pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) From 867bfa4a5fcee66f2b25639acae718e8b28b25a5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 28 Jun 2019 12:06:46 -0700 Subject: [PATCH 147/207] fs/binfmt_flat.c: make load_flat_shared_library() work load_flat_shared_library() is broken: It only calls load_flat_file() if prepare_binprm() returns zero, but prepare_binprm() returns the number of bytes read - so this only happens if the file is empty. Instead, call into load_flat_file() if the number of bytes read is non-negative. (Even if the number of bytes is zero - in that case, load_flat_file() will see nullbytes and return a nice -ENOEXEC.) In addition, remove the code related to bprm creds and stop using prepare_binprm() - this code is loading a library, not a main executable, and it only actually uses the members "buf", "file" and "filename" of the linux_binprm struct. Instead, call kernel_read() directly. Link: http://lkml.kernel.org/r/20190524201817.16509-1-jannh@google.com Fixes: 287980e49ffc ("remove lots of IS_ERR_VALUE abuses") Signed-off-by: Jann Horn Cc: Alexander Viro Cc: Kees Cook Cc: Nicolas Pitre Cc: Arnd Bergmann Cc: Geert Uytterhoeven Cc: Russell King Cc: Greg Ungerer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_flat.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..e4b59e76afb0 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -856,9 +856,14 @@ static int load_flat_file(struct linux_binprm *bprm, static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; - - res = prepare_binprm(&bprm); - - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); From 97abc889ee296faf95ca0e978340fb7b942a3e32 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 28 Jun 2019 12:06:50 -0700 Subject: [PATCH 148/207] signal: remove the wrong signal_pending() check in restore_user_sigmask() This is the minimal fix for stable, I'll send cleanups later. Commit 854a6ed56839 ("signal: Add restore_user_sigmask()") introduced the visible change which breaks user-space: a signal temporary unblocked by set_user_sigmask() can be delivered even if the caller returns success or timeout. Change restore_user_sigmask() to accept the additional "interrupted" argument which should be used instead of signal_pending() check, and update the callers. Eric said: : For clarity. I don't think this is required by posix, or fundamentally to : remove the races in select. It is what linux has always done and we have : applications who care so I agree this fix is needed. : : Further in any case where the semantic change that this patch rolls back : (aka where allowing a signal to be delivered and the select like call to : complete) would be advantage we can do as well if not better by using : signalfd. : : Michael is there any chance we can get this guarantee of the linux : implementation of pselect and friends clearly documented. The guarantee : that if the system call completes successfully we are guaranteed that no : signal that is unblocked by using sigmask will be delivered? Link: http://lkml.kernel.org/r/20190604134117.GA29963@redhat.com Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()") Signed-off-by: Oleg Nesterov Reported-by: Eric Wong Tested-by: Eric Wong Acked-by: "Eric W. Biederman" Acked-by: Arnd Bergmann Acked-by: Deepa Dinamani Cc: Michael Kerrisk Cc: Jens Axboe Cc: Davidlohr Bueso Cc: Jason Baron Cc: Thomas Gleixner Cc: Al Viro Cc: David Laight Cc: [5.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 28 ++++++++++++++++++++-------- fs/eventpoll.c | 4 ++-- fs/io_uring.c | 7 ++++--- fs/select.c | 18 ++++++------------ include/linux/signal.h | 2 +- kernel/signal.c | 5 +++-- 6 files changed, 36 insertions(+), 28 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 3490d1fa0e16..c1e581dd32f5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c6f513100cc9..4c74c768ae43 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, error == -EINTR); return error; } @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, err == -EINTR); return err; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 86a2bd721900..e6981d3f4468 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2201,11 +2201,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - if (ret == -ERESTARTSYS) - ret = -EINTR; if (sig) - restore_user_sigmask(sig, &sigsaved); + restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS); + + if (ret == -ERESTARTSYS) + ret = -EINTR; return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..a4d8f6e8b63c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; diff --git a/include/linux/signal.h b/include/linux/signal.h index 9702016734b1..78c2bb376954 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -276,7 +276,7 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, sigset_t *oldset, size_t sigsetsize); extern void restore_user_sigmask(const void __user *usigmask, - sigset_t *sigsaved); + sigset_t *sigsaved, bool interrupted); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/signal.c b/kernel/signal.c index d622eac9d169..edf8915ddd54 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask); * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed in from userland for the syscalls. */ -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved, + bool interrupted) { if (!usigmask) @@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) * Restoring sigmask here can lead to delivering signals that the above * syscalls are intended to block because of the sigmask passed in. */ - if (signal_pending(current)) { + if (interrupted) { current->saved_sigmask = *sigsaved; set_restore_sigmask(); return; From b38e5962f8ed0d2a2b28a887fc2221f7f41db119 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 28 Jun 2019 12:06:53 -0700 Subject: [PATCH 149/207] mm: soft-offline: return -EBUSY if set_hwpoison_free_buddy_page() fails The pass/fail of soft offline should be judged by checking whether the raw error page was finally contained or not (i.e. the result of set_hwpoison_free_buddy_page()), but current code do not work like that. It might lead us to misjudge the test result when set_hwpoison_free_buddy_page() fails. Without this fix, there are cases where madvise(MADV_SOFT_OFFLINE) may not offline the original page and will not return an error. Link: http://lkml.kernel.org/r/1560154686-18497-2-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Fixes: 6bc9b56433b76 ("mm: fix race on soft-offlining") Reviewed-by: Mike Kravetz Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Xishi Qiu Cc: "Chen, Jerry T" Cc: "Zhuo, Qiuxu" Cc: [4.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8da0334b9ca0..8ee7b16235ac 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1730,6 +1730,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (!ret) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); + else + ret = -EBUSY; } } return ret; From faf53def3b143df11062d87c12afe6afeb6f8cc7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 28 Jun 2019 12:06:56 -0700 Subject: [PATCH 150/207] mm: hugetlb: soft-offline: dissolve_free_huge_page() return zero on !PageHuge madvise(MADV_SOFT_OFFLINE) often returns -EBUSY when calling soft offline for hugepages with overcommitting enabled. That was caused by the suboptimal code in current soft-offline code. See the following part: ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { ... } else { /* * We set PG_hwpoison only when the migration source hugepage * was successfully dissolved, because otherwise hwpoisoned * hugepage remains on free hugepage list, then userspace will * find it as SIGBUS by allocation failure. That's not expected * in soft-offlining. */ ret = dissolve_free_huge_page(page); if (!ret) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); } } return ret; Here dissolve_free_huge_page() returns -EBUSY if the migration source page was freed into buddy in migrate_pages(), but even in that case we actually has a chance that set_hwpoison_free_buddy_page() succeeds. So that means current code gives up offlining too early now. dissolve_free_huge_page() checks that a given hugepage is suitable for dissolving, where we should return success for !PageHuge() case because the given hugepage is considered as already dissolved. This change also affects other callers of dissolve_free_huge_page(), which are cleaned up together. [n-horiguchi@ah.jp.nec.com: v3] Link: http://lkml.kernel.org/r/1560761476-4651-3-git-send-email-n-horiguchi@ah.jp.nec.comLink: http://lkml.kernel.org/r/1560154686-18497-3-git-send-email-n-horiguchi@ah.jp.nec.com Fixes: 6bc9b56433b76 ("mm: fix race on soft-offlining") Signed-off-by: Naoya Horiguchi Reported-by: Chen, Jerry T Tested-by: Chen, Jerry T Reviewed-by: Mike Kravetz Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Xishi Qiu Cc: "Chen, Jerry T" Cc: "Zhuo, Qiuxu" Cc: [4.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 29 ++++++++++++++++++++--------- mm/memory-failure.c | 5 +---- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac843d32b019..ede7e7f5d1ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1510,16 +1510,29 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, /* * Dissolve a given free hugepage into free buddy pages. This function does - * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the - * dissolution fails because a give page is not a free hugepage, or because - * free hugepages are fully reserved. + * nothing for in-use hugepages and non-hugepages. + * This function returns values like below: + * + * -EBUSY: failed to dissolved free hugepages or the hugepage is in-use + * (allocated or reserved.) + * 0: successfully dissolved free hugepages or the page is not a + * hugepage (considered as already dissolved) */ int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!PageHuge(page)) + return 0; + spin_lock(&hugetlb_lock); - if (PageHuge(page) && !page_count(page)) { + if (!PageHuge(page)) { + rc = 0; + goto out; + } + + if (!page_count(page)) { struct page *head = compound_head(page); struct hstate *h = page_hstate(head); int nid = page_to_nid(head); @@ -1564,11 +1577,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { page = pfn_to_page(pfn); - if (PageHuge(page) && !page_count(page)) { - rc = dissolve_free_huge_page(page); - if (rc) - break; - } + rc = dissolve_free_huge_page(page); + if (rc) + break; } return rc; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8ee7b16235ac..d9cc6606f409 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1856,11 +1856,8 @@ static int soft_offline_in_use_page(struct page *page, int flags) static int soft_offline_free_page(struct page *page) { - int rc = 0; - struct page *head = compound_head(page); + int rc = dissolve_free_huge_page(page); - if (PageHuge(head)) - rc = dissolve_free_huge_page(page); if (!rc) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); From 432b1de0de02a83f64695e69a2d83cbee10c236f Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 28 Jun 2019 12:06:59 -0700 Subject: [PATCH 151/207] mm/oom_kill.c: fix uninitialized oc->constraint In dump_oom_summary() oc->constraint is used to show oom_constraint_text, but it hasn't been set before. So the value of it is always the default value 0. We should inititialize it before. Bellow is the output when memcg oom occurs, before this patch: oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null), cpuset=/,mems_allowed=0,oom_memcg=/foo,task_memcg=/foo,task=bash,pid=7997,uid=0 after this patch: oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null), cpuset=/,mems_allowed=0,oom_memcg=/foo,task_memcg=/foo,task=bash,pid=13681,uid=0 Link: http://lkml.kernel.org/r/1560522038-15879-1-git-send-email-laoar.shao@gmail.com Fixes: ef8444ea01d7 ("mm, oom: reorganize the oom report in dump_header") Signed-off-by: Yafang Shao Acked-by: Michal Hocko Cc: Wind Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5a58778c91d4..f719b64741d6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -987,8 +987,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ -static void check_panic_on_oom(struct oom_control *oc, - enum oom_constraint constraint) +static void check_panic_on_oom(struct oom_control *oc) { if (likely(!sysctl_panic_on_oom)) return; @@ -998,7 +997,7 @@ static void check_panic_on_oom(struct oom_control *oc, * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ - if (constraint != CONSTRAINT_NONE) + if (oc->constraint != CONSTRAINT_NONE) return; } /* Do not panic for oom kills triggered by sysrq */ @@ -1035,7 +1034,6 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); bool out_of_memory(struct oom_control *oc) { unsigned long freed = 0; - enum oom_constraint constraint = CONSTRAINT_NONE; if (oom_killer_disabled) return false; @@ -1071,10 +1069,10 @@ bool out_of_memory(struct oom_control *oc) * Check if there were limitations on the allocation (only relevant for * NUMA and memcg) that may require different handling. */ - constraint = constrained_alloc(oc); - if (constraint != CONSTRAINT_MEMORY_POLICY) + oc->constraint = constrained_alloc(oc); + if (oc->constraint != CONSTRAINT_MEMORY_POLICY) oc->nodemask = NULL; - check_panic_on_oom(oc, constraint); + check_panic_on_oom(oc); if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && From 4ada1e810038e9dbc20e40b524e05ee1a9d31f98 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Jun 2019 12:07:03 -0700 Subject: [PATCH 152/207] initramfs: fix populate_initrd_image() section mismatch With gcc-4.6.3: WARNING: vmlinux.o(.text.unlikely+0x140): Section mismatch in reference from the function populate_initrd_image() to the variable .init.ramfs.info:__initramfs_size The function populate_initrd_image() references the variable __init __initramfs_size. This is often because populate_initrd_image lacks a __init annotation or the annotation of __initramfs_size is wrong. WARNING: vmlinux.o(.text.unlikely+0x14c): Section mismatch in reference from the function populate_initrd_image() to the function .init.text:unpack_to_rootfs() The function populate_initrd_image() references the function __init unpack_to_rootfs(). This is often because populate_initrd_image lacks a __init annotation or the annotation of unpack_to_rootfs is wrong. WARNING: vmlinux.o(.text.unlikely+0x198): Section mismatch in reference from the function populate_initrd_image() to the function .init.text:xwrite() The function populate_initrd_image() references the function __init xwrite(). This is often because populate_initrd_image lacks a __init annotation or the annotation of xwrite is wrong. Indeed, if the compiler decides not to inline populate_initrd_image(), a warning is generated. Fix this by adding the missing __init annotations. Link: http://lkml.kernel.org/r/20190617074340.12779-1-geert@linux-m68k.org Fixes: 7c184ecd262fe64f ("initramfs: factor out a helper to populate the initrd image") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 178130fd61c2..c47dad0884f7 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -617,7 +617,7 @@ static inline void clean_rootfs(void) #endif /* CONFIG_BLK_DEV_RAM */ #ifdef CONFIG_BLK_DEV_RAM -static void populate_initrd_image(char *err) +static void __init populate_initrd_image(char *err) { ssize_t written; int fd; @@ -637,7 +637,7 @@ static void populate_initrd_image(char *err) ksys_close(fd); } #else -static void populate_initrd_image(char *err) +static void __init populate_initrd_image(char *err) { printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); } From 7298e3b0a149c91323b3205d325e942c3b3b9ef6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Jun 2019 12:07:05 -0700 Subject: [PATCH 153/207] mm/page_idle.c: fix oops because end_pfn is larger than max_pfn Currently the calcuation of end_pfn can round up the pfn number to more than the actual maximum number of pfns, causing an Oops. Fix this by ensuring end_pfn is never more than max_pfn. This can be easily triggered when on systems where the end_pfn gets rounded up to more than max_pfn using the idle-page stress-ng stress test: sudo stress-ng --idle-page 0 BUG: unable to handle kernel paging request at 00000000000020d8 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 11039 Comm: stress-ng-idle- Not tainted 5.0.0-5-generic #6-Ubuntu Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:page_idle_get_page+0xc8/0x1a0 Code: 0f b1 0a 75 7d 48 8b 03 48 89 c2 48 c1 e8 33 83 e0 07 48 c1 ea 36 48 8d 0c 40 4c 8d 24 88 49 c1 e4 07 4c 03 24 d5 00 89 c3 be <49> 8b 44 24 58 48 8d b8 80 a1 02 00 e8 07 d5 77 00 48 8b 53 08 48 RSP: 0018:ffffafd7c672fde8 EFLAGS: 00010202 RAX: 0000000000000005 RBX: ffffe36341fff700 RCX: 000000000000000f RDX: 0000000000000284 RSI: 0000000000000275 RDI: 0000000001fff700 RBP: ffffafd7c672fe00 R08: ffffa0bc34056410 R09: 0000000000000276 R10: ffffa0bc754e9b40 R11: ffffa0bc330f6400 R12: 0000000000002080 R13: ffffe36341fff700 R14: 0000000000080000 R15: ffffa0bc330f6400 FS: 00007f0ec1ea5740(0000) GS:ffffa0bc7db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000020d8 CR3: 0000000077d68000 CR4: 00000000000006e0 Call Trace: page_idle_bitmap_write+0x8c/0x140 sysfs_kf_bin_write+0x5c/0x70 kernfs_fop_write+0x12e/0x1b0 __vfs_write+0x1b/0x40 vfs_write+0xab/0x1b0 ksys_write+0x55/0xc0 __x64_sys_write+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Link: http://lkml.kernel.org/r/20190618124352.28307-1-colin.king@canonical.com Fixes: 33c3fc71c8cf ("mm: introduce idle page tracking") Signed-off-by: Colin Ian King Reviewed-by: Andrew Morton Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: Mike Rapoport Cc: Mel Gorman Cc: Stephen Rothwell Cc: Andrey Ryabinin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_idle.c b/mm/page_idle.c index 0b39ec0c945c..295512465065 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; @@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; From 2c9292336a09f7bf019689580ceea9a2d116b999 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jun 2019 12:07:09 -0700 Subject: [PATCH 154/207] mm/vmalloc.c: avoid bogus -Wmaybe-uninitialized warning gcc gets confused in pcpu_get_vm_areas() because there are too many branches that affect whether 'lva' was initialized before it gets used: mm/vmalloc.c: In function 'pcpu_get_vm_areas': mm/vmalloc.c:991:4: error: 'lva' may be used uninitialized in this function [-Werror=maybe-uninitialized] insert_vmap_area_augment(lva, &va->rb_node, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &free_vmap_area_root, &free_vmap_area_list); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mm/vmalloc.c:916:20: note: 'lva' was declared here struct vmap_area *lva; ^~~ Add an intialization to NULL, and check whether this has changed before the first use. [akpm@linux-foundation.org: tweak comments] Link: http://lkml.kernel.org/r/20190618092650.2943749-1-arnd@arndb.de Fixes: 68ad4a330433 ("mm/vmalloc.c: keep track of free blocks for vmap allocation") Signed-off-by: Arnd Bergmann Reviewed-by: Uladzislau Rezki (Sony) Cc: Joel Fernandes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4c9e150e5ad3..0f76cca32a1c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -913,7 +913,7 @@ adjust_va_to_fit_type(struct vmap_area *va, unsigned long nva_start_addr, unsigned long size, enum fit_type type) { - struct vmap_area *lva; + struct vmap_area *lva = NULL; if (type == FL_FIT_TYPE) { /* @@ -972,7 +972,7 @@ adjust_va_to_fit_type(struct vmap_area *va, if (type != FL_FIT_TYPE) { augment_tree_propagate_from(va); - if (type == NE_FIT_TYPE) + if (lva) /* type == NE_FIT_TYPE */ insert_vmap_area_augment(lva, &va->rb_node, &free_vmap_area_root, &free_vmap_area_list); } From 8708e13c6a0600625eea3aebd027c0715a5d2bb2 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 28 Jun 2019 12:07:12 -0700 Subject: [PATCH 155/207] MAINTAINERS: add CLANG/LLVM BUILD SUPPORT info Add keyword support so that our mailing list gets cc'ed for clang/llvm patches. We're pretty active on our mailing list so far as code review. There are numerous Googlers like myself that are paid to support building the Linux kernel with Clang and LLVM. Link: http://lkml.kernel.org/r/20190620001907.255803-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3c4d72755127..01a52fc964da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3942,6 +3942,14 @@ M: Miguel Ojeda S: Maintained F: .clang-format +CLANG/LLVM BUILD SUPPORT +L: clang-built-linux@googlegroups.com +W: https://clangbuiltlinux.github.io/ +B: https://github.com/ClangBuiltLinux/linux/issues +C: irc://chat.freenode.net/clangbuiltlinux +S: Supported +K: \b(?i:clang|llvm)\b + CLEANCACHE API M: Konrad Rzeszutek Wilk L: linux-kernel@vger.kernel.org From 1bf4580e00a248a2c86269125390eb3648e1877c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 28 Jun 2019 12:07:14 -0700 Subject: [PATCH 156/207] fork,memcg: alloc_thread_stack_node needs to set tsk->stack Commit 5eed6f1dff87 ("fork,memcg: fix crash in free_thread_stack on memcg charge fail") corrected two instances, but there was a third instance of this bug. Without setting tsk->stack, if memcg_charge_kernel_stack fails, it'll execute free_thread_stack() on a dangling pointer. Enterprise kernels are compiled with VMAP_STACK=y so this isn't critical, but custom VMAP_STACK=n builds should have some performance advantage, with the drawback of risking to fail fork because compaction didn't succeed. So as long as VMAP_STACK=n is a supported option it's worth fixing it upstream. Link: http://lkml.kernel.org/r/20190619011450.28048-1-aarcange@redhat.com Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting") Signed-off-by: Andrea Arcangeli Reviewed-by: Rik van Riel Acked-by: Roman Gushchin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 399aca51ff75..61667909ce83 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); - return page ? page_address(page) : NULL; + if (likely(page)) { + tsk->stack = page_address(page); + return tsk->stack; + } + return NULL; #endif } From 1a5f439c7c02837d943e528d46501564d4226757 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 28 Jun 2019 12:07:18 -0700 Subject: [PATCH 157/207] mm, swap: fix THP swap out 0-Day test system reported some OOM regressions for several THP (Transparent Huge Page) swap test cases. These regressions are bisected to 6861428921b5 ("block: always define BIO_MAX_PAGES as 256"). In the commit, BIO_MAX_PAGES is set to 256 even when THP swap is enabled. So the bio_alloc(gfp_flags, 512) in get_swap_bio() may fail when swapping out THP. That causes the OOM. As in the patch description of 6861428921b5 ("block: always define BIO_MAX_PAGES as 256"), THP swap should use multi-page bvec to write THP to swap space. So the issue is fixed via doing that in get_swap_bio(). BTW: I remember I have checked the THP swap code when 6861428921b5 ("block: always define BIO_MAX_PAGES as 256") was merged, and thought the THP swap code needn't to be changed. But apparently, I was wrong. I should have done this at that time. Link: http://lkml.kernel.org/r/20190624075515.31040-1-ying.huang@intel.com Fixes: 6861428921b5 ("block: always define BIO_MAX_PAGES as 256") Signed-off-by: "Huang, Ying" Reviewed-by: Ming Lei Cc: Michal Hocko Cc: Johannes Weiner Cc: Hugh Dickins Cc: Minchan Kim Cc: Rik van Riel Cc: Daniel Jordan Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 2e8019d0e048..189415852077 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -29,10 +29,9 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, struct page *page, bio_end_io_t end_io) { - int i, nr = hpage_nr_pages(page); struct bio *bio; - bio = bio_alloc(gfp_flags, nr); + bio = bio_alloc(gfp_flags, 1); if (bio) { struct block_device *bdev; @@ -41,9 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - for (i = 0; i < nr; i++) - bio_add_page(bio, page + i, PAGE_SIZE, 0); - VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr); + bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); } return bio; } From 8f9fab480c7a87b10bb5440b5555f370272a5d59 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 28 Jun 2019 12:07:21 -0700 Subject: [PATCH 158/207] linux/kernel.h: fix overflow for DIV_ROUND_UP_ULL DIV_ROUND_UP_ULL adds the two arguments and then invokes DIV_ROUND_DOWN_ULL. But on a 32bit system the addition of two 32 bit values can overflow. DIV_ROUND_DOWN_ULL does it correctly and stashes the addition into a unsigned long long so cast the result to unsigned long long here to avoid the overflow condition. [akpm@linux-foundation.org: DIV_ROUND_UP_ULL must be an rval] Link: http://lkml.kernel.org/r/20190625100518.30753-1-vkoul@kernel.org Signed-off-by: Vinod Koul Reviewed-by: Andrew Morton Cc: Bjorn Andersson Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 74b1ee9027f5..0c9bc231107f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -93,7 +93,8 @@ #define DIV_ROUND_DOWN_ULL(ll, d) \ ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) -#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) +#define DIV_ROUND_UP_ULL(ll, d) \ + DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) From 6fbc7275c7a9ba97877050335f290341a1fd8dbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Jun 2019 11:25:36 +0800 Subject: [PATCH 159/207] Linux 5.2-rc7 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7a7c17eb0cbf..fabc127d127f 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc6 -NAME = Golden Lions +EXTRAVERSION = -rc7 +NAME = Bobtail Squid # *DOCUMENTATION* # To see a list of typical targets execute "make help" From a126483e82957172b8a93ebb1d30fb2b1df3cbbc Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 6 Jun 2019 17:07:55 +0000 Subject: [PATCH 160/207] mtd: spinand: Fix max_bad_eraseblocks_per_lun info in memorg The 1Gb Macronix chip can have a maximum of 20 bad blocks, while the 2Gb version has twice as many blocks and therefore the maximum number of bad blocks is 40. The 4Gb GigaDevice GD5F4GQ4xA has twice as many blocks as its 2Gb counterpart and therefore a maximum of 80 bad blocks. Fixes: 377e517b5fa5 ("mtd: nand: Add max_bad_eraseblocks_per_lun info to memorg") Reported-by: Emil Lenngren Signed-off-by: Frieder Schrempf Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/gigadevice.c | 2 +- drivers/mtd/nand/spi/macronix.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index e5586390026a..e6c646007cda 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -180,7 +180,7 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, gd5fxgq4xa_ecc_get_status)), SPINAND_INFO("GD5F4GQ4xA", 0xF4, - NAND_MEMORG(1, 2048, 64, 64, 4096, 40, 1, 1, 1), + NAND_MEMORG(1, 2048, 64, 64, 4096, 80, 1, 1, 1), NAND_ECCREQ(8, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c index 6502727049a8..21def3f8fb36 100644 --- a/drivers/mtd/nand/spi/macronix.c +++ b/drivers/mtd/nand/spi/macronix.c @@ -100,7 +100,7 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand, static const struct spinand_info macronix_spinand_table[] = { SPINAND_INFO("MX35LF1GE4AB", 0x12, - NAND_MEMORG(1, 2048, 64, 64, 1024, 40, 1, 1, 1), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), NAND_ECCREQ(4, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, @@ -109,7 +109,7 @@ static const struct spinand_info macronix_spinand_table[] = { SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, mx35lf1ge4ab_ecc_get_status)), SPINAND_INFO("MX35LF2GE4AB", 0x22, - NAND_MEMORG(1, 2048, 64, 64, 2048, 20, 2, 1, 1), + NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 2, 1, 1), NAND_ECCREQ(4, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, From c403ec33b613a15d9fd8dde37f246b79cd56b5df Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 29 Jun 2019 03:22:48 +0200 Subject: [PATCH 161/207] mtd: rawnand: ingenic: Fix ingenic_ecc dependency If MTD_NAND_JZ4780 is y and MTD_NAND_JZ4780_BCH is m, which select CONFIG_MTD_NAND_INGENIC_ECC to m, building fails: drivers/mtd/nand/raw/ingenic/ingenic_nand.o: In function `ingenic_nand_remove': ingenic_nand.c:(.text+0x177): undefined reference to `ingenic_ecc_release' drivers/mtd/nand/raw/ingenic/ingenic_nand.o: In function `ingenic_nand_ecc_correct': ingenic_nand.c:(.text+0x2ee): undefined reference to `ingenic_ecc_correct' To fix that, the ingenic_nand and ingenic_ecc modules have been fused into one single module. - The ingenic_ecc.c code is now compiled in only if $(CONFIG_MTD_NAND_INGENIC_ECC) is set. This is now a boolean instead of tristate. - To avoid changing the module name, the ingenic_nand.c file is moved to ingenic_nand_drv.c. Then the module name is still ingenic_nand. - Since ingenic_ecc.c is no more a module, the module-specific macros have been dropped, and the functions are no more exported for use by the ingenic_nand driver. Fixes: 15de8c6efd0e ("mtd: rawnand: ingenic: Separate top-level and SoC specific code") Signed-off-by: Paul Cercueil Reported-by: Arnd Bergmann Reported-by: Hulk Robot Cc: YueHaibing Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/ingenic/Kconfig | 2 +- drivers/mtd/nand/raw/ingenic/Makefile | 4 +++- drivers/mtd/nand/raw/ingenic/ingenic_ecc.c | 9 --------- .../raw/ingenic/{ingenic_nand.c => ingenic_nand_drv.c} | 0 4 files changed, 4 insertions(+), 11 deletions(-) rename drivers/mtd/nand/raw/ingenic/{ingenic_nand.c => ingenic_nand_drv.c} (100%) diff --git a/drivers/mtd/nand/raw/ingenic/Kconfig b/drivers/mtd/nand/raw/ingenic/Kconfig index 19a96ce515c1..66b7cffdb0c2 100644 --- a/drivers/mtd/nand/raw/ingenic/Kconfig +++ b/drivers/mtd/nand/raw/ingenic/Kconfig @@ -16,7 +16,7 @@ config MTD_NAND_JZ4780 if MTD_NAND_JZ4780 config MTD_NAND_INGENIC_ECC - tristate + bool config MTD_NAND_JZ4740_ECC tristate "Hardware BCH support for JZ4740 SoC" diff --git a/drivers/mtd/nand/raw/ingenic/Makefile b/drivers/mtd/nand/raw/ingenic/Makefile index 1ac4f455baea..b63d36889263 100644 --- a/drivers/mtd/nand/raw/ingenic/Makefile +++ b/drivers/mtd/nand/raw/ingenic/Makefile @@ -2,7 +2,9 @@ obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o -obj-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o +ingenic_nand-y += ingenic_nand_drv.o +ingenic_nand-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o + obj-$(CONFIG_MTD_NAND_JZ4740_ECC) += jz4740_ecc.o obj-$(CONFIG_MTD_NAND_JZ4725B_BCH) += jz4725b_bch.o obj-$(CONFIG_MTD_NAND_JZ4780_BCH) += jz4780_bch.o diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c index d3e085c5685a..c954189606f6 100644 --- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c +++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c @@ -30,7 +30,6 @@ int ingenic_ecc_calculate(struct ingenic_ecc *ecc, { return ecc->ops->calculate(ecc, params, buf, ecc_code); } -EXPORT_SYMBOL(ingenic_ecc_calculate); /** * ingenic_ecc_correct() - detect and correct bit errors @@ -51,7 +50,6 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc, { return ecc->ops->correct(ecc, params, buf, ecc_code); } -EXPORT_SYMBOL(ingenic_ecc_correct); /** * ingenic_ecc_get() - get the ECC controller device @@ -111,7 +109,6 @@ struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *of_node) } return ecc; } -EXPORT_SYMBOL(of_ingenic_ecc_get); /** * ingenic_ecc_release() - release the ECC controller device @@ -122,7 +119,6 @@ void ingenic_ecc_release(struct ingenic_ecc *ecc) clk_disable_unprepare(ecc->clk); put_device(ecc->dev); } -EXPORT_SYMBOL(ingenic_ecc_release); int ingenic_ecc_probe(struct platform_device *pdev) { @@ -159,8 +155,3 @@ int ingenic_ecc_probe(struct platform_device *pdev) return 0; } EXPORT_SYMBOL(ingenic_ecc_probe); - -MODULE_AUTHOR("Alex Smith "); -MODULE_AUTHOR("Harvey Hunt "); -MODULE_DESCRIPTION("Ingenic ECC common driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c similarity index 100% rename from drivers/mtd/nand/raw/ingenic/ingenic_nand.c rename to drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c From f78c581e22d4b33359ac3462e8d0504735df01f4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 27 Jun 2019 11:01:04 +0800 Subject: [PATCH 162/207] drm/amd/powerplay: use hardware fan control if no powerplay fan table Otherwise, you may get divided-by-zero error or corrput the SMU fan control feature. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Tested-by: Slava Abramov Acked-by: Slava Abramov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c | 4 +++- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index ae64ff7153d6..1cd5a8b5cdc1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -916,8 +916,10 @@ static int init_thermal_controller( PHM_PlatformCaps_ThermalController ); - if (0 == powerplay_table->usFanTableOffset) + if (0 == powerplay_table->usFanTableOffset) { + hwmgr->thermal_controller.use_hw_fan_control = 1; return 0; + } fan_table = (const PPTable_Generic_SubTable_Header *) (((unsigned long)powerplay_table) + diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index c92999aac07c..eccb26fddbd0 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -694,6 +694,7 @@ struct pp_thermal_controller_info { uint8_t ucType; uint8_t ucI2cLine; uint8_t ucI2cAddress; + uint8_t use_hw_fan_control; struct pp_fan_info fanInfo; struct pp_advance_fan_control_parameters advanceFanControlParameters; }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2d4cfe14f72e..29e641c6a5db 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -2092,6 +2092,10 @@ static int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) return 0; } + /* use hardware fan control */ + if (hwmgr->thermal_controller.use_hw_fan_control) + return 0; + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters. usPWMMin * duty100; do_div(tmp64, 10000); From 688f3d1ebedffa310b6591bd1b63fa0770d945fe Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 20 Jun 2019 19:21:26 -0400 Subject: [PATCH 163/207] drm/amdgpu: Don't skip display settings in hwmgr_resume() I'm not entirely sure why this is, but for some reason: 921935dc6404 ("drm/amd/powerplay: enforce display related settings only on needed") Breaks runtime PM resume on the Radeon PRO WX 3100 (Lexa) in one the pre-production laptops I have. The issue manifests as the following messages in dmesg: [drm] UVD and UVD ENC initialized successfully. amdgpu 0000:3b:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vce1 test failed (-110) [drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block failed -110 [drm:amdgpu_device_resume [amdgpu]] *ERROR* amdgpu_device_ip_resume failed (-110). And happens after about 6-10 runtime PM suspend/resume cycles (sometimes sooner, if you're lucky!). Unfortunately I can't seem to pin down precisely which part in psm_adjust_power_state_dynamic that is causing the issue, but not skipping the display setting setup seems to fix it. Hopefully if there is a better fix for this, this patch will spark discussion around it. Fixes: 921935dc6404 ("drm/amd/powerplay: enforce display related settings only on needed") Cc: Evan Quan Cc: Alex Deucher Cc: Huang Rui Cc: Rex Zhu Cc: Likun Gao Cc: # v5.1+ Signed-off-by: Lyude Paul Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index f1d326caf69e..a7e8340baf90 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -326,7 +326,7 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr) if (ret) return ret; - ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL); + ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL); return ret; } From 28dd29c06d0dede4b32b2c559cff21955a830928 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 1 Jul 2019 16:01:46 +0200 Subject: [PATCH 164/207] fork: return proper negative error code Make sure to return a proper negative error code from copy_process() when anon_inode_getfile() fails with CLONE_PIDFD. Otherwise _do_fork() will not detect an error and get_task_pid() will operator on a nonsensical pointer: R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc2c R13: 00007ffc15fbb0ff R14: 00007ff07e47e9c0 R15: 0000000000000000 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 7990 Comm: syz-executor290 Not tainted 5.2.0-rc6+ #9 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:194 [inline] RIP: 0010:get_task_pid+0xe1/0x210 kernel/pid.c:372 Code: 89 ff e8 62 27 5f 00 49 8b 07 44 89 f1 4c 8d bc c8 90 01 00 00 eb 0c e8 0d fe 25 00 49 81 c7 38 05 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 18 00 74 08 4c 89 ff e8 31 27 5f 00 4d 8b 37 e8 f9 47 12 00 RSP: 0018:ffff88808a4a7d78 EFLAGS: 00010203 RAX: 00000000000000a7 RBX: dffffc0000000000 RCX: ffff888088180600 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88808a4a7d90 R08: ffffffff814fb3a8 R09: ffffed1015d66bf8 R10: ffffed1015d66bf8 R11: 1ffff11015d66bf7 R12: 0000000000041ffc R13: 1ffff11011494fbc R14: 0000000000000000 R15: 000000000000053d FS: 00007ff07e47e700(0000) GS:ffff8880aeb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b5100 CR3: 0000000094df2000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: _do_fork+0x1b9/0x5f0 kernel/fork.c:2360 __do_sys_clone kernel/fork.c:2454 [inline] __se_sys_clone kernel/fork.c:2448 [inline] __x64_sys_clone+0xc1/0xd0 kernel/fork.c:2448 do_syscall_64+0xfe/0x140 arch/x86/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: https://lore.kernel.org/lkml/000000000000e0dc0d058c9e7142@google.com Reported-and-tested-by: syzbot+002e636502bc4b64eb5c@syzkaller.appspotmail.com Fixes: 6fd2fe494b17 ("copy_process(): don't use ksys_close() on cleanups") Cc: Jann Horn Cc: Al Viro Signed-off-by: Christian Brauner --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/fork.c b/kernel/fork.c index 61667909ce83..fe83343da24b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2036,6 +2036,7 @@ static __latent_entropy struct task_struct *copy_process( O_RDWR | O_CLOEXEC); if (IS_ERR(pidfile)) { put_unused_fd(pidfd); + retval = PTR_ERR(pidfile); goto bad_fork_free_pid; } get_pid(pid); /* held by pidfile now */ From 570d7a98e7d6d5d8706d94ffd2d40adeaa318332 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 29 Jun 2019 13:27:44 -0700 Subject: [PATCH 165/207] vfs: move_mount: reject moving kernel internal mounts sys_move_mount() crashes by dereferencing the pointer MNT_NS_INTERNAL, a.k.a. ERR_PTR(-EINVAL), if the old mount is specified by fd for a kernel object with an internal mount, such as a pipe or memfd. Fix it by checking for this case and returning -EINVAL. [AV: what we want is is_mounted(); use that instead of making the condition even more convoluted] Reproducer: #include #define __NR_move_mount 429 #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 int main() { int fds[2]; pipe(fds); syscall(__NR_move_mount, fds[0], "", -1, "/", MOVE_MOUNT_F_EMPTY_PATH); } Reported-by: syzbot+6004acbaa1893ad013f0@syzkaller.appspotmail.com Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around") Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/namespace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 7660c2749c96..6fbc9126367a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2596,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path) if (!check_mnt(p)) goto out; - /* The thing moved should be either ours or completely unattached. */ - if (attached && !check_mnt(old)) + /* The thing moved must be mounted... */ + if (!is_mounted(&old->mnt)) goto out; - if (!attached && !(ns && is_anon_ns(ns))) + /* ... and either ours or the root of anon namespace */ + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) From 7fbd1753b64eafe21cf842348a40a691d0dee440 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 1 Jul 2019 23:43:53 +0900 Subject: [PATCH 166/207] ALSA: firewire-lib/fireworks: fix miss detection of received MIDI messages In IEC 61883-6, 8 MIDI data streams are multiplexed into single MIDI conformant data channel. The index of stream is calculated by modulo 8 of the value of data block counter. In fireworks, the value of data block counter in CIP header has a quirk with firmware version v5.0.0, v5.7.3 and v5.8.0. This brings ALSA IEC 61883-1/6 packet streaming engine to miss detection of MIDI messages. This commit fixes the miss detection to modify the value of data block counter for the modulo calculation. For maintainers, this bug exists since a commit 18f5ed365d3f ("ALSA: fireworks/firewire-lib: add support for recent firmware quirk") in Linux kernel v4.2. There're many changes since the commit. This fix can be backported to Linux kernel v4.4 or later. I tagged a base commit to the backport for your convenience. Besides, my work for Linux kernel v5.3 brings heavy code refactoring and some structure members are renamed in 'sound/firewire/amdtp-stream.h'. The content of this patch brings conflict when merging -rc tree with this patch and the latest tree. I request maintainers to solve the conflict to replace 'tx_first_dbc' with 'ctx_data.tx.first_dbc'. Fixes: df075feefbd3 ("ALSA: firewire-lib: complete AM824 data block processing layer") Cc: # v4.4+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-am824.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-am824.c b/sound/firewire/amdtp-am824.c index 4210e5c6262e..d09da9dbf235 100644 --- a/sound/firewire/amdtp-am824.c +++ b/sound/firewire/amdtp-am824.c @@ -321,7 +321,7 @@ static void read_midi_messages(struct amdtp_stream *s, u8 *b; for (f = 0; f < frames; f++) { - port = (s->data_block_counter + f) % 8; + port = (8 - s->tx_first_dbc + s->data_block_counter + f) % 8; b = (u8 *)&buffer[p->midi_position]; len = b[0] - 0x80; From 25f09f858835b0e9a06213811031190a17d8ab78 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Jul 2019 08:38:12 -0500 Subject: [PATCH 167/207] drm/amdgpu/gfx9: use reset default for PA_SC_FIFO_SIZE Recommended by the hw team. Reviewed-and-Tested-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b610e3b30d95..2f18c64d531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1959,25 +1959,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); - - mutex_lock(&adev->grbm_idx_mutex); - /* - * making sure that the following register writes will be broadcasted - * to all the shaders - */ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, - (adev->gfx.config.sc_prim_fifo_size_frontend << - PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_prim_fifo_size_backend << - PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_hiz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_earlyz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); - mutex_unlock(&adev->grbm_idx_mutex); - } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) From 98482377dc7295d0c70e251925b7cc14aff4c5ac Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 1 Jul 2019 10:30:30 -0700 Subject: [PATCH 168/207] ALSA: hda: Fix widget_mutex incomplete protection The widget_mutex was introduced to serialize callers to hda_widget_sysfs_{re}init. However, its protection of the sysfs widget array is incomplete. For example, it is acquired around the call to hda_widget_sysfs_reinit(), which actually creates the new array, but isn't still acquired when codec->num_nodes and codec->start_nid is updated. So the lock ensures one thread sets up the new array at a time, but doesn't ensure which thread's value will end up in codec->num_nodes. If a larger num_nodes wins but a smaller array was set up, the next call to refresh_widgets() will touch free memory as it iterates over codec->num_nodes that aren't there. The widget_lock really protects both the tree as well as codec->num_nodes, start_nid, and end_nid, so make sure it's held across that update. It should also be held during snd_hdac_get_sub_nodes(), so that a very old read from that function doesn't end up clobbering a later update. Fixes: ed180abba7f1 ("ALSA: hda: Fix race between creating and refreshing sysfs entries") Signed-off-by: Evan Green Signed-off-by: Takashi Iwai --- sound/hda/hdac_device.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index 4769f4c03e14..11050bfd8068 100644 --- a/sound/hda/hdac_device.c +++ b/sound/hda/hdac_device.c @@ -399,27 +399,33 @@ static void setup_fg_nodes(struct hdac_device *codec) int snd_hdac_refresh_widgets(struct hdac_device *codec, bool sysfs) { hda_nid_t start_nid; - int nums, err; + int nums, err = 0; + /* + * Serialize against multiple threads trying to update the sysfs + * widgets array. + */ + mutex_lock(&codec->widget_lock); nums = snd_hdac_get_sub_nodes(codec, codec->afg, &start_nid); if (!start_nid || nums <= 0 || nums >= 0xff) { dev_err(&codec->dev, "cannot read sub nodes for FG 0x%02x\n", codec->afg); - return -EINVAL; + err = -EINVAL; + goto unlock; } if (sysfs) { - mutex_lock(&codec->widget_lock); err = hda_widget_sysfs_reinit(codec, start_nid, nums); - mutex_unlock(&codec->widget_lock); if (err < 0) - return err; + goto unlock; } codec->num_nodes = nums; codec->start_nid = start_nid; codec->end_nid = start_nid + nums; - return 0; +unlock: + mutex_unlock(&codec->widget_lock); + return err; } EXPORT_SYMBOL_GPL(snd_hdac_refresh_widgets); From 521a503f5247de9a5cbc381f15c466a6c6f2793c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Jun 2019 15:01:05 +0200 Subject: [PATCH 169/207] soc: ti: fix irq-ti-sci link error The irqchip driver depends on the SoC specific driver, but we want to be able to compile-test it elsewhere: WARNING: unmet direct dependencies detected for TI_SCI_INTA_MSI_DOMAIN Depends on [n]: SOC_TI [=n] Selected by [y]: - TI_SCI_INTA_IRQCHIP [=y] && TI_SCI_PROTOCOL [=y] drivers/irqchip/irq-ti-sci-inta.o: In function `ti_sci_inta_irq_domain_probe': irq-ti-sci-inta.c:(.text+0x204): undefined reference to `ti_sci_inta_msi_create_irq_domain' Rearrange the Kconfig and Makefile so we build the soc driver whenever its users are there, regardless of the SOC_TI option. Fixes: 49b323157bf1 ("soc: ti: Add MSI domain bus support for Interrupt Aggregator") Fixes: f011df6179bd ("irqchip/ti-sci-inta: Add msi domain support") Signed-off-by: Arnd Bergmann Reviewed-by: Lokesh Vutla Acked-by: Santosh Shilimkar Signed-off-by: Olof Johansson --- drivers/soc/Makefile | 2 +- drivers/soc/ti/Kconfig | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index 524ecdc2a9bb..2ec355003524 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/ obj-$(CONFIG_SOC_SAMSUNG) += samsung/ obj-y += sunxi/ obj-$(CONFIG_ARCH_TEGRA) += tegra/ -obj-$(CONFIG_SOC_TI) += ti/ +obj-y += ti/ obj-$(CONFIG_ARCH_U8500) += ux500/ obj-$(CONFIG_PLAT_VERSATILE) += versatile/ obj-y += xilinx/ diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig index ea0859f7b185..d7d50d48d05d 100644 --- a/drivers/soc/ti/Kconfig +++ b/drivers/soc/ti/Kconfig @@ -75,10 +75,10 @@ config TI_SCI_PM_DOMAINS called ti_sci_pm_domains. Note this is needed early in boot before rootfs may be available. +endif # SOC_TI + config TI_SCI_INTA_MSI_DOMAIN bool select GENERIC_MSI_IRQ_DOMAIN help Driver to enable Interrupt Aggregator specific MSI Domain. - -endif # SOC_TI From c84c9029d782a3a0d2a7f0522ecb907314d43e2c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Apr 2019 12:17:47 +0100 Subject: [PATCH 170/207] drm/i915/ringbuffer: EMIT_INVALIDATE *before* switch context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite what I think the prm recommends, commit f2253bd9859b ("drm/i915/ringbuffer: EMIT_INVALIDATE after switch context") turned out to be a huge mistake when enabling Ironlake contexts as the GPU would hang on either a MI_FLUSH or PIPE_CONTROL immediately following the MI_SET_CONTEXT of an active mesa context (more vanilla contexts, e.g. simple rendercopies with igt, do not suffer). Ville found the following clue, "[DevCTG+]: For the invalidate operation of the pipe control, the following pointers are affected. The invalidate operation affects the restore of these packets. If the pipe control invalidate operation is completed before the context save, the indirect pointers will not be restored from memory. 1. Pipeline State Pointer 2. Media State Pointer 3. Constant Buffer Packet" which suggests by us emitting the INVALIDATE prior to the MI_SET_CONTEXT, we prevent the context-restore from chasing the dangling pointers within the image, and explains why this likely prevents the GPU hang. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190419111749.3910-1-chris@chris-wilson.co.uk (cherry picked from commit 928f8f42310f244501a7c70daac82c196112c190 in drm-intel-next) Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111014 Fixes: f2253bd9859b ("drm/i915/ringbuffer: EMIT_INVALIDATE after switch context") Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 029fd8ec1857..f0d45ccc1aac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1888,12 +1888,12 @@ static int ring_request_alloc(struct i915_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - ret = switch_context(request); + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; - /* Unconditionally invalidate GPU caches and TLBs. */ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + ret = switch_context(request); if (ret) return ret; From 018ad0523208ad86ec85d47ebb59abd91d3f0d8b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 25 Jun 2019 18:49:14 +0200 Subject: [PATCH 171/207] ARM: davinci: da830-evm: add missing regulator constraints for OHCI We need to enable status changes for the fixed power supply for the USB controller. Fixes: 274e4c336192 ("ARM: davinci: da830-evm: add a fixed regulator for ohci-da8xx") Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-da830-evm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 51a892702e27..b9891209f775 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -61,6 +61,9 @@ static struct regulator_consumer_supply da830_evm_usb_supplies[] = { static struct regulator_init_data da830_evm_usb_vbus_data = { .consumer_supplies = da830_evm_usb_supplies, .num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config da830_evm_usb_vbus = { From ed667776d6e652819f3f44d28cd79bdffac15141 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 25 Jun 2019 18:49:15 +0200 Subject: [PATCH 172/207] ARM: davinci: omapl138-hawk: add missing regulator constraints for OHCI We need to enable status changes for the fixed power supply for the USB controller. Fixes: 1d272894ec4f ("ARM: davinci: omapl138-hawk: add a fixed regulator for ohci-da8xx") Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-omapl138-hawk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index db177a6a7e48..5390a8630cf0 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -306,6 +306,9 @@ static struct regulator_consumer_supply hawk_usb_supplies[] = { static struct regulator_init_data hawk_usb_vbus_data = { .consumer_supplies = hawk_usb_supplies, .num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config hawk_usb_vbus = { From 4f2fe646770774d02d52a514849c181c9e0970f6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 25 Jun 2019 17:16:12 +0200 Subject: [PATCH 173/207] ARM: davinci: da830-evm: fix GPIO lookup for OHCI The fixed regulator driver doesn't specify any con_id for gpio lookup so it must be NULL in the table entry. Fixes: 274e4c336192 ("ARM: davinci: da830-evm: add a fixed regulator for ohci-da8xx") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-da830-evm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index b9891209f775..a273ab25c668 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -91,7 +91,7 @@ static struct gpiod_lookup_table da830_evm_usb_oc_gpio_lookup = { static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = { .dev_id = "reg-fixed-voltage.0", .table = { - GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0), + GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0), { } }, }; From 3f16a5c318392cbb5a0c7a3d19dff8c8ef3c38ee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jun 2019 14:16:13 +0200 Subject: [PATCH 174/207] KVM: x86: degrade WARN to pr_warn_ratelimited This warning can be triggered easily by userspace, so it should certainly not cause a panic if panic_on_warn is set. Reported-by: syzbot+c03f30b4f4c46bdf8575@syzkaller.appspotmail.com Suggested-by: Alexander Potapenko Acked-by: Alexander Potapenko Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9857992d4e58..fafd81d2c9ea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1554,7 +1554,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) vcpu->arch.tsc_always_catchup = 1; return 0; } else { - WARN(1, "user requested TSC rate below hardware speed\n"); + pr_warn_ratelimited("user requested TSC rate below hardware speed\n"); return -1; } } @@ -1564,8 +1564,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) user_tsc_khz, tsc_khz); if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { - WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); + pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); return -1; } From 65b712f1560abdd9ebec005e9bd17c21ecacc849 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 25 Jun 2019 14:26:42 +0300 Subject: [PATCH 175/207] KVM: nVMX: Allow restore nested-state to enable eVMCS when vCPU in SMM As comment in code specifies, SMM temporarily disables VMX so we cannot be in guest mode, nor can VMLAUNCH/VMRESUME be pending. However, code currently assumes that these are the only flags that can be set on kvm_state->flags. This is not true as KVM_STATE_NESTED_EVMCS can also be set on this field to signal that eVMCS should be enabled. Therefore, fix code to check for guest-mode and pending VMLAUNCH/VMRESUME explicitly. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5f9c1a200201..adbf4fc77ad8 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5373,7 +5373,10 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags * must be zero. */ - if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags) + if (is_smm(vcpu) ? + (kvm_state->flags & + (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING)) + : kvm_state->hdr.vmx.smm.flags) return -EINVAL; if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && From 323d73a8ecad22bf3284f11112a7cce576ade6af Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Wed, 26 Jun 2019 16:09:27 +0300 Subject: [PATCH 176/207] KVM: nVMX: Change KVM_STATE_NESTED_EVMCS to signal vmcs12 is copied from eVMCS Currently KVM_STATE_NESTED_EVMCS is used to signal that eVMCS capability is enabled on vCPU. As indicated by vmx->nested.enlightened_vmcs_enabled. This is quite bizarre as userspace VMM should make sure to expose same vCPU with same CPUID values in both source and destination. In case vCPU is exposed with eVMCS support on CPUID, it is also expected to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability. Therefore, KVM_STATE_NESTED_EVMCS is redundant. KVM_STATE_NESTED_EVMCS is currently used on restore path (vmx_set_nested_state()) only to enable eVMCS capability in KVM and to signal need_vmcs12_sync such that on next VMEntry to guest nested_sync_from_vmcs12() will be called to sync vmcs12 content into eVMCS in guest memory. However, because restore nested-state is rare enough, we could have just modified vmx_set_nested_state() to always signal need_vmcs12_sync. From all the above, it seems that we could have just removed the usage of KVM_STATE_NESTED_EVMCS. However, in order to preserve backwards migration compatibility, we cannot do that. (vmx_get_nested_state() needs to signal flag when migrating from new kernel to old kernel). Returning KVM_STATE_NESTED_EVMCS when just vCPU have eVMCS enabled have a bad side-effect of userspace VMM having to send nested-state from source to destination as part of migration stream. Even if guest have never used eVMCS as it doesn't even run a nested hypervisor workload. This requires destination userspace VMM and KVM to support setting nested-state. Which make it more difficult to migrate from new host to older host. To avoid this, change KVM_STATE_NESTED_EVMCS to signal eVMCS is not only enabled but also active. i.e. Guest have made some eVMCS active via an enlightened VMEntry. i.e. vmcs12 is copied from eVMCS and therefore should be restored into eVMCS resident in memory (by copy_vmcs12_to_enlightened()). Reviewed-by: Vitaly Kuznetsov Reviewed-by: Maran Wilson Reviewed-by: Krish Sadhukhan Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++------- .../testing/selftests/kvm/x86_64/evmcs_test.c | 1 + 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index adbf4fc77ad8..46af3a5e9209 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; - if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; @@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx_has_valid_vmcs12(vcpu)) { kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); + if (vmx->nested.hv_evmcs) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; + if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) @@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) return -EINVAL; + /* + * KVM_STATE_NESTED_EVMCS used to signal that KVM should + * enable eVMCS capability on vCPU. However, since then + * code was changed such that flag signals vmcs12 should + * be copied into eVMCS in guest memory. + * + * To preserve backwards compatability, allow user + * to set this flag even when there is no VMXON region. + */ if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) return -EINVAL; } else { @@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) return -EINVAL; - } + } if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) @@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; - vmx_leave_nested(vcpu); - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { - if (!nested_vmx_allowed(vcpu)) + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; - nested_enable_evmcs(vcpu, NULL); - } + vmx_leave_nested(vcpu); if (kvm_state->hdr.vmx.vmxon_pa == -1ull) return 0; diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index b38260e29775..241919ef1eac 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -146,6 +146,7 @@ int main(int argc, char *argv[]) kvm_vm_restart(vm, O_RDWR); vm_vcpu_add(vm, VCPU_ID, 0, 0); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); free(state); From bb34e690e9340bc155ebed5a3d75fc63ff69e082 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 2 Jul 2019 17:25:02 +0800 Subject: [PATCH 177/207] KVM: LAPIC: Fix pending interrupt in IRR blocked by software disable LAPIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thomas reported that: | Background: | | In preparation of supporting IPI shorthands I changed the CPU offline | code to software disable the local APIC instead of just masking it. | That's done by clearing the APIC_SPIV_APIC_ENABLED bit in the APIC_SPIV | register. | | Failure: | | When the CPU comes back online the startup code triggers occasionally | the warning in apic_pending_intr_clear(). That complains that the IRRs | are not empty. | | The offending vector is the local APIC timer vector who's IRR bit is set | and stays set. | | It took me quite some time to reproduce the issue locally, but now I can | see what happens. | | It requires apicv_enabled=0, i.e. full apic emulation. With apicv_enabled=1 | (and hardware support) it behaves correctly. | | Here is the series of events: | | Guest CPU | | goes down | | native_cpu_disable() | | apic_soft_disable(); | | play_dead() | | .... | | startup() | | if (apic_enabled()) | apic_pending_intr_clear() <- Not taken | | enable APIC | | apic_pending_intr_clear() <- Triggers warning because IRR is stale | | When this happens then the deadline timer or the regular APIC timer - | happens with both, has fired shortly before the APIC is disabled, but the | interrupt was not serviced because the guest CPU was in an interrupt | disabled region at that point. | | The state of the timer vector ISR/IRR bits: | | ISR IRR | before apic_soft_disable() 0 1 | after apic_soft_disable() 0 1 | | On startup 0 1 | | Now one would assume that the IRR is cleared after the INIT reset, but this | happens only on CPU0. | | Why? | | Because our CPU0 hotplug is just for testing to make sure nothing breaks | and goes through an NMI wakeup vehicle because INIT would send it through | the boots-trap code which is not really working if that CPU was not | physically unplugged. | | Now looking at a real world APIC the situation in that case is: | | ISR IRR | before apic_soft_disable() 0 1 | after apic_soft_disable() 0 1 | | On startup 0 0 | | Why? | | Once the dying CPU reenables interrupts the pending interrupt gets | delivered as a spurious interupt and then the state is clear. | | While that CPU0 hotplug test case is surely an esoteric issue, the APIC | emulation is still wrong, Even if the play_dead() code would not enable | interrupts then the pending IRR bit would turn into an ISR .. interrupt | when the APIC is reenabled on startup. From SDM 10.4.7.2 Local APIC State After It Has Been Software Disabled * Pending interrupts in the IRR and ISR registers are held and require masking or handling by the CPU. In Thomas's testing, hardware cpu will not respect soft disable LAPIC when IRR has already been set or APICv posted-interrupt is in flight, so we can skip soft disable APIC checking when clearing IRR and set ISR, continue to respect soft disable APIC when attempting to set IRR. Reported-by: Rong Chen Reported-by: Feng Tang Reported-by: Thomas Gleixner Tested-by: Thomas Gleixner Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Rong Chen Cc: Feng Tang Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a21c440ff356..4dabc318adb8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2339,7 +2339,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!apic_enabled(apic)) + if (!kvm_apic_hw_enabled(apic)) return -1; __apic_update_ppr(apic, &ppr); From 3450121997ce872eb7f1248417225827ea249710 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 2 Jul 2019 20:07:21 +0200 Subject: [PATCH 178/207] ALSA: line6: Fix write on zero-sized buffer LINE6 drivers allocate the buffers based on the value returned from usb_maxpacket() calls. The manipulated device may return zero for this, and this results in the kmalloc() with zero size (and it may succeed) while the other part of the driver code writes the packet data with the fixed size -- which eventually overwrites. This patch adds a simple sanity check for the invalid buffer size for avoiding that problem. Reported-by: syzbot+219f00fb49874dcaea17@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/pcm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 72c6f8e82a7e..78c2d6cab3b5 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -560,6 +560,11 @@ int line6_init_pcm(struct usb_line6 *line6, line6pcm->max_packet_size_out = usb_maxpacket(line6->usbdev, usb_sndisocpipe(line6->usbdev, ep_write), 1); + if (!line6pcm->max_packet_size_in || !line6pcm->max_packet_size_out) { + dev_err(line6pcm->line6->ifcdev, + "cannot get proper max packet size\n"); + return -EINVAL; + } spin_lock_init(&line6pcm->out.lock); spin_lock_init(&line6pcm->in.lock); From 074376ac0e1d1fcd4fafebca86ee6158e7c20680 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sat, 29 Jun 2019 23:22:33 +0200 Subject: [PATCH 179/207] ftrace/x86: Anotate text_mutex split between ftrace_arch_code_modify_post_process() and ftrace_arch_code_modify_prepare() ftrace_arch_code_modify_prepare() is acquiring text_mutex, while the corresponding release is happening in ftrace_arch_code_modify_post_process(). This has already been documented in the code, but let's also make the fact that this is intentional clear to the semantic analysis tools such as sparse. Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1906292321170.27227@cbobk.fhfr.pm Fixes: 39611265edc1a ("ftrace/x86: Add a comment to why we take text_mutex in ftrace_arch_code_modify_prepare()") Fixes: d5b844a2cf507 ("ftrace/x86: Remove possible deadlock between register_kprobe() and ftrace_run_update_code()") Signed-off-by: Jiri Kosina Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d7e93b2783fd..76228525acd0 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -35,6 +35,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { /* * Need to grab text_mutex to prevent a race from module loading @@ -48,6 +49,7 @@ int ftrace_arch_code_modify_prepare(void) } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); From fbbf145a0e0a0177e089c52275fbfa55763e7d1d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 2 Jul 2019 21:39:59 +0200 Subject: [PATCH 180/207] gpio/spi: Fix spi-gpio regression on active high CS I ran into an intriguing bug caused by commit ""spi: gpio: Don't request CS GPIO in DT use-case" affecting all SPI GPIO devices with an active high chip select line. The commit switches the CS gpio handling over to the GPIO core, which will parse and handle "cs-gpios" from the OF node without even calling down to the driver to get the job done. However the GPIO core handles the standard bindings in Documentation/devicetree/bindings/spi/spi-controller.yaml that specifies that active high CS needs to be specified using "spi-cs-high" in the DT node. The code in drivers/spi/spi-gpio.c never respected this and never tried to inspect subnodes to see if they contained "spi-cs-high" like the gpiolib OF quirks does. Instead the only way to get an active high CS was to tag it in the device tree using the flags cell such as cs-gpios = <&gpio 4 GPIO_ACTIVE_HIGH>; This alters the quirks to not inspect the subnodes of SPI masters on "spi-gpio" for the standard attribute "spi-cs-high", making old device trees work as expected. This semantic is a bit ambigous, but just allowing the flags on the GPIO descriptor to modify polarity is what the kernel at large mostly uses so let's encourage that. Fixes: 249e2632dcd0 ("spi: gpio: Don't request CS GPIO in DT use-case") Cc: Andrey Smirnov Cc: linux-gpio@vger.kernel.org Cc: linux-spi@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index aec7bd86ae7e..9c9b965d7d6d 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -118,8 +118,15 @@ static void of_gpio_flags_quirks(struct device_node *np, * Legacy handling of SPI active high chip select. If we have a * property named "cs-gpios" we need to inspect the child node * to determine if the flags should have inverted semantics. + * + * This does not apply to an SPI device named "spi-gpio", because + * these have traditionally obtained their own GPIOs by parsing + * the device tree directly and did not respect any "spi-cs-high" + * property on the SPI bus children. */ - if (IS_ENABLED(CONFIG_SPI_MASTER) && !strcmp(propname, "cs-gpios") && + if (IS_ENABLED(CONFIG_SPI_MASTER) && + !strcmp(propname, "cs-gpios") && + !of_device_is_compatible(np, "spi-gpio") && of_property_read_bool(np, "cs-gpios")) { struct device_node *child; u32 cs; From 5676234f20fef02f6ca9bd66c63a8860fce62645 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Tue, 2 Jul 2019 22:16:38 +0300 Subject: [PATCH 181/207] scsi: target/iblock: Fix overrun in WRITE SAME emulation WRITE SAME corrupts data on the block device behind iblock if the command is emulated. The emulation code issues (M - 1) * N times more bios than requested, where M is the number of 512 blocks per real block size and N is the NUMBER OF LOGICAL BLOCKS specified in WRITE SAME command. So, for a device with 4k blocks, 7 * N more LBAs gets written after the requested range. The issue happens because the number of 512 byte sectors to be written is decreased one by one while the real bios are typically from 1 to 8 512 byte sectors per bio. Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6") Cc: Signed-off-by: Roman Bolshakov Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/target/target_core_iblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index b5ed9c377060..efebacd36101 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -515,7 +515,7 @@ iblock_execute_write_same(struct se_cmd *cmd) /* Always in 512 byte units for Linux/Block */ block_lba += sg->length >> SECTOR_SHIFT; - sectors -= 1; + sectors -= sg->length >> SECTOR_SHIFT; } iblock_submit_bios(&list); From 5dd6c49339126c2c8df2179041373222362d6e49 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 26 Jun 2019 19:27:34 +0200 Subject: [PATCH 182/207] scsi: iscsi: set auth_protocol back to NULL if CHAP_A value is not supported If the CHAP_A value is not supported, the chap_server_open() function should free the auth_protocol pointer and set it to NULL, or we will leave a dangling pointer around. [ 66.010905] Unsupported CHAP_A value [ 66.011660] Security negotiation failed. [ 66.012443] iSCSI Login negotiation failed. [ 68.413924] general protection fault: 0000 [#1] SMP PTI [ 68.414962] CPU: 0 PID: 1562 Comm: targetcli Kdump: loaded Not tainted 4.18.0-80.el8.x86_64 #1 [ 68.416589] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 68.417677] RIP: 0010:__kmalloc_track_caller+0xc2/0x210 Signed-off-by: Maurizio Lombardi Reviewed-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_auth.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 4e680d753941..e2fa3a3bc81d 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -89,6 +89,12 @@ static int chap_check_algorithm(const char *a_str) return CHAP_DIGEST_UNKNOWN; } +static void chap_close(struct iscsi_conn *conn) +{ + kfree(conn->auth_protocol); + conn->auth_protocol = NULL; +} + static struct iscsi_chap *chap_server_open( struct iscsi_conn *conn, struct iscsi_node_auth *auth, @@ -126,7 +132,7 @@ static struct iscsi_chap *chap_server_open( case CHAP_DIGEST_UNKNOWN: default: pr_err("Unsupported CHAP_A value\n"); - kfree(conn->auth_protocol); + chap_close(conn); return NULL; } @@ -141,19 +147,13 @@ static struct iscsi_chap *chap_server_open( * Generate Challenge. */ if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) { - kfree(conn->auth_protocol); + chap_close(conn); return NULL; } return chap; } -static void chap_close(struct iscsi_conn *conn) -{ - kfree(conn->auth_protocol); - conn->auth_protocol = NULL; -} - static int chap_server_compute_md5( struct iscsi_conn *conn, struct iscsi_node_auth *auth, From eca94432934fe5f141d084f2e36ee2c0e614cc04 Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Tue, 2 Jul 2019 16:35:09 +0200 Subject: [PATCH 183/207] Bluetooth: Fix faulty expression for minimum encryption key size check Fix minimum encryption key size check so that HCI_MIN_ENC_KEY_SIZE is also allowed as stated in the comment. This bug caused connection problems with devices having maximum encryption key size of 7 octets (56-bit). Fixes: 693cd8ce3f88 ("Bluetooth: Fix regression with minimum encryption key size alignment") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203997 Signed-off-by: Matias Karhumaa Cc: stable@vger.kernel.org Signed-off-by: Marcel Holtmann Signed-off-by: Linus Torvalds --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9f77432dbe38..5406d7cd46ad 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1353,7 +1353,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) * actually encrypted before enforcing a key size. */ return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || - hcon->enc_key_size > HCI_MIN_ENC_KEY_SIZE); + hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE); } static void l2cap_do_start(struct l2cap_chan *chan) From c8ea9fce2baf7b643384f36f29e4194fa40d33a6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Jun 2019 18:32:26 +0800 Subject: [PATCH 184/207] lib/mpi: Fix karactx leak in mpi_powm Sometimes mpi_powm will leak karactx because a memory allocation failure causes a bail-out that skips the freeing of karactx. This patch moves the freeing of karactx to the end of the function like everything else so that it can't be skipped. Reported-by: syzbot+f7baccc38dcc1e094e77@syzkaller.appspotmail.com Fixes: cdec9cb5167a ("crypto: GnuPG based MPI lib - source files...") Cc: Signed-off-by: Herbert Xu Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/mpi/mpi-pow.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index a5c921e6d667..d3ca55093fa5 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -37,6 +37,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) { mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; + struct karatsuba_ctx karactx = {}; mpi_ptr_t xp_marker = NULL; mpi_ptr_t tspace = NULL; mpi_ptr_t rp, ep, mp, bp; @@ -163,13 +164,11 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) int c; mpi_limb_t e; mpi_limb_t carry_limb; - struct karatsuba_ctx karactx; xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); if (!xp) goto enomem; - memset(&karactx, 0, sizeof karactx); negative_result = (ep[0] & 1) && base->sign; i = esize - 1; @@ -294,8 +293,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) if (mod_shift_cnt) mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); MPN_NORMALIZE(rp, rsize); - - mpihelp_release_karatsuba_ctx(&karactx); } if (negative_result && rsize) { @@ -312,6 +309,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) leave: rc = 0; enomem: + mpihelp_release_karatsuba_ctx(&karactx); if (assign_rp) mpi_assign_limb_space(res, rp, size); if (mp_marker) From 1a0fad630e0b7cff38e7691b28b0517cfbb0633f Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 2 Jul 2019 09:53:25 +0200 Subject: [PATCH 185/207] crypto: cryptd - Fix skcipher instance memory leak cryptd_skcipher_free() fails to free the struct skcipher_instance allocated in cryptd_create_skcipher(), leading to a memory leak. This is detected by kmemleak on bootup on ARM64 platforms: unreferenced object 0xffff80003377b180 (size 1024): comm "cryptomgr_probe", pid 822, jiffies 4294894830 (age 52.760s) backtrace: kmem_cache_alloc_trace+0x270/0x2d0 cryptd_create+0x990/0x124c cryptomgr_probe+0x5c/0x1e8 kthread+0x258/0x318 ret_from_fork+0x10/0x1c Fixes: 4e0958d19bd8 ("crypto: cryptd - Add support for skcipher") Cc: Signed-off-by: Vincent Whitchurch Signed-off-by: Herbert Xu --- crypto/cryptd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index b3bb99390ae7..c8434042cbc1 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -393,6 +393,7 @@ static void cryptd_skcipher_free(struct skcipher_instance *inst) struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ctx->spawn); + kfree(inst); } static int cryptd_create_skcipher(struct crypto_template *tmpl, From 21d4120ec6f5b5992b01b96ac484701163917b63 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 2 Jul 2019 14:17:00 -0700 Subject: [PATCH 186/207] crypto: user - prevent operating on larval algorithms Michal Suchanek reported [1] that running the pcrypt_aead01 test from LTP [2] in a loop and holding Ctrl-C causes a NULL dereference of alg->cra_users.next in crypto_remove_spawns(), via crypto_del_alg(). The test repeatedly uses CRYPTO_MSG_NEWALG and CRYPTO_MSG_DELALG. The crash occurs when the instance that CRYPTO_MSG_DELALG is trying to unregister isn't a real registered algorithm, but rather is a "test larval", which is a special "algorithm" added to the algorithms list while the real algorithm is still being tested. Larvals don't have initialized cra_users, so that causes the crash. Normally pcrypt_aead01 doesn't trigger this because CRYPTO_MSG_NEWALG waits for the algorithm to be tested; however, CRYPTO_MSG_NEWALG returns early when interrupted. Everything else in the "crypto user configuration" API has this same bug too, i.e. it inappropriately allows operating on larval algorithms (though it doesn't look like the other cases can cause a crash). Fix this by making crypto_alg_match() exclude larval algorithms. [1] https://lkml.kernel.org/r/20190625071624.27039-1-msuchanek@suse.de [2] https://github.com/linux-test-project/ltp/blob/20190517/testcases/kernel/crypto/pcrypt_aead01.c Reported-by: Michal Suchanek Fixes: a38f7907b926 ("crypto: Add userspace configuration API") Cc: # v3.2+ Cc: Steffen Klassert Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/crypto_user_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c index f25d3f32c9c2..f93b691f8045 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user_base.c @@ -56,6 +56,9 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) list_for_each_entry(q, &crypto_alg_list, cra_list) { int match = 0; + if (crypto_is_larval(q)) + continue; + if ((q->cra_flags ^ p->cru_type) & p->cru_mask) continue; From 3b2d4dcf71c4a91b420f835e52ddea8192300a3b Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Wed, 3 Jul 2019 13:28:15 +0200 Subject: [PATCH 187/207] nfsd: Fix overflow causing non-working mounts on 1 TB machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 10a68cdf10 (nfsd: fix performance-limiting session calculation) (Linux 5.1-rc1 and 4.19.31), shares from NFS servers with 1 TB of memory cannot be mounted anymore. The mount just hangs on the client. The gist of commit 10a68cdf10 is the change below. -avail = clamp_t(int, avail, slotsize, avail/3); +avail = clamp_t(int, avail, slotsize, total_avail/3); Here are the macros. #define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) `total_avail` is 8,434,659,328 on the 1 TB machine. `clamp_t()` casts the values to `int`, which for 32-bit integers can only hold values −2,147,483,648 (−2^31) through 2,147,483,647 (2^31 − 1). `avail` (in the function signature) is just 65536, so that no overflow was happening. Before the commit the assignment would result in 21845, and `num = 4`. When using `total_avail`, it is causing the assignment to be 18446744072226137429 (printed as %lu), and `num` is then 4164608182. My next guess is, that `nfsd_drc_mem_used` is then exceeded, and the server thinks there is no memory available any more for this client. Updating the arguments of `clamp_t()` and `min_t()` to `unsigned long` fixes the issue. Now, `avail = 65536` (before commit 10a68cdf10 `avail = 21845`), but `num = 4` remains the same. Fixes: c54f24e338ed (nfsd: fix performance-limiting session calculation) Cc: stable@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618e66078ee5..1a0cdeb3b875 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1563,7 +1563,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) * Never use more than a third of the remaining memory, * unless it's the only way to give this client a slot: */ - avail = clamp_t(int, avail, slotsize, total_avail/3); + avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); From 78c68e8f5cd24bd32ba4ca1cdfb0c30cf0642685 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Tue, 25 Jun 2019 18:59:13 +0100 Subject: [PATCH 188/207] drm/imx: notify drm core before sending event during crtc disable Notify drm core before sending pending events during crtc disable. This fixes the first event after disable having an old stale timestamp by having drm_crtc_vblank_off update the timestamp to now. This was seen while debugging weston log message: Warning: computed repaint delay is insane: -8212 msec This occurred due to: 1. driver starts up 2. fbcon comes along and restores fbdev, enabling vblank 3. vblank_disable_fn fires via timer disabling vblank, keeping vblank seq number and time set at current value (some time later) 4. weston starts and does a modeset 5. atomic commit disables crtc while it does the modeset 6. ipu_crtc_atomic_disable sends vblank with old seq number and time Fixes: a474478642d5 ("drm/imx: fix crtc vblank state regression") Signed-off-by: Robert Beckett Reviewed-by: Daniel Vetter Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 9cc1d678674f..e04d6efff1b5 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -91,14 +91,14 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, ipu_dc_disable(ipu); ipu_prg_disable(ipu); + drm_crtc_vblank_off(crtc); + spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { drm_crtc_send_vblank_event(crtc, crtc->state->event); crtc->state->event = NULL; } spin_unlock_irq(&crtc->dev->event_lock); - - drm_crtc_vblank_off(crtc); } static void imx_drm_crtc_reset(struct drm_crtc *crtc) From 5aeab2bfc9ffa72d3ca73416635cb3785dfc076f Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Tue, 25 Jun 2019 18:59:15 +0100 Subject: [PATCH 189/207] drm/imx: only send event on crtc disable if kept disabled The event will be sent as part of the vblank enable during the modeset if the crtc is not being kept disabled. Fixes: 5f2f911578fb ("drm/imx: atomic phase 3 step 1: Use atomic configuration") Signed-off-by: Robert Beckett Reviewed-by: Daniel Vetter Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index e04d6efff1b5..c436a28d50e4 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -94,7 +94,7 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); spin_lock_irq(&crtc->dev->event_lock); - if (crtc->state->event) { + if (crtc->state->event && !crtc->state->active) { drm_crtc_send_vblank_event(crtc, crtc->state->event); crtc->state->event = NULL; } From 6994eefb0053799d2e07cd140df6c2ea106c41ee Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 4 Jul 2019 17:32:23 +0200 Subject: [PATCH 190/207] ptrace: Fix ->ptracer_cred handling for PTRACE_TRACEME Fix two issues: When called for PTRACE_TRACEME, ptrace_link() would obtain an RCU reference to the parent's objective credentials, then give that pointer to get_cred(). However, the object lifetime rules for things like struct cred do not permit unconditionally turning an RCU reference into a stable reference. PTRACE_TRACEME records the parent's credentials as if the parent was acting as the subject, but that's not the case. If a malicious unprivileged child uses PTRACE_TRACEME and the parent is privileged, and at a later point, the parent process becomes attacker-controlled (because it drops privileges and calls execve()), the attacker ends up with control over two processes with a privileged ptrace relationship, which can be abused to ptrace a suid binary and obtain root privileges. Fix both of these by always recording the credentials of the process that is requesting the creation of the ptrace relationship: current_cred() can't change under us, and current is the proper subject for access control. This change is theoretically userspace-visible, but I am not aware of any code that it will actually break. Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP") Signed-off-by: Jann Horn Acked-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8456b6e2205f..705887f63288 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, */ static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - rcu_read_lock(); - __ptrace_link(child, new_parent, __task_cred(new_parent)); - rcu_read_unlock(); + __ptrace_link(child, new_parent, current_cred()); } /** From b9705d8778e7adc97de38f405f835a2426e14d84 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 4 Jul 2019 15:14:36 -0700 Subject: [PATCH 191/207] mm/page_alloc.c: fix regression with deferred struct page init Commit 0e56acae4b4d ("mm: initialize MAX_ORDER_NR_PAGES at a time instead of doing larger sections") is causing a regression on some systems when the kernel is booted as Xen dom0. The system will just hang in early boot. Reason is an endless loop in get_page_from_freelist() in case the first zone looked at has no free memory. deferred_grow_zone() is always returning true due to the following code snipplet: /* If the zone is empty somebody else may have cleared out the zone */ if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_deferred_pfn)) { pgdat->first_deferred_pfn = ULONG_MAX; pgdat_resize_unlock(pgdat, &flags); return true; } This in turn results in the loop as get_page_from_freelist() is assuming forward progress can be made by doing some more struct page initialization. Link: http://lkml.kernel.org/r/20190620160821.4210-1-jgross@suse.com Fixes: 0e56acae4b4d ("mm: initialize MAX_ORDER_NR_PAGES at a time instead of doing larger sections") Signed-off-by: Juergen Gross Suggested-by: Alexander Duyck Acked-by: Alexander Duyck Cc: Michal Hocko Cc: Pavel Tatashin Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d66bc8abe0af..8e3bc949ebcc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1826,7 +1826,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order) first_deferred_pfn)) { pgdat->first_deferred_pfn = ULONG_MAX; pgdat_resize_unlock(pgdat, &flags); - return true; + /* Retry only once. */ + return first_deferred_pfn != ULONG_MAX; } /* From cbcfa130a911c613a1d9d921af2eea171c414172 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Jul 2019 15:14:39 -0700 Subject: [PATCH 192/207] fs/userfaultfd.c: disable irqs for fault_pending and event locks When IOCB_CMD_POLL is used on a userfaultfd, aio_poll() disables IRQs and takes kioctx::ctx_lock, then userfaultfd_ctx::fd_wqh.lock. This may have to wait for userfaultfd_ctx::fd_wqh.lock to be released by userfaultfd_ctx_read(), which in turn can be waiting for userfaultfd_ctx::fault_pending_wqh.lock or userfaultfd_ctx::event_wqh.lock. But elsewhere the fault_pending_wqh and event_wqh locks are taken with IRQs enabled. Since the IRQ handler may take kioctx::ctx_lock, lockdep reports that a deadlock is possible. Fix it by always disabling IRQs when taking the fault_pending_wqh and event_wqh locks. Commit ae62c16e105a ("userfaultfd: disable irqs when taking the waitqueue lock") didn't fix this because it only accounted for the fd_wqh lock, not the other locks nested inside it. Link: http://lkml.kernel.org/r/20190627075004.21259-1-ebiggers@kernel.org Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL") Signed-off-by: Eric Biggers Reported-by: syzbot+fab6de82892b6b9c6191@syzkaller.appspotmail.com Reported-by: syzbot+53c0b767f7ca0dc0c451@syzkaller.appspotmail.com Reported-by: syzbot+a3accb352f9c22041cfa@syzkaller.appspotmail.com Reviewed-by: Andrew Morton Cc: Christoph Hellwig Cc: Andrea Arcangeli Cc: [4.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ae0b8b5f69e6..ccbdbd62f0d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -40,6 +40,16 @@ enum userfaultfd_state { /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ @@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : TASK_KILLABLE; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * __add_wait_queue. */ set_current_state(blocking_state); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* @@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { struct vm_area_struct *vma; @@ -918,10 +928,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * the last page faults that may have been already waiting on * the fault_*wqh. */ - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); @@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can @@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (ret) userfaultfd_ctx_put(fork_nctx); } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); } return ret; @@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, @@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; @@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown From dffcac2cb88e4ec5906235d64a83d802580b119e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 4 Jul 2019 15:14:42 -0700 Subject: [PATCH 193/207] mm/vmscan.c: prevent useless kswapd loops In production we have noticed hard lockups on large machines running large jobs due to kswaps hoarding lru lock within isolate_lru_pages when sc->reclaim_idx is 0 which is a small zone. The lru was couple hundred GiBs and the condition (page_zonenum(page) > sc->reclaim_idx) in isolate_lru_pages() was basically skipping GiBs of pages while holding the LRU spinlock with interrupt disabled. On further inspection, it seems like there are two issues: (1) If kswapd on the return from balance_pgdat() could not sleep (i.e. node is still unbalanced), the classzone_idx is unintentionally set to 0 and the whole reclaim cycle of kswapd will try to reclaim only the lowest and smallest zone while traversing the whole memory. (2) Fundamentally isolate_lru_pages() is really bad when the allocation has woken kswapd for a smaller zone on a very large machine running very large jobs. It can hoard the LRU spinlock while skipping over 100s of GiBs of pages. This patch only fixes (1). (2) needs a more fundamental solution. To fix (1), in the kswapd context, if pgdat->kswapd_classzone_idx is invalid use the classzone_idx of the previous kswapd loop otherwise use the one the waker has requested. Link: http://lkml.kernel.org/r/20190701201847.251028-1-shakeelb@google.com Fixes: e716f2eb24de ("mm, vmscan: prevent kswapd sleeping prematurely due to mismatched classzone_idx") Signed-off-by: Shakeel Butt Reviewed-by: Yang Shi Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Vlastimil Babka Cc: Hillf Danton Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7889f583ced9..910e02c793ff 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3644,19 +3644,18 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) } /* - * pgdat->kswapd_classzone_idx is the highest zone index that a recent - * allocation request woke kswapd for. When kswapd has not woken recently, - * the value is MAX_NR_ZONES which is not a valid index. This compares a - * given classzone and returns it or the highest classzone index kswapd - * was recently woke for. + * The pgdat->kswapd_classzone_idx is used to pass the highest zone index to be + * reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is not + * a valid index then either kswapd runs for first time or kswapd couldn't sleep + * after previous reclaim attempt (node is still unbalanced). In that case + * return the zone index of the previous kswapd reclaim cycle. */ static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat, - enum zone_type classzone_idx) + enum zone_type prev_classzone_idx) { if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES) - return classzone_idx; - - return max(pgdat->kswapd_classzone_idx, classzone_idx); + return prev_classzone_idx; + return pgdat->kswapd_classzone_idx; } static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order, @@ -3797,7 +3796,7 @@ static int kswapd(void *p) /* Read the new order and classzone_idx */ alloc_order = reclaim_order = pgdat->kswapd_order; - classzone_idx = kswapd_classzone_idx(pgdat, 0); + classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx); pgdat->kswapd_order = 0; pgdat->kswapd_classzone_idx = MAX_NR_ZONES; @@ -3851,8 +3850,12 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, if (!cpuset_zone_allowed(zone, gfp_flags)) return; pgdat = zone->zone_pgdat; - pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, - classzone_idx); + + if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES) + pgdat->kswapd_classzone_idx = classzone_idx; + else + pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, + classzone_idx); pgdat->kswapd_order = max(pgdat->kswapd_order, order); if (!waitqueue_active(&pgdat->kswapd_wait)) return; From eef778c99c0239ed0a0696ddf22ae3673f28a489 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Jul 2019 15:14:45 -0700 Subject: [PATCH 194/207] devres: allow const resource arguments devm_ioremap_resource() does not currently take 'const' arguments, which results in a warning from the first driver trying to do it anyway: drivers/gpio/gpio-amd-fch.c: In function 'amd_fch_gpio_probe': drivers/gpio/gpio-amd-fch.c:171:49: error: passing argument 2 of 'devm_ioremap_resource' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] priv->base = devm_ioremap_resource(&pdev->dev, &amd_fch_gpio_iores); ^~~~~~~~~~~~~~~~~~~ Change the prototype to allow it, as there is no real reason not to. Link: http://lkml.kernel.org/r/20190628150049.1108048-1-arnd@arndb.de Fixes: 9bb2e0452508 ("gpio: amd: Make resource struct const") Signed-off-by: Arnd Bergmann Acked-by: Greg Kroah-Hartman Reviewed-by: Enrico Weigelt Reviewed-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Arnd Bergmann Cc: "Rafael J. Wysocki" Cc: Ulf Hansson Cc: Andy Shevchenko Cc: Heikki Krogerus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 3 ++- lib/devres.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 848fc71c6ba6..4a295e324ac5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -704,7 +704,8 @@ extern unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); extern void devm_free_pages(struct device *dev, unsigned long addr); -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res); void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, diff --git a/lib/devres.c b/lib/devres.c index 69bed2f38306..6a0e9bd6524a 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -131,7 +131,8 @@ EXPORT_SYMBOL(devm_iounmap); * if (IS_ERR(base)) * return PTR_ERR(base); */ -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res) { resource_size_t size; void __iomem *dest_ptr; From 8751853091998cd31e9e5f1e8206280155af8921 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jul 2019 15:14:49 -0700 Subject: [PATCH 195/207] swap_readpage(): avoid blk_wake_io_task() if !synchronous swap_readpage() sets waiter = bio->bi_private even if synchronous = F, this means that the caller can get the spurious wakeup after return. This can be fatal if blk_wake_io_task() does set_current_state(TASK_RUNNING) after the caller does set_special_state(), in the worst case the kernel can crash in do_task_dead(). Link: http://lkml.kernel.org/r/20190704160301.GA5956@redhat.com Fixes: 0619317ff8baa2d ("block: add polled wakeup task helper") Signed-off-by: Oleg Nesterov Reported-by: Qian Cai Acked-by: Hugh Dickins Reviewed-by: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 189415852077..a39aac2f8c8d 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -137,8 +137,10 @@ static void end_swap_bio_read(struct bio *bio) unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); bio_put(bio); - blk_wake_io_task(waiter); - put_task_struct(waiter); + if (waiter) { + blk_wake_io_task(waiter); + put_task_struct(waiter); + } } int generic_swapfile_activate(struct swap_info_struct *sis, @@ -395,11 +397,12 @@ int swap_readpage(struct page *page, bool synchronous) * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. */ - get_task_struct(current); - bio->bi_private = current; bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (synchronous) + if (synchronous) { bio->bi_opf |= REQ_HIPRI; + get_task_struct(current); + bio->bi_private = current; + } count_vm_event(PSWPIN); bio_get(bio); qc = submit_bio(bio); From 4c89cc73d1da42ae48b5c5dfbfd12304d0b86786 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Jun 2019 16:49:40 +0300 Subject: [PATCH 196/207] dmaengine: jz4780: Fix an endian bug in IRQ handler The "pending" variable was a u32 but we cast it to an unsigned long pointer when we do the for_each_set_bit() loop. The problem is that on big endian 64bit systems that results in an out of bounds read. Fixes: 4e4106f5e942 ("dmaengine: jz4780: Fix transfers being ACKed too soon") Signed-off-by: Dan Carpenter Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4780.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 263bee76ef0d..6b8c4c458e8a 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -718,12 +718,13 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) { struct jz4780_dma_dev *jzdma = data; unsigned int nb_channels = jzdma->soc_data->nb_channels; - uint32_t pending, dmac; + unsigned long pending; + uint32_t dmac; int i; pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); - for_each_set_bit(i, (unsigned long *)&pending, nb_channels) { + for_each_set_bit(i, &pending, nb_channels) { if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i])) pending &= ~BIT(i); } From 2b8066c3deb9140fdf258417a51479b2aeaa7622 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Mon, 24 Jun 2019 10:07:31 -0400 Subject: [PATCH 197/207] dmaengine: imx-sdma: fix use-after-free on probe error path If probe() fails anywhere beyond the point where sdma_get_firmware() is called, then a kernel oops may occur. Problematic sequence of events: 1. probe() calls sdma_get_firmware(), which schedules the firmware callback to run when firmware becomes available, using the sdma instance structure as the context 2. probe() encounters an error, which deallocates the sdma instance structure 3. firmware becomes available, firmware callback is called with deallocated sdma instance structure 4. use after free - kernel oops ! Solution: only attempt to load firmware when we're certain that probe() will succeed. This guarantees that the firmware callback's context will remain valid. Note that the remove() path is unaffected by this issue: the firmware loader will increment the driver module's use count, ensuring that the module cannot be unloaded while the firmware callback is pending or running. Signed-off-by: Sven Van Asbroeck Reviewed-by: Robin Gong [vkoul: fixed braces for if condition] Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 48 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 99d9f431ae2c..ba72fcfbebfe 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -2096,27 +2096,6 @@ static int sdma_probe(struct platform_device *pdev) if (pdata && pdata->script_addrs) sdma_add_scripts(sdma, pdata->script_addrs); - if (pdata) { - ret = sdma_get_firmware(sdma, pdata->fw_name); - if (ret) - dev_warn(&pdev->dev, "failed to get firmware from platform data\n"); - } else { - /* - * Because that device tree does not encode ROM script address, - * the RAM script in firmware is mandatory for device tree - * probe, otherwise it fails. - */ - ret = of_property_read_string(np, "fsl,sdma-ram-script-name", - &fw_name); - if (ret) - dev_warn(&pdev->dev, "failed to get firmware name\n"); - else { - ret = sdma_get_firmware(sdma, fw_name); - if (ret) - dev_warn(&pdev->dev, "failed to get firmware from device tree\n"); - } - } - sdma->dma_device.dev = &pdev->dev; sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources; @@ -2161,6 +2140,33 @@ static int sdma_probe(struct platform_device *pdev) of_node_put(spba_bus); } + /* + * Kick off firmware loading as the very last step: + * attempt to load firmware only if we're not on the error path, because + * the firmware callback requires a fully functional and allocated sdma + * instance. + */ + if (pdata) { + ret = sdma_get_firmware(sdma, pdata->fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware from platform data\n"); + } else { + /* + * Because that device tree does not encode ROM script address, + * the RAM script in firmware is mandatory for device tree + * probe, otherwise it fails. + */ + ret = of_property_read_string(np, "fsl,sdma-ram-script-name", + &fw_name); + if (ret) { + dev_warn(&pdev->dev, "failed to get firmware name\n"); + } else { + ret = sdma_get_firmware(sdma, fw_name); + if (ret) + dev_warn(&pdev->dev, "failed to get firmware from device tree\n"); + } + } + return 0; err_register: From 3f93a4f297961c12bb17aa16cb3a4d1291823cae Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 21 Jun 2019 16:23:06 +0800 Subject: [PATCH 198/207] dmaengine: imx-sdma: remove BD_INTR for channel0 It is possible for an irq triggered by channel0 to be received later after clks are disabled once firmware loaded during sdma probe. If that happens then clearing them by writing to SDMA_H_INTR won't work and the kernel will hang processing infinite interrupts. Actually, don't need interrupt triggered on channel0 since it's pollling SDMA_H_STATSTOP to know channel0 done rather than interrupt in current code, just clear BD_INTR to disable channel0 interrupt to avoid the above case. This issue was brought by commit 1d069bfa3c78 ("dmaengine: imx-sdma: ack channel 0 IRQ in the interrupt handler") which didn't take care the above case. Fixes: 1d069bfa3c78 ("dmaengine: imx-sdma: ack channel 0 IRQ in the interrupt handler") Cc: stable@vger.kernel.org #5.0+ Signed-off-by: Robin Gong Reported-by: Sven Van Asbroeck Tested-by: Sven Van Asbroeck Reviewed-by: Michael Olbrich Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index ba72fcfbebfe..4ec84a633bd3 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -703,7 +703,7 @@ static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size, spin_lock_irqsave(&sdma->channel_0_lock, flags); bd0->mode.command = C0_SETPM; - bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; + bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD; bd0->mode.count = size / 2; bd0->buffer_addr = buf_phys; bd0->ext_buffer_addr = address; @@ -1025,7 +1025,7 @@ static int sdma_load_context(struct sdma_channel *sdmac) context->gReg[7] = sdmac->watermark_level; bd0->mode.command = C0_SETDM; - bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; + bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD; bd0->mode.count = sizeof(*context) / 4; bd0->buffer_addr = sdma->context_phys; bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel; From f6034225442c4a87906d36e975fd9e99a8f95487 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Fri, 28 Jun 2019 17:39:46 +0530 Subject: [PATCH 199/207] dmaengine: qcom: bam_dma: Fix completed descriptors count One space is left unused in circular FIFO to differentiate 'full' and 'empty' cases. So take that in to account while counting for the descriptors completed. Fixes the issue reported here, https://lkml.org/lkml/2019/6/18/669 Cc: stable@vger.kernel.org Reported-by: Srinivas Kandagatla Signed-off-by: Sricharan R Tested-by: Srinivas Kandagatla Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 4b43844f6af5..8e90a405939d 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -799,6 +799,9 @@ static u32 process_channel_irqs(struct bam_device *bdev) /* Number of bytes available to read */ avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1); + if (offset < bchan->head) + avail--; + list_for_each_entry_safe(async_desc, tmp, &bchan->desc_list, desc_node) { /* Not enough data to read */ From e644fa18e2ffc8895ca30dade503ae10128573a6 Mon Sep 17 00:00:00 2001 From: Zhang Lei Date: Wed, 3 Jul 2019 18:42:50 +0100 Subject: [PATCH 200/207] KVM: arm64/sve: Fix vq_present() macro to yield a bool The original implementation of vq_present() relied on aggressive inlining in order for the compiler to know that the code is correct, due to some const-casting issues. This was causing sparse and clang to complain, while GCC compiled cleanly. Commit 0c529ff789bc addressed this problem, but since vq_present() is no longer a function, there is now no implicit casting of the returned value to the return type (bool). In set_sve_vls(), this uncast bit value is compared against a bool, and so may spuriously compare as unequal when both are nonzero. As a result, KVM may reject valid SVE vector length configurations as invalid, and vice versa. Fix it by forcing the returned value to a bool. Signed-off-by: Zhang Lei Fixes: 0c529ff789bc ("KVM: arm64: Implement vq_present() as a macro") Signed-off-by: Dave Martin [commit message rewrite] Cc: Viresh Kumar Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c2afa7982047..dfd626447482 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -208,7 +208,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64) #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq)) +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq))) static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { From 75f2d86b20bf6aec0392d6dd2ae3ffff26d2ae0e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Apr 2019 13:53:57 +0200 Subject: [PATCH 201/207] fs: VALIDATE_FS_PARSER should default to n CONFIG_VALIDATE_FS_PARSER is a debugging tool to check that the parser tables are vaguely sane. It was set to default to 'Y' for the moment to catch errors in upcoming fs conversion development. Make sure it is not enabled by default in the final release of v5.1. Fixes: 31d921c7fb969172 ("vfs: Add configuration parser helpers") Signed-off-by: Geert Uytterhoeven Signed-off-by: Al Viro --- fs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/Kconfig b/fs/Kconfig index f1046cf6ad85..bfb1c6095c7a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS config VALIDATE_FS_PARSER bool "Validate filesystem parameter description" - default y help Enable this to perform validation of the parameter description for a filesystem when it is registered. From f3a3ea28edd9a17588fede4ff53bc02d986cf4d1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Jun 2019 20:46:55 +0300 Subject: [PATCH 202/207] i2c: tegra: Add Dmitry as a reviewer I'm contributing to Tegra's upstream development in general and happened to review the Tegra's I2C patches for awhile because I'm actively using upstream kernel on all of my Tegra-powered devices and initially some of the submitted patches were getting my attention since they were causing problems. Recently Wolfram Sang asked whether I'm interested in becoming a reviewer for the driver and I don't mind at all. Signed-off-by: Dmitry Osipenko [wsa: ack was expressed by Thierry Reding in a mail thread] Signed-off-by: Wolfram Sang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 01a52fc964da..f16e5d047912 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15503,6 +15503,7 @@ F: drivers/dma/tegra* TEGRA I2C DRIVER M: Laxman Dewangan +R: Dmitry Osipenko S: Supported F: drivers/i2c/busses/i2c-tegra.c From 4f032640bf5751ce792b69d2abdf18e0f379b3ce Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 5 Jul 2019 11:25:29 +0200 Subject: [PATCH 203/207] Revert "mtd: rawnand: sunxi: Add A23/A33 DMA support" This reverts commit c49836f05aa15282f7280e06ede3f6f8a6324833. The commit is wrong and its approach actually does not work. Let's revert it in order to add the feature with a clean patch. Fixes: c49836f05aa1 ("mtd: rawnand: sunxi: Add A23/A33 DMA support") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/sunxi_nand.c | 38 ++----------------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index b021a5720b42..e93f39bc2bc5 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -43,7 +43,6 @@ #define NFC_REG_RCMD_SET 0x0028 #define NFC_REG_WCMD_SET 0x002C #define NFC_REG_A10_IO_DATA 0x0030 -#define NFC_REG_A23_IO_DATA 0x0300 #define NFC_REG_ECC_CTL 0x0034 #define NFC_REG_ECC_ST 0x0038 #define NFC_REG_DEBUG 0x003C @@ -205,14 +204,10 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand) * NAND Controller capabilities structure: stores NAND controller capabilities * for distinction between compatible strings. * - * @sram_through_ahb: On A23, we choose to access the internal RAM through AHB - * instead of MBUS (less configuration). A10, A10s, A13 and - * A20 use the MBUS but no extra configuration is needed. * @reg_io_data: I/O data register * @dma_maxburst: DMA maxburst */ struct sunxi_nfc_caps { - bool sram_through_ahb; unsigned int reg_io_data; unsigned int dma_maxburst; }; @@ -368,29 +363,10 @@ static int sunxi_nfc_dma_op_prepare(struct sunxi_nfc *nfc, const void *buf, goto err_unmap_buf; } - /* - * On A23, we suppose the "internal RAM" (p.12 of the NFC user manual) - * refers to the NAND controller's internal SRAM. This memory is mapped - * and so is accessible from the AHB. It seems that it can also be - * accessed by the MBUS. MBUS accesses are mandatory when using the - * internal DMA instead of the external DMA engine. - * - * During DMA I/O operation, either we access this memory from the AHB - * by clearing the NFC_RAM_METHOD bit, or we set the bit and use the - * MBUS. In this case, we should also configure the MBUS DMA length - * NFC_REG_MDMA_CNT(0xC4) to be chunksize * nchunks. NAND I/O over MBUS - * are also limited to 32kiB pages. - */ - if (nfc->caps->sram_through_ahb) - writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD, - nfc->regs + NFC_REG_CTL); - else - writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD, - nfc->regs + NFC_REG_CTL); - + writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD, + nfc->regs + NFC_REG_CTL); writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM); writel(chunksize, nfc->regs + NFC_REG_CNT); - dmat = dmaengine_submit(dmad); ret = dma_submit_error(dmat); @@ -2199,21 +2175,11 @@ static const struct sunxi_nfc_caps sunxi_nfc_a10_caps = { .dma_maxburst = 4, }; -static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = { - .sram_through_ahb = true, - .reg_io_data = NFC_REG_A23_IO_DATA, - .dma_maxburst = 8, -}; - static const struct of_device_id sunxi_nfc_ids[] = { { .compatible = "allwinner,sun4i-a10-nand", .data = &sunxi_nfc_a10_caps, }, - { - .compatible = "allwinner,sun8i-a23-nand-controller", - .data = &sunxi_nfc_a23_caps, - }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sunxi_nfc_ids); From c7a87ceb17aee9222c069a97aee4647260c7b3a6 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 8 Apr 2019 09:41:46 +0200 Subject: [PATCH 204/207] mtd: rawnand: sunxi: Add A23/A33 DMA support with extra MBUS configuration Allwinner NAND controllers can make use of DMA to enhance the I/O throughput thanks to ECC pipelining. DMA handling with A23/A33 NAND IP is a bit different than with the older SoCs, hence the introduction of a new compatible to handle: * the differences between register offsets, * the burst length change from 4 to minimum 8, * manage SRAM accesses through MBUS with extra configuration. Fixes: c49836f05aa1 ("mtd: rawnand: sunxi: Add A23/A33 DMA support") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/sunxi_nand.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index e93f39bc2bc5..89773293c64d 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -43,6 +43,7 @@ #define NFC_REG_RCMD_SET 0x0028 #define NFC_REG_WCMD_SET 0x002C #define NFC_REG_A10_IO_DATA 0x0030 +#define NFC_REG_A23_IO_DATA 0x0300 #define NFC_REG_ECC_CTL 0x0034 #define NFC_REG_ECC_ST 0x0038 #define NFC_REG_DEBUG 0x003C @@ -50,6 +51,7 @@ #define NFC_REG_USER_DATA(x) (0x0050 + ((x) * 4)) #define NFC_REG_SPARE_AREA 0x00A0 #define NFC_REG_PAT_ID 0x00A4 +#define NFC_REG_MDMA_CNT 0x00C4 #define NFC_RAM0_BASE 0x0400 #define NFC_RAM1_BASE 0x0800 @@ -68,6 +70,7 @@ #define NFC_PAGE_SHIFT(x) (((x) < 10 ? 0 : (x) - 10) << 8) #define NFC_SAM BIT(12) #define NFC_RAM_METHOD BIT(14) +#define NFC_DMA_TYPE_NORMAL BIT(15) #define NFC_DEBUG_CTL BIT(31) /* define bit use in NFC_ST */ @@ -204,10 +207,13 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand) * NAND Controller capabilities structure: stores NAND controller capabilities * for distinction between compatible strings. * + * @extra_mbus_conf: Contrary to A10, A10s and A13, accessing internal RAM + * through MBUS on A23/A33 needs extra configuration. * @reg_io_data: I/O data register * @dma_maxburst: DMA maxburst */ struct sunxi_nfc_caps { + bool extra_mbus_conf; unsigned int reg_io_data; unsigned int dma_maxburst; }; @@ -367,6 +373,9 @@ static int sunxi_nfc_dma_op_prepare(struct sunxi_nfc *nfc, const void *buf, nfc->regs + NFC_REG_CTL); writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM); writel(chunksize, nfc->regs + NFC_REG_CNT); + if (nfc->caps->extra_mbus_conf) + writel(chunksize * nchunks, nfc->regs + NFC_REG_MDMA_CNT); + dmat = dmaengine_submit(dmad); ret = dma_submit_error(dmat); @@ -2127,6 +2136,11 @@ static int sunxi_nfc_probe(struct platform_device *pdev) dmac_cfg.src_maxburst = nfc->caps->dma_maxburst; dmac_cfg.dst_maxburst = nfc->caps->dma_maxburst; dmaengine_slave_config(nfc->dmac, &dmac_cfg); + + if (nfc->caps->extra_mbus_conf) + writel(readl(nfc->regs + NFC_REG_CTL) | + NFC_DMA_TYPE_NORMAL, nfc->regs + NFC_REG_CTL); + } else { dev_warn(dev, "failed to request rxtx DMA channel\n"); } @@ -2175,11 +2189,21 @@ static const struct sunxi_nfc_caps sunxi_nfc_a10_caps = { .dma_maxburst = 4, }; +static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = { + .extra_mbus_conf = true, + .reg_io_data = NFC_REG_A23_IO_DATA, + .dma_maxburst = 8, +}; + static const struct of_device_id sunxi_nfc_ids[] = { { .compatible = "allwinner,sun4i-a10-nand", .data = &sunxi_nfc_a10_caps, }, + { + .compatible = "allwinner,sun8i-a23-nand-controller", + .data = &sunxi_nfc_a23_caps, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sunxi_nfc_ids); From 69bf4b6b54fb7f52b7ea9ce28d4a360cd5ec956d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Jul 2019 19:55:18 -0700 Subject: [PATCH 205/207] Revert "mm: page cache: store only head pages in i_pages" This reverts commit 5fd4ca2d84b249f0858ce28cf637cf25b61a398f. Mikhail Gavrilov reports that it causes the VM_BUG_ON_PAGE() in __delete_from_swap_cache() to trigger: page:ffffd6d34dff0000 refcount:1 mapcount:1 mapping:ffff97812323a689 index:0xfecec363 anon flags: 0x17fffe00080034(uptodate|lru|active|swapbacked) raw: 0017fffe00080034 ffffd6d34c67c508 ffffd6d3504b8d48 ffff97812323a689 raw: 00000000fecec363 0000000000000000 0000000100000000 ffff978433ace000 page dumped because: VM_BUG_ON_PAGE(entry != page) page->mem_cgroup:ffff978433ace000 ------------[ cut here ]------------ kernel BUG at mm/swap_state.c:170! invalid opcode: 0000 [#1] SMP NOPTI CPU: 1 PID: 221 Comm: kswapd0 Not tainted 5.2.0-0.rc2.git0.1.fc31.x86_64 #1 Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 2202 04/11/2019 RIP: 0010:__delete_from_swap_cache+0x20d/0x240 Code: 30 65 48 33 04 25 28 00 00 00 75 4a 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c6 2f dc 0f 8a 48 89 c7 e8 93 1b fd ff <0f> 0b 48 c7 c6 a8 74 0f 8a e8 85 1b fd ff 0f 0b 48 c7 c6 a8 7d 0f RSP: 0018:ffffa982036e7980 EFLAGS: 00010046 RAX: 0000000000000021 RBX: 0000000000000040 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000086 RDI: ffff97843d657900 RBP: 0000000000000001 R08: ffffa982036e7835 R09: 0000000000000535 R10: ffff97845e21a46c R11: ffffa982036e7835 R12: ffff978426387120 R13: 0000000000000000 R14: ffffd6d34dff0040 R15: ffffd6d34dff0000 FS: 0000000000000000(0000) GS:ffff97843d640000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00002cba88ef5000 CR3: 000000078a97c000 CR4: 00000000003406e0 Call Trace: delete_from_swap_cache+0x46/0xa0 try_to_free_swap+0xbc/0x110 swap_writepage+0x13/0x70 pageout.isra.0+0x13c/0x350 shrink_page_list+0xc14/0xdf0 shrink_inactive_list+0x1e5/0x3c0 shrink_node_memcg+0x202/0x760 shrink_node+0xe0/0x470 balance_pgdat+0x2d1/0x510 kswapd+0x220/0x420 kthread+0xfb/0x130 ret_from_fork+0x22/0x40 and it's not immediately obvious why it happens. It's too late in the rc cycle to do anything but revert for now. Link: https://lore.kernel.org/lkml/CABXGCsN9mYmBD-4GaaeW_NrDu+FDXLzr_6x+XNxfmFV6QkYCDg@mail.gmail.com/ Reported-and-bisected-by: Mikhail Gavrilov Suggested-by: Jan Kara Cc: Michal Hocko Cc: Vlastimil Babka Cc: Matthew Wilcox Cc: Kirill Shutemov Cc: William Kucharski Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 13 ---- mm/filemap.c | 146 ++++++++++++++++++++++++---------------- mm/huge_memory.c | 3 - mm/khugepaged.c | 4 +- mm/memfd.c | 2 - mm/migrate.c | 2 +- mm/shmem.c | 2 +- mm/swap_state.c | 4 +- 8 files changed, 94 insertions(+), 82 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9ec3544baee2..fe0b29bf2df7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -333,19 +333,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -static inline struct page *find_subpage(struct page *page, pgoff_t offset) -{ - unsigned long mask; - - if (PageHuge(page)) - return page; - - VM_BUG_ON_PAGE(PageTail(page), page); - - mask = (1UL << compound_order(page)) - 1; - return page + (offset & mask); -} - struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, diff --git a/mm/filemap.c b/mm/filemap.c index df2006ba0cfa..6dd9a2274c80 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -281,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache); * @pvec: pagevec with pages to delete * * The function walks over mapping->i_pages and removes pages passed in @pvec - * from the mapping. The function expects @pvec to be sorted by page index - * and is optimised for it to be dense. + * from the mapping. The function expects @pvec to be sorted by page index. * It tolerates holes in @pvec (mapping entries at those indices are not * modified). The function expects only THP head pages to be present in the - * @pvec. + * @pvec and takes care to delete all corresponding tail pages from the + * mapping as well. * * The function expects the i_pages lock to be held. */ @@ -294,44 +294,40 @@ static void page_cache_delete_batch(struct address_space *mapping, { XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); int total_pages = 0; - int i = 0; + int i = 0, tail_pages = 0; struct page *page; mapping_set_update(&xas, mapping); xas_for_each(&xas, page, ULONG_MAX) { - if (i >= pagevec_count(pvec)) + if (i >= pagevec_count(pvec) && !tail_pages) break; - - /* A swap/dax/shadow entry got inserted? Skip it. */ if (xa_is_value(page)) continue; - /* - * A page got inserted in our range? Skip it. We have our - * pages locked so they are protected from being removed. - * If we see a page whose index is higher than ours, it - * means our page has been removed, which shouldn't be - * possible because we're holding the PageLock. - */ - if (page != pvec->pages[i]) { - VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, - page); - continue; - } - - WARN_ON_ONCE(!PageLocked(page)); - - if (page->index == xas.xa_index) + if (!tail_pages) { + /* + * Some page got inserted in our range? Skip it. We + * have our pages locked so they are protected from + * being removed. + */ + if (page != pvec->pages[i]) { + VM_BUG_ON_PAGE(page->index > + pvec->pages[i]->index, page); + continue; + } + WARN_ON_ONCE(!PageLocked(page)); + if (PageTransHuge(page) && !PageHuge(page)) + tail_pages = HPAGE_PMD_NR - 1; page->mapping = NULL; - /* Leave page->index set: truncation lookup relies on it */ - - /* - * Move to the next page in the vector if this is a regular - * page or the index is of the last sub-page of this compound - * page. - */ - if (page->index + (1UL << compound_order(page)) - 1 == - xas.xa_index) + /* + * Leave page->index set: truncation lookup relies + * upon it + */ i++; + } else { + VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages + != pvec->pages[i]->index, page); + tail_pages--; + } xas_store(&xas, NULL); total_pages++; } @@ -1498,7 +1494,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) { XA_STATE(xas, &mapping->i_pages, offset); - struct page *page; + struct page *head, *page; rcu_read_lock(); repeat: @@ -1513,19 +1509,25 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) if (!page || xa_is_value(page)) goto out; - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto repeat; + /* The page was split under us? */ + if (compound_head(page) != head) { + put_page(head); + goto repeat; + } + /* - * Has the page moved or been split? + * Has the page moved? * This is part of the lockless pagecache protocol. See * include/linux/pagemap.h for details. */ if (unlikely(page != xas_reload(&xas))) { - put_page(page); + put_page(head); goto repeat; } - page = find_subpage(page, offset); out: rcu_read_unlock(); @@ -1707,6 +1709,7 @@ unsigned find_get_entries(struct address_space *mapping, rcu_read_lock(); xas_for_each(&xas, page, ULONG_MAX) { + struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1717,13 +1720,17 @@ unsigned find_get_entries(struct address_space *mapping, if (xa_is_value(page)) goto export; - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto retry; - /* Has the page moved or been split? */ + /* The page was split under us? */ + if (compound_head(page) != head) + goto put_page; + + /* Has the page moved? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - page = find_subpage(page, xas.xa_index); export: indices[ret] = xas.xa_index; @@ -1732,7 +1739,7 @@ unsigned find_get_entries(struct address_space *mapping, break; continue; put_page: - put_page(page); + put_page(head); retry: xas_reset(&xas); } @@ -1774,27 +1781,33 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, rcu_read_lock(); xas_for_each(&xas, page, end) { + struct page *head; if (xas_retry(&xas, page)) continue; /* Skip over shadow, swap and DAX entries */ if (xa_is_value(page)) continue; - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto retry; - /* Has the page moved or been split? */ + /* The page was split under us? */ + if (compound_head(page) != head) + goto put_page; + + /* Has the page moved? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = find_subpage(page, xas.xa_index); + pages[ret] = page; if (++ret == nr_pages) { *start = xas.xa_index + 1; goto out; } continue; put_page: - put_page(page); + put_page(head); retry: xas_reset(&xas); } @@ -1839,6 +1852,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); for (page = xas_load(&xas); page; page = xas_next(&xas)) { + struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1848,19 +1862,24 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, if (xa_is_value(page)) break; - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto retry; - /* Has the page moved or been split? */ + /* The page was split under us? */ + if (compound_head(page) != head) + goto put_page; + + /* Has the page moved? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = find_subpage(page, xas.xa_index); + pages[ret] = page; if (++ret == nr_pages) break; continue; put_page: - put_page(page); + put_page(head); retry: xas_reset(&xas); } @@ -1896,6 +1915,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, rcu_read_lock(); xas_for_each_marked(&xas, page, end, tag) { + struct page *head; if (xas_retry(&xas, page)) continue; /* @@ -1906,21 +1926,26 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, if (xa_is_value(page)) continue; - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) goto retry; - /* Has the page moved or been split? */ + /* The page was split under us? */ + if (compound_head(page) != head) + goto put_page; + + /* Has the page moved? */ if (unlikely(page != xas_reload(&xas))) goto put_page; - pages[ret] = find_subpage(page, xas.xa_index); + pages[ret] = page; if (++ret == nr_pages) { *index = xas.xa_index + 1; goto out; } continue; put_page: - put_page(page); + put_page(head); retry: xas_reset(&xas); } @@ -2603,7 +2628,7 @@ void filemap_map_pages(struct vm_fault *vmf, pgoff_t last_pgoff = start_pgoff; unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); - struct page *page; + struct page *head, *page; rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2612,19 +2637,24 @@ void filemap_map_pages(struct vm_fault *vmf, if (xa_is_value(page)) goto next; + head = compound_head(page); + /* * Check for a locked page first, as a speculative * reference may adversely influence page migration. */ - if (PageLocked(page)) + if (PageLocked(head)) goto next; - if (!page_cache_get_speculative(page)) + if (!page_cache_get_speculative(head)) goto next; - /* Has the page moved or been split? */ + /* The page was split under us? */ + if (compound_head(page) != head) + goto skip; + + /* Has the page moved? */ if (unlikely(page != xas_reload(&xas))) goto skip; - page = find_subpage(page, xas.xa_index); if (!PageUptodate(page) || PageReadahead(page) || diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bb8b617e34ed..885642c82aaa 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2496,9 +2496,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) shmem_uncharge(head->mapping->host, 1); put_page(head + i); - } else if (!PageAnon(page)) { - __xa_store(&head->mapping->i_pages, head[i].index, - head + i, 0); } } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0f7419938008..eaaa21b23215 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1378,7 +1378,7 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_FAIL; goto xa_locked; } - xas_store(&xas, new_page); + xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); nr_none++; continue; } @@ -1454,7 +1454,7 @@ static void collapse_shmem(struct mm_struct *mm, list_add_tail(&page->lru, &pagelist); /* Finally, replace with the new page. */ - xas_store(&xas, new_page); + xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); continue; out_unlock: unlock_page(page); diff --git a/mm/memfd.c b/mm/memfd.c index 2647c898990c..650e65a46b9c 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -39,7 +39,6 @@ static void memfd_tag_pins(struct xa_state *xas) xas_for_each(xas, page, ULONG_MAX) { if (xa_is_value(page)) continue; - page = find_subpage(page, xas->xa_index); if (page_count(page) - page_mapcount(page) > 1) xas_set_mark(xas, MEMFD_TAG_PINNED); @@ -89,7 +88,6 @@ static int memfd_wait_for_pins(struct address_space *mapping) bool clear = true; if (xa_is_value(page)) continue; - page = find_subpage(page, xas.xa_index); if (page_count(page) - page_mapcount(page) != 1) { /* * On the last scan, we clean up all those tags diff --git a/mm/migrate.c b/mm/migrate.c index f2ecc2855a12..e9594bc0d406 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -463,7 +463,7 @@ int migrate_page_move_mapping(struct address_space *mapping, for (i = 1; i < HPAGE_PMD_NR; i++) { xas_next(&xas); - xas_store(&xas, newpage); + xas_store(&xas, newpage + i); } } diff --git a/mm/shmem.c b/mm/shmem.c index 1bb3b8dc8bb2..f4dce9c8670d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -614,7 +614,7 @@ static int shmem_add_to_page_cache(struct page *page, if (xas_error(&xas)) goto unlock; next: - xas_store(&xas, page); + xas_store(&xas, page + i); if (++i < nr) { xas_next(&xas); goto next; diff --git a/mm/swap_state.c b/mm/swap_state.c index eb714165afd2..85245fdec8d9 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -132,7 +132,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) for (i = 0; i < nr; i++) { VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); set_page_private(page + i, entry.val + i); - xas_store(&xas, page); + xas_store(&xas, page + i); xas_next(&xas); } address_space->nrpages += nr; @@ -167,7 +167,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry) for (i = 0; i < nr; i++) { void *entry = xas_store(&xas, NULL); - VM_BUG_ON_PAGE(entry != page, entry); + VM_BUG_ON_PAGE(entry != page + i, entry); set_page_private(page + i, 0); xas_next(&xas); } From 7e41c3c9b6ceb2da52ba9d2b328d1851f269a48e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 6 Jul 2019 17:50:32 +0200 Subject: [PATCH 206/207] blk-mq: fix up placement of debugfs directory of queue files When the blk-mq debugfs file creation logic was "cleaned up" it was cleaned up too much, causing the queue file to not be created in the correct location. Turns out the check for the directory being present is needed as if that has not happened yet, the files should not be created, and the function will be called later on in the initialization code so that the files can be created in the correct location. Fixes: 6cfc0081b046 ("blk-mq: no need to check return value of debugfs_create functions") Reported-by: Stephen Rothwell Cc: linux-block@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 2489ddbb21db..3afe327f816f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -934,6 +934,13 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + /* + * If the parent directory has not been created yet, return, we will be + * called again later on and the directory/files will be created then. + */ + if (!q->debugfs_dir) + return; + if (!e->queue_debugfs_attrs) return; From 0ecfebd2b52404ae0c54a878c872bb93363ada36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Jul 2019 15:41:56 -0700 Subject: [PATCH 207/207] Linux 5.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fabc127d127f..3e4868a6498b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Bobtail Squid # *DOCUMENTATION*