mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 12:21:22 -05:00
Merge branch 'next' into for-linus
Prepare input updates for 6.18 merge window.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -114,6 +114,7 @@ modules.order
|
||||
!.gitignore
|
||||
!.kunitconfig
|
||||
!.mailmap
|
||||
!.pylintrc
|
||||
!.rustfmt.toml
|
||||
|
||||
#
|
||||
|
||||
30
.mailmap
30
.mailmap
@@ -138,6 +138,7 @@ Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
|
||||
Benno Lossin <lossin@kernel.org> <benno.lossin@proton.me>
|
||||
Bernard Metzler <bernard.metzler@linux.dev> <bmt@zurich.ibm.com>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
|
||||
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
|
||||
@@ -197,6 +198,7 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
|
||||
Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
|
||||
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
|
||||
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
|
||||
Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
|
||||
David Brownell <david-b@pacbell.net>
|
||||
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
|
||||
David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
|
||||
@@ -222,6 +224,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com>
|
||||
Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com>
|
||||
Domen Puncer <domen@coderock.org>
|
||||
Douglas Gilbert <dougg@torque.net>
|
||||
Drew Fustini <fustini@kernel.org> <drew@pdp7.com>
|
||||
<duje@dujemihanovic.xyz> <duje.mihanovic@skole.hr>
|
||||
Ed L. Cashin <ecashin@coraid.com>
|
||||
Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
|
||||
@@ -282,8 +286,10 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br>
|
||||
Gustavo Padovan <padovan@profusion.mobi>
|
||||
Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
|
||||
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
|
||||
Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
|
||||
Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>
|
||||
Hans de Goede <hansg@kernel.org> <hdegoede@redhat.com>
|
||||
Hans Verkuil <hverkuil@kernel.org> <hverkuil@xs4all.nl>
|
||||
Hans Verkuil <hverkuil@kernel.org> <hverkuil-cisco@xs4all.nl>
|
||||
Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com>
|
||||
Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
|
||||
Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com>
|
||||
Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com>
|
||||
@@ -412,6 +418,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||
Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
|
||||
Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
|
||||
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
|
||||
Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com>
|
||||
Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com>
|
||||
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org>
|
||||
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org>
|
||||
@@ -426,6 +433,9 @@ Krzysztof Wilczyński <kwilczynski@kernel.org> <krzysztof.wilczynski@linux.com>
|
||||
Krzysztof Wilczyński <kwilczynski@kernel.org> <kw@linux.com>
|
||||
Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
|
||||
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
|
||||
Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.com>
|
||||
Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.co.jp>
|
||||
Kuniyuki Iwashima <kuniyu@google.com> <kuni1840@gmail.com>
|
||||
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
|
||||
Lee Jones <lee@kernel.org> <joneslee@google.com>
|
||||
Lee Jones <lee@kernel.org> <lee.jones@canonical.com>
|
||||
@@ -663,6 +673,7 @@ Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
|
||||
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
|
||||
Rudolf Marek <R.Marek@sh.cvut.cz>
|
||||
Rui Saraiva <rmps@joel.ist.utl.pt>
|
||||
Sachin Mokashi <sachin.mokashi@intel.com> <sachinx.mokashi@intel.com>
|
||||
Sachin P Sant <ssant@in.ibm.com>
|
||||
Sai Prakash Ranjan <quic_saipraka@quicinc.com> <saiprakash.ranjan@codeaurora.org>
|
||||
Sakari Ailus <sakari.ailus@linux.intel.com> <sakari.ailus@iki.fi>
|
||||
@@ -686,11 +697,16 @@ Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
|
||||
Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
|
||||
Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
|
||||
Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
|
||||
Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky.work@gmail.com>
|
||||
Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@gmail.com>
|
||||
Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@mail.by>
|
||||
Sergey Senozhatsky <senozhatsky@chromium.org> <senozhatsky@google.com>
|
||||
Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
|
||||
Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com>
|
||||
Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
|
||||
Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
|
||||
Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
|
||||
Shannon Nelson <sln@onemain.com> <shannon.nelson@amd.com>
|
||||
Shannon Nelson <sln@onemain.com> <snelson@pensando.io>
|
||||
Shannon Nelson <sln@onemain.com> <shannon.nelson@intel.com>
|
||||
Shannon Nelson <sln@onemain.com> <shannon.nelson@oracle.com>
|
||||
Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org>
|
||||
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
|
||||
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
|
||||
@@ -719,6 +735,7 @@ Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
|
||||
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
|
||||
Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> <sriram.yagnaraman@est.tech>
|
||||
Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com>
|
||||
Stanislav Fomichev <sdf@fomichev.me> <stfomichev@gmail.com>
|
||||
Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
|
||||
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
|
||||
@@ -823,3 +840,6 @@ Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
|
||||
Yusuke Goda <goda.yusuke@renesas.com>
|
||||
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
|
||||
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
|
||||
Zijun Hu <zijun.hu@oss.qualcomm.com> <quic_zijuhu@quicinc.com>
|
||||
Zijun Hu <zijun.hu@oss.qualcomm.com> <zijuhu@codeaurora.org>
|
||||
Zijun Hu <zijun_hu@htc.com>
|
||||
|
||||
15
CREDITS
15
CREDITS
@@ -1397,6 +1397,10 @@ N: Thomas Gleixner
|
||||
E: tglx@linutronix.de
|
||||
D: NAND flash hardware support, JFFS2 on NAND flash
|
||||
|
||||
N: Jérôme Glisse
|
||||
E: jglisse@redhat.com
|
||||
D: HMM - Heterogeneous Memory Management
|
||||
|
||||
N: Richard E. Gooch
|
||||
E: rgooch@atnf.csiro.au
|
||||
D: parent process death signal to children
|
||||
@@ -2981,6 +2985,11 @@ S: 521 Pleasant Valley Road
|
||||
S: Potsdam, New York 13676
|
||||
S: USA
|
||||
|
||||
N: Shannon Nelson
|
||||
E: sln@onemain.com
|
||||
D: Worked on several network drivers including
|
||||
D: ixgbe, i40e, ionic, pds_core, pds_vdpa, pds_fwctl
|
||||
|
||||
N: Dave Neuer
|
||||
E: dave.neuer@pobox.com
|
||||
D: Helped implement support for Compaq's H31xx series iPAQs
|
||||
@@ -4369,6 +4378,12 @@ S: 542 West 112th Street, 5N
|
||||
S: New York, New York 10025
|
||||
S: USA
|
||||
|
||||
N: Masahiro Yamada
|
||||
E: masahiroy@kernel.org
|
||||
D: Kbuild Maintainer 2017-2025
|
||||
D: Kconfig Maintainer 2018-2025
|
||||
S: Japan
|
||||
|
||||
N: Li Yang
|
||||
E: leoli@freescale.com
|
||||
D: Freescale Highspeed USB device driver
|
||||
|
||||
@@ -46,7 +46,9 @@ Every file in these directories will contain the following information:
|
||||
|
||||
What: Short description of the interface
|
||||
Date: Date created
|
||||
KernelVersion: Kernel version this feature first showed up in.
|
||||
KernelVersion: (Optional) Kernel version this feature first showed up in.
|
||||
Note: git history often provides more accurate version
|
||||
info, so this field may be omitted.
|
||||
Contact: Primary contact for this interface (may be a mailing list)
|
||||
Description: Long description of the interface and how to use it.
|
||||
Users: All users of this interface who wish to be notified when
|
||||
|
||||
20
Documentation/ABI/obsolete/automount-tracefs-debugfs
Normal file
20
Documentation/ABI/obsolete/automount-tracefs-debugfs
Normal file
@@ -0,0 +1,20 @@
|
||||
What: /sys/kernel/debug/tracing
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.27
|
||||
Contact: linux-trace-kernel@vger.kernel.org
|
||||
Description:
|
||||
|
||||
When ftrace was first added to the kernel, its interface was placed
|
||||
into the debugfs file system under the "tracing" directory. Access
|
||||
to the files was through /sys/kernel/debug/tracing. As systems wanted
|
||||
access to the tracing interface without having to enable debugfs, a
|
||||
new interface was created called "tracefs". This was a stand alone
|
||||
file system and was usually mounted in /sys/kernel/tracing.
|
||||
|
||||
To allow older tooling to continue to operate, when mounting
|
||||
debugfs, the tracefs file system would automatically get mounted in
|
||||
the "tracing" directory of debugfs. The tracefs interface was added
|
||||
in January 2015 in the v4.1 kernel.
|
||||
|
||||
All tooling should now be using tracefs directly and the "tracing"
|
||||
directory in debugfs should be removed by January 2030.
|
||||
@@ -48,10 +48,6 @@ What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en
|
||||
@@ -73,10 +69,6 @@ What: /sys/.../iio:deviceX/scan_elements/in_incli_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_pressure_type
|
||||
@@ -110,10 +102,6 @@ Description:
|
||||
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_accel_x_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index
|
||||
What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index
|
||||
|
||||
10
Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
Normal file
10
Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
Normal file
@@ -0,0 +1,10 @@
|
||||
What: /sys/devices/platform/samsung/battery_life_extender
|
||||
Date: December 1, 2011
|
||||
KernelVersion: 3.3
|
||||
Contact: Corentin Chary <corentin.chary@gmail.com>
|
||||
Description: Max battery charge level can be modified, battery cycle
|
||||
life can be extended by reducing the max battery charge
|
||||
level.
|
||||
|
||||
- 0 means normal battery mode (100% charge)
|
||||
- 1 means battery life extender mode (80% charge)
|
||||
@@ -19,14 +19,22 @@ Description:
|
||||
/export ... asks the kernel to export a GPIO to userspace
|
||||
/unexport ... to return a GPIO to the kernel
|
||||
/gpioN ... for each exported GPIO #N OR
|
||||
/<LINE-NAME> ... for a properly named GPIO line
|
||||
/value ... always readable, writes fail for input GPIOs
|
||||
/direction ... r/w as: in, out (default low); write: high, low
|
||||
/edge ... r/w as: none, falling, rising, both
|
||||
/active_low ... r/w as: 0, 1
|
||||
/gpiochipN ... for each gpiochip; #N is its first GPIO
|
||||
/base ... (r/o) same as N
|
||||
/label ... (r/o) descriptive, not necessarily unique
|
||||
/label ... (r/o) descriptive chip name
|
||||
/ngpio ... (r/o) number of GPIOs; numbered N to N + (ngpio - 1)
|
||||
/gpio<OFFSET>
|
||||
/value ... always readable, writes fail for input GPIOs
|
||||
/direction ... r/w as: in, out (default low); write: high, low
|
||||
/chipX ... for each gpiochip; #X is the gpio device ID
|
||||
/export ... asks the kernel to export a GPIO at HW offset X to userspace
|
||||
/unexport ... to return a GPIO at HW offset X to the kernel
|
||||
/label ... (r/o) descriptive chip name
|
||||
/ngpio ... (r/o) number of GPIOs exposed by the chip
|
||||
|
||||
This ABI is obsoleted by Documentation/ABI/testing/gpio-cdev and will be
|
||||
removed after 2020.
|
||||
|
||||
8
Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
Normal file
8
Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
Normal file
@@ -0,0 +1,8 @@
|
||||
What: /sys/bus/platform/devices/VPC2004:*/conservation_mode
|
||||
Date: Aug 2017
|
||||
KernelVersion: 4.14
|
||||
Contact: platform-driver-x86@vger.kernel.org
|
||||
Description:
|
||||
Controls whether the conservation mode is enabled or not.
|
||||
This feature limits the maximum battery charge percentage to
|
||||
around 50-60% in order to prolong the lifetime of the battery.
|
||||
@@ -731,7 +731,7 @@ Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RW] If the device is registered for writeback throttling, then
|
||||
this file shows the target minimum read latency. If this latency
|
||||
is exceeded in a given window of time (see wb_window_usec), then
|
||||
is exceeded in a given window of time (see curr_win_nsec), then
|
||||
the writeback throttling will start scaling back writes. Writing
|
||||
a value of '0' to this file disables the feature. Writing a
|
||||
value of '-1' to this file resets the value to the default
|
||||
@@ -778,6 +778,39 @@ Description:
|
||||
0, write zeroes is not supported by the device.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/write_zeroes_unmap_max_hw_bytes
|
||||
Date: January 2025
|
||||
Contact: Zhang Yi <yi.zhang@huawei.com>
|
||||
Description:
|
||||
[RO] This file indicates whether a device supports zeroing data
|
||||
in a specified block range without incurring the cost of
|
||||
physically writing zeroes to the media for each individual
|
||||
block. If this parameter is set to write_zeroes_max_bytes, the
|
||||
device implements a zeroing operation which opportunistically
|
||||
avoids writing zeroes to media while still guaranteeing that
|
||||
subsequent reads from the specified block range will return
|
||||
zeroed data. This operation is a best-effort optimization, a
|
||||
device may fall back to physically writing zeroes to the media
|
||||
due to other factors such as misalignment or being asked to
|
||||
clear a block range smaller than the device's internal
|
||||
allocation unit. If this parameter is set to 0, the device may
|
||||
have to write each logical block to the media during a zeroing
|
||||
operation.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/write_zeroes_unmap_max_bytes
|
||||
Date: January 2025
|
||||
Contact: Zhang Yi <yi.zhang@huawei.com>
|
||||
Description:
|
||||
[RW] While write_zeroes_unmap_max_hw_bytes is the hardware limit
|
||||
for the device, this setting is the software limit. Since the
|
||||
unmap write zeroes operation is a best-effort optimization, some
|
||||
devices may still physically write zeroes to media. So the
|
||||
speed of this operation is not guaranteed. Writing a value of
|
||||
'0' to this file disables this operation. Otherwise, this
|
||||
parameter should be equal to write_zeroes_unmap_max_hw_bytes.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/zone_append_max_bytes
|
||||
Date: May 2020
|
||||
Contact: linux-block@vger.kernel.org
|
||||
|
||||
@@ -227,3 +227,12 @@ Contact: Jiaqi Yan <jiaqiyan@google.com>
|
||||
Description:
|
||||
Of the raw poisoned pages on a NUMA node, how many pages are
|
||||
recovered by memory error recovery attempt.
|
||||
|
||||
What: /sys/devices/system/node/nodeX/reclaim
|
||||
Date: June 2025
|
||||
Contact: Linux Memory Management list <linux-mm@kvack.org>
|
||||
Description:
|
||||
Perform user-triggered proactive reclaim on a NUMA node.
|
||||
This interface is equivalent to the memcg variant.
|
||||
|
||||
See Documentation/admin-guide/cgroup-v2.rst
|
||||
|
||||
5
Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
Normal file
5
Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
Normal file
@@ -0,0 +1,5 @@
|
||||
What: /sys/kernel/time/aux_clocks/<ID>/enable
|
||||
Date: May 2025
|
||||
Contact: Thomas Gleixner <tglx@linutronix.de>
|
||||
Description:
|
||||
Controls the enablement of auxiliary clock timekeepers.
|
||||
131
Documentation/ABI/testing/debugfs-amd-iommu
Normal file
131
Documentation/ABI/testing/debugfs-amd-iommu
Normal file
@@ -0,0 +1,131 @@
|
||||
What: /sys/kernel/debug/iommu/amd/iommu<x>/mmio
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file provides read/write access for user input. Users specify the
|
||||
MMIO register offset for iommu<x>, and the file outputs the corresponding
|
||||
MMIO register value of iommu<x>.
|
||||
|
||||
Example::
|
||||
|
||||
$ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio
|
||||
$ cat /sys/kernel/debug/iommu/amd/iommu00/mmio
|
||||
|
||||
Output::
|
||||
|
||||
Offset:0x18 Value:0x000c22000003f48d
|
||||
|
||||
What: /sys/kernel/debug/iommu/amd/iommu<x>/capability
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file provides read/write access for user input. Users specify the
|
||||
capability register offset for iommu<x>, and the file outputs the
|
||||
corresponding capability register value of iommu<x>.
|
||||
|
||||
Example::
|
||||
|
||||
$ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability
|
||||
$ cat /sys/kernel/debug/iommu/amd/iommu00/capability
|
||||
|
||||
Output::
|
||||
|
||||
Offset:0x10 Value:0x00203040
|
||||
|
||||
What: /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file is a read-only output file containing iommu<x> command
|
||||
buffer entries.
|
||||
|
||||
Examples::
|
||||
|
||||
$ cat /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
|
||||
|
||||
Output::
|
||||
|
||||
CMD Buffer Head Offset:339 Tail Offset:339
|
||||
0: 00835001 10000001 00003c00 00000000
|
||||
1: 00000000 30000005 fffff003 7fffffff
|
||||
2: 00835001 10000001 00003c01 00000000
|
||||
3: 00000000 30000005 fffff003 7fffffff
|
||||
4: 00835001 10000001 00003c02 00000000
|
||||
5: 00000000 30000005 fffff003 7fffffff
|
||||
6: 00835001 10000001 00003c03 00000000
|
||||
7: 00000000 30000005 fffff003 7fffffff
|
||||
8: 00835001 10000001 00003c04 00000000
|
||||
9: 00000000 30000005 fffff003 7fffffff
|
||||
10: 00835001 10000001 00003c05 00000000
|
||||
11: 00000000 30000005 fffff003 7fffffff
|
||||
[...]
|
||||
|
||||
What: /sys/kernel/debug/iommu/amd/devid
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file provides read/write access for user input. Users specify the
|
||||
device ID, which can be used to dump IOMMU data structures such as the
|
||||
interrupt remapping table and device table.
|
||||
|
||||
Example:
|
||||
|
||||
1.
|
||||
::
|
||||
|
||||
$ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid
|
||||
$ cat /sys/kernel/debug/iommu/amd/devid
|
||||
|
||||
Output::
|
||||
|
||||
0000:01:00.0
|
||||
|
||||
2.
|
||||
::
|
||||
|
||||
$ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid
|
||||
$ cat /sys/kernel/debug/iommu/amd/devid
|
||||
|
||||
Output::
|
||||
|
||||
0000:01:00.0
|
||||
|
||||
What: /sys/kernel/debug/iommu/amd/devtbl
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file is a read-only output file containing the device table entry
|
||||
for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
|
||||
|
||||
Example::
|
||||
|
||||
$ cat /sys/kernel/debug/iommu/amd/devtbl
|
||||
|
||||
Output::
|
||||
|
||||
DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu
|
||||
0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3
|
||||
|
||||
What: /sys/kernel/debug/iommu/amd/irqtbl
|
||||
Date: January 2025
|
||||
Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
|
||||
Description:
|
||||
This file is a read-only output file containing valid IRT table entries
|
||||
for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
|
||||
|
||||
Example::
|
||||
|
||||
$ cat /sys/kernel/debug/iommu/amd/irqtbl
|
||||
|
||||
Output::
|
||||
|
||||
DeviceId 0000:01:00.0
|
||||
IRT[0000] 0000000000000020 0000000000000241
|
||||
IRT[0001] 0000000000000020 0000000000000841
|
||||
IRT[0002] 0000000000000020 0000000000002041
|
||||
IRT[0003] 0000000000000020 0000000000008041
|
||||
IRT[0004] 0000000000000020 0000000000020041
|
||||
IRT[0005] 0000000000000020 0000000000080041
|
||||
IRT[0006] 0000000000000020 0000000000200041
|
||||
IRT[0007] 0000000000000020 0000000000800041
|
||||
[...]
|
||||
@@ -20,7 +20,7 @@ Description:
|
||||
visible for devices supporting the capability.
|
||||
|
||||
|
||||
What: /sys/kernel/debug/memX/clear_poison
|
||||
What: /sys/kernel/debug/cxl/memX/clear_poison
|
||||
Date: April, 2023
|
||||
KernelVersion: v6.4
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
|
||||
@@ -67,7 +67,7 @@ Contact: qat-linux@intel.com
|
||||
Description: (RO) Read returns power management information specific to the
|
||||
QAT device.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/kernel/debug/qat_<device>_<BDF>/cnv_errors
|
||||
Date: January 2024
|
||||
|
||||
@@ -32,7 +32,7 @@ Description: (RW) Enables/disables the reporting of telemetry metrics.
|
||||
|
||||
echo 0 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/control
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/device_data
|
||||
Date: March 2024
|
||||
@@ -67,6 +67,10 @@ Description: (RO) Reports device telemetry counters.
|
||||
exec_xlt<N> execution count of Translator slice N
|
||||
util_dcpr<N> utilization of Decompression slice N [%]
|
||||
exec_dcpr<N> execution count of Decompression slice N
|
||||
util_cnv<N> utilization of Compression and verify slice N [%]
|
||||
exec_cnv<N> execution count of Compression and verify slice N
|
||||
util_dcprz<N> utilization of Decompression slice N [%]
|
||||
exec_dcprz<N> execution count of Decompression slice N
|
||||
util_pke<N> utilization of PKE N [%]
|
||||
exec_pke<N> execution count of PKE N
|
||||
util_ucs<N> utilization of UCS slice N [%]
|
||||
@@ -100,7 +104,7 @@ Description: (RO) Reports device telemetry counters.
|
||||
If a device lacks a specific accelerator, the corresponding
|
||||
attribute is not reported.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/rp_<A/B/C/D>_data
|
||||
Date: March 2024
|
||||
@@ -225,4 +229,4 @@ Description: (RW) Selects up to 4 Ring Pairs (RP) to monitor, one per file,
|
||||
``rp2srv`` from sysfs.
|
||||
See Documentation/ABI/testing/sysfs-driver-qat for details.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
|
||||
The pktcdvd module (packet writing driver) creates
|
||||
these files in debugfs:
|
||||
|
||||
/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
|
||||
|
||||
==== ====== ====================================
|
||||
info 0444 Lots of driver statistics and infos.
|
||||
==== ====== ====================================
|
||||
|
||||
Example::
|
||||
|
||||
cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
|
||||
@@ -1,6 +1,6 @@
|
||||
What: /sys/bus/acpi/devices/.../path
|
||||
Date: December 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute indicates the full path of ACPI namespace
|
||||
object associated with the device object. For example,
|
||||
@@ -12,7 +12,7 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../modalias
|
||||
Date: July 2007
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute indicates the PNP IDs of the device object.
|
||||
That is acpi:HHHHHHHH:[CCCCCCC:]. Where each HHHHHHHH or
|
||||
@@ -20,7 +20,7 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../hid
|
||||
Date: April 2005
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute indicates the hardware ID (_HID) of the
|
||||
device object. For example, PNP0103.
|
||||
@@ -29,14 +29,14 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../description
|
||||
Date: October 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute contains the output of the device object's
|
||||
_STR control method, if present.
|
||||
|
||||
What: /sys/bus/acpi/devices/.../adr
|
||||
Date: October 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute contains the output of the device object's
|
||||
_ADR control method, which is present for ACPI device
|
||||
@@ -45,14 +45,14 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../uid
|
||||
Date: October 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
This attribute contains the output of the device object's
|
||||
_UID control method, if present.
|
||||
|
||||
What: /sys/bus/acpi/devices/.../eject
|
||||
Date: December 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
Writing 1 to this attribute will trigger hot removal of
|
||||
this device object. This file exists for every device
|
||||
@@ -60,7 +60,7 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../status
|
||||
Date: Jan, 2014
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
(RO) Returns the ACPI device status: enabled, disabled or
|
||||
functioning or present, if the method _STA is present.
|
||||
@@ -90,7 +90,7 @@ Description:
|
||||
|
||||
What: /sys/bus/acpi/devices/.../hrv
|
||||
Date: Apr, 2016
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
(RO) Allows users to read the hardware version of non-PCI
|
||||
hardware, if the _HRV control method is present. It is mostly
|
||||
|
||||
@@ -141,8 +141,6 @@ Description:
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_raw
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@@ -417,18 +415,14 @@ What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_q_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_i_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_q_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_i_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
|
||||
@@ -456,21 +450,15 @@ Description:
|
||||
to the _raw output.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_i_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_current_q_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale
|
||||
@@ -559,6 +547,30 @@ Description:
|
||||
- a small discrete set of values like "0 2 4 6 8"
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay
|
||||
KernelVersion: 6.17
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Delay of start of conversion from common reference point shared
|
||||
by all channels. Can be writable when used to compensate for
|
||||
delay variation introduced by external filters feeding a
|
||||
simultaneous sampling ADC.
|
||||
|
||||
E.g., for the ad7606 ADC series, this value is intended as a
|
||||
configurable time delay in seconds, to correct delay introduced
|
||||
by an optional external filtering circuit.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay_available
|
||||
KernelVersion: 6.16
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Available values of convdelay. May be expressed as:
|
||||
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
If shared across all channels, <type>_convdelay_available
|
||||
is used.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
|
||||
@@ -579,11 +591,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
|
||||
@@ -805,7 +813,11 @@ Description:
|
||||
all the other channels, since it involves changing the VCO
|
||||
fundamental output frequency.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_phase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_phase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_phase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_i_phase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_q_phase
|
||||
KernelVersion: 3.4.0
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@@ -1434,10 +1446,6 @@ What: /sys/.../iio:deviceX/bufferY/in_timestamp_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY-voltageZ_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_i_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_q_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_incli_x_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_incli_y_en
|
||||
What: /sys/.../iio:deviceX/bufferY/in_pressureY_en
|
||||
@@ -1458,10 +1466,6 @@ What: /sys/.../iio:deviceX/bufferY/in_incli_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_i_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_q_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_timestamp_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_pressureY_type
|
||||
What: /sys/.../iio:deviceX/bufferY/in_pressure_type
|
||||
@@ -1499,10 +1503,6 @@ Description:
|
||||
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_i_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_voltage_q_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_accel_x_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_accel_y_index
|
||||
What: /sys/.../iio:deviceX/bufferY/in_accel_z_index
|
||||
@@ -1692,8 +1692,6 @@ Description:
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_raw
|
||||
KernelVersion: 3.17
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@@ -2278,6 +2276,9 @@ Description:
|
||||
Reading returns a list with the possible filter modes. Options
|
||||
for the attribute:
|
||||
|
||||
* "none" - Filter is disabled/bypassed.
|
||||
* "sinc1" - The digital sinc1 filter. Fast 1st
|
||||
conversion time. Poor noise performance.
|
||||
* "sinc3" - The digital sinc3 filter. Moderate 1st
|
||||
conversion time. Good noise performance.
|
||||
* "sinc4" - Sinc 4. Excellent noise performance. Long
|
||||
@@ -2293,6 +2294,8 @@ Description:
|
||||
* "sinc3+pf2" - Sinc3 + device specific Post Filter 2.
|
||||
* "sinc3+pf3" - Sinc3 + device specific Post Filter 3.
|
||||
* "sinc3+pf4" - Sinc3 + device specific Post Filter 4.
|
||||
* "sinc5+pf1" - Sinc5 + device specific Post Filter 1.
|
||||
* "sinc5+avg" - Sinc5 + averaging by 4.
|
||||
* "wideband" - filter with wideband low ripple passband
|
||||
and sharp transition band.
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_i_calibphase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_i_calibphase
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write unscaled value for the Local Oscillator path quadrature I phase shift.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_q_calibphase
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_q_calibphase
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
|
||||
@@ -132,3 +132,12 @@ Description:
|
||||
|
||||
A list of governors that support the node:
|
||||
- simple_ondemand
|
||||
|
||||
What: /sys/class/devfreq/.../related_cpus
|
||||
Date: June 2025
|
||||
Contact: Linux power management list <linux-pm@vger.kernel.org>
|
||||
Description: The list of CPUs whose performance is closely related to the
|
||||
frequency of this devfreq domain.
|
||||
|
||||
This file is only present if a specific devfreq device is
|
||||
closely associated with a subset of CPUs.
|
||||
|
||||
134
Documentation/ABI/testing/sysfs-class-intel_pmt-features
Normal file
134
Documentation/ABI/testing/sysfs-class-intel_pmt-features
Normal file
@@ -0,0 +1,134 @@
|
||||
What: /sys/class/intel_pmt/features-<PCI BDF>/
|
||||
Date: 2025-04-24
|
||||
KernelVersion: 6.16
|
||||
Contact: david.e.box@linux.intel.com
|
||||
Description:
|
||||
The `features-<PCI BDF>/` directory represents the "features"
|
||||
capability exposed by Intel PMT (Platform Monitoring Technology)
|
||||
for the given PCI device.
|
||||
|
||||
Each directory corresponds to a PMT feature and contains
|
||||
attributes describing the available telemetry, monitoring, or
|
||||
control functionalities.
|
||||
|
||||
Directory Structure:
|
||||
|
||||
/sys/class/intel_pmt/features-<PCI BDF>/
|
||||
├── accelerator_telemetry/ # Per-accelerator telemetry data
|
||||
├── crash_log/ # Contains system crash telemetry logs
|
||||
├── per_core_environment_telemetry/ # Environmental telemetry per core
|
||||
├── per_core_performance_telemetry/ # Performance telemetry per core
|
||||
├── per_rmid_energy_telemetry/ # Energy telemetry for RMIDs
|
||||
├── per_rmid_perf_telemetry/ # Performance telemetry for RMIDs
|
||||
├── tpmi_control/ # TPMI-related controls and telemetry
|
||||
├── tracing/ # PMT tracing features
|
||||
└── uncore_telemetry/ # Uncore telemetry data
|
||||
|
||||
Common Files (Present in all feature directories):
|
||||
|
||||
caps
|
||||
- Read-only
|
||||
- Lists available capabilities for this feature.
|
||||
|
||||
guids
|
||||
- Read-only
|
||||
- Lists GUIDs associated with this feature.
|
||||
|
||||
Additional Attributes (Conditional Presence):
|
||||
|
||||
max_command_size
|
||||
- Read-only
|
||||
- Present if the feature supports out-of-band MCTP access.
|
||||
- Maximum supported MCTP command size for out-of-band PMT access (bytes).
|
||||
|
||||
max_stream_size
|
||||
- Read-only
|
||||
- Present if the feature supports out-of-band MCTP access.
|
||||
- Maximum supported MCTP stream size (bytes).
|
||||
|
||||
min_watcher_period_ms
|
||||
- Read-only
|
||||
- Present if the feature supports the watcher API.
|
||||
The watcher API provides a writable control interface that allows user
|
||||
configuration of monitoring behavior, such as setting the sampling or
|
||||
reporting interval.
|
||||
- Minimum supported time period for the watcher interface (milliseconds).
|
||||
|
||||
num_rmids
|
||||
- Read-only
|
||||
- Present if the feature supports RMID (Resource Monitoring ID) telemetry.
|
||||
RMIDs are identifiers used by hardware to track and report resource usage,
|
||||
such as memory bandwidth or energy consumption, on a per-logical-entity
|
||||
basis (e.g., per core, thread, or process group).
|
||||
- Maximum number of RMIDs tracked simultaneously.
|
||||
|
||||
Example:
|
||||
For a device with PCI BDF `0000:00:03.1`, the directory tree could look like:
|
||||
|
||||
/sys/class/intel_pmt/features-0000:00:03.1/
|
||||
├── accelerator_telemetry/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
│ ├── max_command_size
|
||||
│ ├── max_stream_size
|
||||
│ ├── min_watcher_period_ms
|
||||
├── crash_log/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
│ ├── max_command_size
|
||||
│ ├── max_stream_size
|
||||
├── per_core_environment_telemetry/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
│ ├── max_command_size
|
||||
│ ├── max_stream_size
|
||||
│ ├── min_watcher_period_ms
|
||||
├── per_rmid_energy_telemetry/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
│ ├── max_command_size
|
||||
│ ├── max_stream_size
|
||||
│ ├── min_watcher_period_ms
|
||||
│ ├── num_rmids
|
||||
├── tpmi_control/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
├── tracing/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
├── uncore_telemetry/
|
||||
│ ├── caps
|
||||
│ ├── guids
|
||||
│ ├── max_command_size
|
||||
│ ├── max_stream_size
|
||||
│ ├── min_watcher_period_ms
|
||||
|
||||
Notes:
|
||||
- Some attributes are only present if the corresponding feature supports
|
||||
the capability (e.g., `max_command_size` for MCTP-capable features).
|
||||
- Features supporting RMIDs include `num_rmids`.
|
||||
- Features supporting the watcher API include `min_watcher_period_ms`.
|
||||
- The `caps` file provides additional information about the functionality
|
||||
of the feature.
|
||||
|
||||
Example 'caps' content for the 'tracing' feature:
|
||||
|
||||
/sys/class/intel_pmt/features-0000:00:03.1/
|
||||
├── tracing/
|
||||
│ ├── caps
|
||||
|
||||
telemetry Available: No
|
||||
watcher Available: Yes
|
||||
crashlog Available: No
|
||||
streaming Available: No
|
||||
threashold Available: No
|
||||
window Available: No
|
||||
config Available: Yes
|
||||
tracing Available: No
|
||||
inband Available: Yes
|
||||
oob Available: Yes
|
||||
secure_chan Available: No
|
||||
pmt_sp Available: Yes
|
||||
pmt_sp_policy Available: Yes
|
||||
mailbox Available: Yes
|
||||
bios_lock Available: Yes
|
||||
@@ -26,6 +26,16 @@ Description:
|
||||
This ID is used to match the device with the appropriate
|
||||
driver.
|
||||
|
||||
What: /sys/class/mdio_bus/<bus>/<device>/c45_phy_ids/mmd<n>_device_id
|
||||
Date: June 2025
|
||||
KernelVersion: 6.17
|
||||
Contact: netdev@vger.kernel.org
|
||||
Description:
|
||||
This attribute contains the 32-bit PHY Identifier as reported
|
||||
by the device during bus enumeration, encoded in hexadecimal.
|
||||
These C45 IDs are used to match the device with the appropriate
|
||||
driver. These files are invisible to the C22 device.
|
||||
|
||||
What: /sys/class/mdio_bus/<bus>/<device>/phy_interface
|
||||
Date: February 2014
|
||||
KernelVersion: 3.15
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
sysfs interface
|
||||
---------------
|
||||
The pktcdvd module (packet writing driver) creates the following files in the
|
||||
sysfs: (<devid> is in the format major:minor)
|
||||
|
||||
What: /sys/class/pktcdvd/add
|
||||
What: /sys/class/pktcdvd/remove
|
||||
What: /sys/class/pktcdvd/device_map
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
|
||||
========== ==============================================
|
||||
add (WO) Write a block device id (major:minor) to
|
||||
create a new pktcdvd device and map it to the
|
||||
block device.
|
||||
|
||||
remove (WO) Write the pktcdvd device id (major:minor)
|
||||
to remove the pktcdvd device.
|
||||
|
||||
device_map (RO) Shows the device mapping in format:
|
||||
pktcdvd[0-7] <pktdevid> <blkdevid>
|
||||
========== ==============================================
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/dev
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
dev: (RO) Device id
|
||||
|
||||
uevent: (WO) To send a uevent
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
packets_started: (RO) Number of started packets.
|
||||
|
||||
packets_finished: (RO) Number of finished packets.
|
||||
|
||||
kb_written: (RO) kBytes written.
|
||||
|
||||
kb_read: (RO) kBytes read.
|
||||
|
||||
kb_read_gather: (RO) kBytes read to fill write packets.
|
||||
|
||||
reset: (WO) Write any value to it to reset
|
||||
pktcdvd device statistic values, like
|
||||
bytes read/written.
|
||||
|
||||
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
|
||||
What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
|
||||
Date: Oct. 2006
|
||||
KernelVersion: 2.6.20
|
||||
Contact: Thomas Maier <balagi@justmail.de>
|
||||
Description:
|
||||
============== ================================================
|
||||
size (RO) Contains the size of the bio write queue.
|
||||
|
||||
congestion_off (RW) If bio write queue size is below this mark,
|
||||
accept new bio requests from the block layer.
|
||||
|
||||
congestion_on (RW) If bio write queue size is higher as this
|
||||
mark, do no longer accept bio write requests
|
||||
from the block layer and wait till the pktcdvd
|
||||
device has processed enough bio's so that bio
|
||||
write queue size is below congestion off mark.
|
||||
A value of <= 0 disables congestion control.
|
||||
============== ================================================
|
||||
|
||||
|
||||
Example:
|
||||
--------
|
||||
To use the pktcdvd sysfs interface directly, you can do::
|
||||
|
||||
# create a new pktcdvd device mapped to /dev/hdc
|
||||
echo "22:0" >/sys/class/pktcdvd/add
|
||||
cat /sys/class/pktcdvd/device_map
|
||||
# assuming device pktcdvd0 was created, look at stat's
|
||||
cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
|
||||
# print the device id of the mapped block device
|
||||
fgrep pktcdvd0 /sys/class/pktcdvd/device_map
|
||||
# remove device, using pktcdvd0 device id 253:0
|
||||
echo "253:0" >/sys/class/pktcdvd/remove
|
||||
@@ -1,6 +1,6 @@
|
||||
What: /sys/devices/.../power/
|
||||
Date: January 2009
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power directory contains attributes
|
||||
allowing the user space to check and modify some power
|
||||
@@ -8,7 +8,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup
|
||||
Date: January 2009
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/wakeup attribute allows the user
|
||||
space to check if the device is enabled to wake up the system
|
||||
@@ -34,7 +34,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/control
|
||||
Date: January 2009
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/control attribute allows the user
|
||||
space to control the run-time power management of the device.
|
||||
@@ -53,10 +53,10 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/async
|
||||
Date: January 2009
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../async attribute allows the user space to
|
||||
enable or diasble the device's suspend and resume callbacks to
|
||||
enable or disable the device's suspend and resume callbacks to
|
||||
be executed asynchronously (ie. in separate threads, in parallel
|
||||
with the main suspend/resume thread) during system-wide power
|
||||
transitions (eg. suspend to RAM, hibernation).
|
||||
@@ -79,7 +79,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_count
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_count attribute contains the number
|
||||
of signaled wakeup events associated with the device. This
|
||||
@@ -90,7 +90,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_active_count
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_active_count attribute contains the
|
||||
number of times the processing of wakeup events associated with
|
||||
@@ -102,7 +102,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_abort_count
|
||||
Date: February 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_abort_count attribute contains the
|
||||
number of times the processing of a wakeup event associated with
|
||||
@@ -114,7 +114,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_expire_count
|
||||
Date: February 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_expire_count attribute contains the
|
||||
number of times a wakeup event associated with the device has
|
||||
@@ -126,7 +126,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_active
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_active attribute contains either 1,
|
||||
or 0, depending on whether or not a wakeup event associated with
|
||||
@@ -138,7 +138,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_total_time_ms
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_total_time_ms attribute contains
|
||||
the total time of processing wakeup events associated with the
|
||||
@@ -149,7 +149,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_max_time_ms
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_max_time_ms attribute contains
|
||||
the maximum time of processing a single wakeup event associated
|
||||
@@ -161,7 +161,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_last_time_ms
|
||||
Date: September 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_last_time_ms attribute contains
|
||||
the value of the monotonic clock corresponding to the time of
|
||||
@@ -173,7 +173,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms
|
||||
Date: February 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
|
||||
contains the total time the device has been preventing
|
||||
@@ -203,7 +203,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/pm_qos_resume_latency_us
|
||||
Date: March 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/pm_qos_resume_latency_us attribute
|
||||
contains the PM QoS resume latency limit for the given device,
|
||||
@@ -223,7 +223,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/pm_qos_latency_tolerance_us
|
||||
Date: January 2014
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/pm_qos_latency_tolerance_us attribute
|
||||
contains the PM QoS active state latency tolerance limit for the
|
||||
@@ -248,7 +248,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/pm_qos_no_power_off
|
||||
Date: September 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/pm_qos_no_power_off attribute
|
||||
is used for manipulating the PM QoS "no power off" flag. If
|
||||
@@ -263,7 +263,7 @@ Description:
|
||||
|
||||
What: /sys/devices/.../power/runtime_status
|
||||
Date: April 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/.../power/runtime_status attribute contains
|
||||
the current runtime PM status of the device, which may be
|
||||
|
||||
@@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v1
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v2
|
||||
/sys/devices/system/cpu/vulnerabilities/srbds
|
||||
/sys/devices/system/cpu/vulnerabilities/tsa
|
||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||
Date: January 2018
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
|
||||
@@ -148,3 +148,51 @@ Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RO. Fan 3 speed in RPM.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Card burst (PL2) power limit in microwatts.
|
||||
|
||||
The power controller will throttle the operating frequency
|
||||
if the power averaged over a window (typically milliseconds)
|
||||
exceeds this limit. A read value of 0 means that the PL2
|
||||
power limit is disabled, writing 0 disables the limit.
|
||||
PL2 is greater than PL1 and its time window is shorter
|
||||
compared to PL1.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Package burst (PL2) power limit in microwatts.
|
||||
|
||||
The power controller will throttle the operating frequency
|
||||
if the power averaged over a window (typically milliseconds)
|
||||
exceeds this limit. A read value of 0 means that the PL2
|
||||
power limit is disabled, writing 0 disables the limit.
|
||||
PL2 is greater than PL1 and its time window is shorter
|
||||
compared to PL1.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap_interval
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Card burst power limit interval (Tau in PL2/Tau) in
|
||||
milliseconds over which sustained power is averaged.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap_interval
|
||||
Date: May 2025
|
||||
KernelVersion: 6.15
|
||||
Contact: intel-xe@lists.freedesktop.org
|
||||
Description: RW. Package burst power limit interval (Tau in PL2/Tau) in
|
||||
milliseconds over which sustained power is averaged.
|
||||
|
||||
Only supported for particular Intel Xe graphics platforms.
|
||||
|
||||
18
Documentation/ABI/testing/sysfs-driver-qaic
Normal file
18
Documentation/ABI/testing/sysfs-driver-qaic
Normal file
@@ -0,0 +1,18 @@
|
||||
What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ce_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.17
|
||||
Contact: dri-devel@lists.freedesktop.org
|
||||
Description: Number of correctable errors received from device since driver is loaded.
|
||||
|
||||
What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.17
|
||||
Contact: dri-devel@lists.freedesktop.org
|
||||
Description: Number of uncorrectable errors received from device since driver is loaded.
|
||||
|
||||
What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_nonfatal_count
|
||||
Date: May 2025
|
||||
KernelVersion: 6.17
|
||||
Contact: dri-devel@lists.freedesktop.org
|
||||
Description: Number of uncorrectable non-fatal errors received from device since driver
|
||||
is loaded.
|
||||
@@ -14,7 +14,7 @@ Description: (RW) Reports the current state of the QAT device. Write to
|
||||
It is possible to transition the device from up to down only
|
||||
if the device is up and vice versa.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||
Date: June 2022
|
||||
@@ -23,24 +23,28 @@ Contact: qat-linux@intel.com
|
||||
Description: (RW) Reports the current configuration of the QAT device.
|
||||
Write to the file to change the configured services.
|
||||
|
||||
The values are:
|
||||
One or more services can be enabled per device.
|
||||
Certain configurations are restricted to specific device types;
|
||||
where applicable this is explicitly indicated, for example
|
||||
(qat_6xxx) denotes applicability exclusively to that device series.
|
||||
|
||||
* sym;asym: the device is configured for running crypto
|
||||
services
|
||||
* asym;sym: identical to sym;asym
|
||||
* dc: the device is configured for running compression services
|
||||
* dcc: identical to dc but enables the dc chaining feature,
|
||||
hash then compression. If this is not required chose dc
|
||||
* sym: the device is configured for running symmetric crypto
|
||||
services
|
||||
* asym: the device is configured for running asymmetric crypto
|
||||
services
|
||||
* asym;dc: the device is configured for running asymmetric
|
||||
crypto services and compression services
|
||||
* dc;asym: identical to asym;dc
|
||||
* sym;dc: the device is configured for running symmetric crypto
|
||||
services and compression services
|
||||
* dc;sym: identical to sym;dc
|
||||
The available services include:
|
||||
|
||||
* sym: Configures the device for symmetric cryptographic operations.
|
||||
* asym: Configures the device for asymmetric cryptographic operations.
|
||||
* dc: Configures the device for compression and decompression
|
||||
operations.
|
||||
* dcc: Similar to dc, but with the additional dc chaining feature
|
||||
enabled, cipher then compress (qat_6xxx), hash then compression.
|
||||
If this is not required choose dc.
|
||||
* decomp: Configures the device for decompression operations (qat_6xxx).
|
||||
|
||||
Service combinations are permitted for all services except dcc.
|
||||
On QAT GEN4 devices (qat_4xxx driver) a maximum of two services can be
|
||||
combined and on QAT GEN6 devices (qat_6xxx driver) a maximum of three
|
||||
services can be combined.
|
||||
The order of services is not significant. For instance, sym;asym is
|
||||
functionally equivalent to asym;sym.
|
||||
|
||||
It is possible to set the configuration only if the device
|
||||
is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)
|
||||
@@ -59,7 +63,7 @@ Description: (RW) Reports the current configuration of the QAT device.
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||
dc
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
|
||||
Date: June 2023
|
||||
@@ -94,7 +98,7 @@ Description: (RW) This configuration option provides a way to force the device i
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
|
||||
0
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat/rp2srv
|
||||
Date: January 2024
|
||||
@@ -126,7 +130,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat/rp2srv
|
||||
sym
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat/num_rps
|
||||
Date: January 2024
|
||||
@@ -140,7 +144,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat/num_rps
|
||||
64
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat/auto_reset
|
||||
Date: May 2024
|
||||
@@ -160,4 +164,4 @@ Description: (RW) Reports the current state of the autoreset feature
|
||||
* 0/Nn/off: auto reset disabled. If the device encounters an
|
||||
unrecoverable error, it will not be reset.
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
@@ -31,7 +31,7 @@ Description:
|
||||
* rm_all: Removes all the configured SLAs.
|
||||
* Inputs: None
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/rp
|
||||
Date: January 2024
|
||||
@@ -68,7 +68,7 @@ Description:
|
||||
## Write
|
||||
# echo 0x5 > /sys/bus/pci/devices/<BDF>/qat_rl/rp
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/id
|
||||
Date: January 2024
|
||||
@@ -101,7 +101,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_rl/rp
|
||||
0x5 ## ring pair ID 0 and ring pair ID 2
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/cir
|
||||
Date: January 2024
|
||||
@@ -135,7 +135,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_rl/cir
|
||||
500
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/pir
|
||||
Date: January 2024
|
||||
@@ -169,7 +169,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_rl/pir
|
||||
750
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/srv
|
||||
Date: January 2024
|
||||
@@ -202,7 +202,7 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_rl/srv
|
||||
dc
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
What: /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
|
||||
Date: January 2024
|
||||
@@ -223,4 +223,4 @@ Description:
|
||||
# cat /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
|
||||
0
|
||||
|
||||
This attribute is only available for qat_4xxx devices.
|
||||
This attribute is only available for qat_4xxx and qat_6xxx devices.
|
||||
|
||||
@@ -20,17 +20,6 @@ Description: Some Samsung laptops have different "performance levels"
|
||||
and it's still unknown if this value even changes
|
||||
anything, other than making the user feel a bit better.
|
||||
|
||||
What: /sys/devices/platform/samsung/battery_life_extender
|
||||
Date: December 1, 2011
|
||||
KernelVersion: 3.3
|
||||
Contact: Corentin Chary <corentin.chary@gmail.com>
|
||||
Description: Max battery charge level can be modified, battery cycle
|
||||
life can be extended by reducing the max battery charge
|
||||
level.
|
||||
|
||||
- 0 means normal battery mode (100% charge)
|
||||
- 1 means battery life extender mode (80% charge)
|
||||
|
||||
What: /sys/devices/platform/samsung/usb_charge
|
||||
Date: December 1, 2011
|
||||
KernelVersion: 3.3
|
||||
|
||||
@@ -62,3 +62,13 @@ Description:
|
||||
by VESA DisplayPort Alt Mode on USB Type-C Standard.
|
||||
- 0 when HPD’s logical state is low (HPD_Low) as defined by
|
||||
VESA DisplayPort Alt Mode on USB Type-C Standard.
|
||||
|
||||
What: /sys/bus/typec/devices/.../displayport/irq_hpd
|
||||
Date: June 2025
|
||||
Contact: RD Babiera <rdbabiera@google.com>
|
||||
Description:
|
||||
IRQ_HPD events are sent over the USB PD protocol in Status Update and
|
||||
Attention messages. IRQ_HPD can only be asserted when HPD is high,
|
||||
and is asserted when an IRQ_HPD has been issued since the last Status
|
||||
Update. This is a read only node that returns the number of IRQ events
|
||||
raised in the driver's lifetime.
|
||||
|
||||
@@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
|
||||
What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
|
||||
Date: February 2018
|
||||
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
|
||||
Description: This file shows the total physical memory resources. This is
|
||||
@@ -1685,3 +1685,86 @@ Description:
|
||||
================ ========================================
|
||||
|
||||
The file is read only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/analysis_trigger
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/analysis_trigger
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The host can enable or disable HID analysis operation.
|
||||
|
||||
======= =========================================
|
||||
disable disable HID analysis operation
|
||||
enable enable HID analysis operation
|
||||
======= =========================================
|
||||
|
||||
The file is write only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_trigger
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/defrag_trigger
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The host can enable or disable HID defragmentation operation.
|
||||
|
||||
======= =========================================
|
||||
disable disable HID defragmentation operation
|
||||
enable enable HID defragmentation operation
|
||||
======= =========================================
|
||||
|
||||
The attribute is write only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/fragmented_size
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/fragmented_size
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The total fragmented size in the device is reported through
|
||||
this attribute.
|
||||
|
||||
The attribute is read only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_size
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/defrag_size
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The host sets the size to be defragmented by an HID
|
||||
defragmentation operation.
|
||||
|
||||
The attribute is read/write.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/progress_ratio
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/progress_ratio
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
Defragmentation progress is reported by this attribute,
|
||||
indicating the ratio of the completed defragmentation size
|
||||
over the requested defragmentation size.
|
||||
|
||||
==== ============================================
|
||||
1 1%
|
||||
...
|
||||
100 100%
|
||||
==== ============================================
|
||||
|
||||
The attribute is read only.
|
||||
|
||||
What: /sys/bus/platform/drivers/ufshcd/*/hid/state
|
||||
What: /sys/bus/platform/devices/*.ufs/hid/state
|
||||
Date: May 2025
|
||||
Contact: Huan Tang <tanghuan@vivo.com>
|
||||
Description:
|
||||
The HID state is reported by this attribute.
|
||||
|
||||
==================== ===========================
|
||||
idle Idle (analysis required)
|
||||
analysis_in_progress Analysis in progress
|
||||
defrag_required Defrag required
|
||||
defrag_in_progress Defrag in progress
|
||||
defrag_completed Defrag completed
|
||||
defrag_not_required Defrag is not required
|
||||
==================== ===========================
|
||||
|
||||
The attribute is read only.
|
||||
|
||||
@@ -49,6 +49,12 @@ Description:
|
||||
(RO) Supported minimum scrub cycle duration in seconds
|
||||
by the memory scrubber.
|
||||
|
||||
Device-based scrub: returns the minimum scrub cycle
|
||||
supported by the memory device.
|
||||
|
||||
Region-based scrub: returns the max of minimum scrub cycles
|
||||
supported by individual memory devices that back the region.
|
||||
|
||||
What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
@@ -57,6 +63,16 @@ Description:
|
||||
(RO) Supported maximum scrub cycle duration in seconds
|
||||
by the memory scrubber.
|
||||
|
||||
Device-based scrub: returns the maximum scrub cycle supported
|
||||
by the memory device.
|
||||
|
||||
Region-based scrub: returns the min of maximum scrub cycles
|
||||
supported by individual memory devices that back the region.
|
||||
|
||||
If the memory device does not provide maximum scrub cycle
|
||||
information, return the maximum supported value of the scrub
|
||||
cycle field.
|
||||
|
||||
What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
|
||||
Date: March 2025
|
||||
KernelVersion: 6.15
|
||||
|
||||
@@ -108,15 +108,15 @@ Description:
|
||||
number of a "General Purpose Events" (GPE).
|
||||
|
||||
A GPE vectors to a specified handler in AML, which
|
||||
can do a anything the BIOS writer wants from
|
||||
can do anything the BIOS writer wants from
|
||||
OS context. GPE 0x12, for example, would vector
|
||||
to a level or edge handler called _L12 or _E12.
|
||||
The handler may do its business and return.
|
||||
Or the handler may send send a Notify event
|
||||
Or the handler may send a Notify event
|
||||
to a Linux device driver registered on an ACPI device,
|
||||
such as a battery, or a processor.
|
||||
|
||||
To figure out where all the SCI's are coming from,
|
||||
To figure out where all the SCIs are coming from,
|
||||
/sys/firmware/acpi/interrupts contains a file listing
|
||||
every possible source, and the count of how many
|
||||
times it has triggered::
|
||||
|
||||
@@ -36,3 +36,10 @@ Description: Displays the content of the Runtime Configuration Interface
|
||||
Table version 2 on Dell EMC PowerEdge systems in binary format
|
||||
Users: It is used by Dell EMC OpenManage Server Administrator tool to
|
||||
populate BIOS setup page.
|
||||
|
||||
What: /sys/firmware/efi/ovmf_debug_log
|
||||
Date: July 2025
|
||||
Contact: Gerd Hoffmann <kraxel@redhat.com>, linux-efi@vger.kernel.org
|
||||
Description: Displays the content of the OVMF debug log buffer. The file is
|
||||
only present in case the firmware supports logging to a memory
|
||||
buffer.
|
||||
|
||||
@@ -5,7 +5,7 @@ Description: Shows all enabled kernel features.
|
||||
Supported features:
|
||||
zero_padding, compr_cfgs, big_pcluster, chunked_file,
|
||||
device_table, compr_head2, sb_chksum, ztailpacking,
|
||||
dedupe, fragments.
|
||||
dedupe, fragments, 48bit, metabox.
|
||||
|
||||
What: /sys/fs/erofs/<disk>/sync_decompress
|
||||
Date: November 2021
|
||||
@@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect
|
||||
and multiple accelerators are separated by '\n'.
|
||||
Supported accelerator(s): qat_deflate.
|
||||
Disable all accelerators with an empty string (echo > accel).
|
||||
|
||||
What: /sys/fs/erofs/<disk>/dir_ra_bytes
|
||||
Date: July 2025
|
||||
Contact: "Chao Yu" <chao@kernel.org>
|
||||
Description: Used to set or show readahead bytes during readdir(), by
|
||||
default the value is 16384.
|
||||
|
||||
- 0: disable readahead.
|
||||
|
||||
@@ -861,3 +861,25 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags,
|
||||
SB_ENC_STRICT_MODE_FL 0x00000001
|
||||
SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002
|
||||
============================ ==========
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/reserved_pin_section
|
||||
Date: June 2025
|
||||
Contact: "Chao Yu" <chao@kernel.org>
|
||||
Description: This threshold is used to control triggering garbage collection while
|
||||
fallocating on pinned file, so, it can guarantee there is enough free
|
||||
reserved section before preallocating on pinned file.
|
||||
By default, the value is ovp_sections, especially, for zoned ufs, the
|
||||
value is 1.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple
|
||||
Date: June 2025
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Set a multiplier for the background GC migration window when F2FS GC is
|
||||
boosted. The range should be from 1 to the segment count in a section.
|
||||
Default: 5
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy
|
||||
Date: June 2025
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
|
||||
Default: 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
What: /sys/kernel/address_bit
|
||||
What: /sys/kernel/address_bits
|
||||
Date: May 2023
|
||||
KernelVersion: 6.3
|
||||
Contact: Thomas Weißschuh <linux@weissschuh.net>
|
||||
|
||||
@@ -44,6 +44,13 @@ Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the pid of the kdamond if it is
|
||||
running.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/refresh_ms
|
||||
Date: Jul 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the time interval for
|
||||
automatic DAMON status file contents update. Writing '0'
|
||||
disables the update. Reading this file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
@@ -431,6 +438,28 @@ Description: Directory for DAMON operations set layer-handled DAMOS filters.
|
||||
/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters
|
||||
directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/nr_dests
|
||||
Date: Jul 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for setting action destinations of the scheme named
|
||||
'0' to 'N-1' under the dests/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/id
|
||||
Date: Jul 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the id of
|
||||
the DAMOS action destination. For DAMOS_MIGRATE_{HOT,COLD}
|
||||
actions, the destination node's node id can be written and
|
||||
read.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/weight
|
||||
Date: Jul 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the weight
|
||||
of the DAMOS action destination to select as the destination of
|
||||
each action among the destinations.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
|
||||
@@ -37,7 +37,8 @@ Description:
|
||||
The alloc_calls file is read-only and lists the kernel code
|
||||
locations from which allocations for this cache were performed.
|
||||
The alloc_calls file only contains information if debugging is
|
||||
enabled for that cache (see Documentation/mm/slub.rst).
|
||||
enabled for that cache (see
|
||||
Documentation/admin-guide/mm/slab.rst).
|
||||
|
||||
What: /sys/kernel/slab/<cache>/alloc_fastpath
|
||||
Date: February 2008
|
||||
@@ -219,7 +220,7 @@ Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
||||
Description:
|
||||
The free_calls file is read-only and lists the locations of
|
||||
object frees if slab debugging is enabled (see
|
||||
Documentation/mm/slub.rst).
|
||||
Documentation/admin-guide/mm/slab.rst).
|
||||
|
||||
What: /sys/kernel/slab/<cache>/free_fastpath
|
||||
Date: February 2008
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_supported_type
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: "<perry.yuan@dell.com>"
|
||||
@@ -29,12 +29,12 @@ Description:
|
||||
|
||||
For example to check which privacy devices are supported::
|
||||
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_supported_type
|
||||
[Microphone Mute] [supported]
|
||||
[Camera Shutter] [supported]
|
||||
[ePrivacy Screen] [unsupported]
|
||||
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_current_state
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: "<perry.yuan@dell.com>"
|
||||
@@ -66,6 +66,6 @@ Description:
|
||||
|
||||
For example to check all supported current privacy device states::
|
||||
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_current_state
|
||||
[Microphone] [unmuted]
|
||||
[Camera Shutter] [unmuted]
|
||||
|
||||
@@ -27,15 +27,6 @@ Description:
|
||||
* 1 -> Switched On
|
||||
* 0 -> Switched Off
|
||||
|
||||
What: /sys/bus/platform/devices/VPC2004:*/conservation_mode
|
||||
Date: Aug 2017
|
||||
KernelVersion: 4.14
|
||||
Contact: platform-driver-x86@vger.kernel.org
|
||||
Description:
|
||||
Controls whether the conservation mode is enabled or not.
|
||||
This feature limits the maximum battery charge percentage to
|
||||
around 50-60% in order to prolong the lifetime of the battery.
|
||||
|
||||
What: /sys/bus/platform/devices/VPC2004:*/fn_lock
|
||||
Date: May 2018
|
||||
KernelVersion: 4.18
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651/firmware_update_request
|
||||
What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651[-X]/firmware_update_request
|
||||
Date: April 2020
|
||||
KernelVersion: 5.7
|
||||
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
What: /sys/devices/platform/<platform>/force_power
|
||||
What: /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341[-X]/force_power
|
||||
Date: September 2017
|
||||
KernelVersion: 4.15
|
||||
Contact: "Mario Limonciello" <mario.limonciello@outlook.com>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
What: /sys/power/
|
||||
Date: August 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power directory will contain files that will
|
||||
provide a unified interface to the power management
|
||||
@@ -8,7 +8,7 @@ Description:
|
||||
|
||||
What: /sys/power/state
|
||||
Date: November 2016
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/state file controls system sleep states.
|
||||
Reading from this file returns the available sleep state
|
||||
@@ -23,7 +23,7 @@ Description:
|
||||
|
||||
What: /sys/power/mem_sleep
|
||||
Date: November 2016
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/mem_sleep file controls the operating mode of
|
||||
system suspend. Reading from it returns the available modes
|
||||
@@ -41,7 +41,7 @@ Description:
|
||||
|
||||
What: /sys/power/disk
|
||||
Date: September 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/disk file controls the operating mode of the
|
||||
suspend-to-disk mechanism. Reading from this file returns
|
||||
@@ -90,7 +90,7 @@ Description:
|
||||
|
||||
What: /sys/power/image_size
|
||||
Date: August 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/image_size file controls the size of the image
|
||||
created by the suspend-to-disk mechanism. It can be written a
|
||||
@@ -107,7 +107,7 @@ Description:
|
||||
|
||||
What: /sys/power/pm_trace
|
||||
Date: August 2006
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/pm_trace file controls the code which saves the
|
||||
last PM event point in the RTC across reboots, so that you can
|
||||
@@ -156,7 +156,7 @@ Description:
|
||||
|
||||
What: /sys/power/pm_async
|
||||
Date: January 2009
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/pm_async file controls the switch allowing the
|
||||
user space to enable or disable asynchronous suspend and resume
|
||||
@@ -169,7 +169,7 @@ Description:
|
||||
|
||||
What: /sys/power/wakeup_count
|
||||
Date: July 2010
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/wakeup_count file allows user space to put the
|
||||
system into a sleep state while taking into account the
|
||||
@@ -184,7 +184,7 @@ Description:
|
||||
|
||||
What: /sys/power/reserved_size
|
||||
Date: May 2011
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/reserved_size file allows user space to control
|
||||
the amount of memory reserved for allocations made by device
|
||||
@@ -198,7 +198,7 @@ Description:
|
||||
|
||||
What: /sys/power/autosleep
|
||||
Date: April 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/autosleep file can be written one of the strings
|
||||
returned by reads from /sys/power/state. If that happens, a
|
||||
@@ -215,7 +215,7 @@ Description:
|
||||
|
||||
What: /sys/power/wake_lock
|
||||
Date: February 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/wake_lock file allows user space to create
|
||||
wakeup source objects and activate them on demand (if one of
|
||||
@@ -242,7 +242,7 @@ Description:
|
||||
|
||||
What: /sys/power/wake_unlock
|
||||
Date: February 2012
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/wake_unlock file allows user space to deactivate
|
||||
wakeup sources created with the help of /sys/power/wake_lock.
|
||||
@@ -283,7 +283,7 @@ Description:
|
||||
|
||||
What: /sys/power/pm_debug_messages
|
||||
Date: July 2017
|
||||
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
|
||||
Contact: Rafael J. Wysocki <rafael@kernel.org>
|
||||
Description:
|
||||
The /sys/power/pm_debug_messages file controls the printing
|
||||
of debug messages from the system suspend/hibernation
|
||||
|
||||
@@ -22,9 +22,13 @@ Description: A string indicating which backend is in use by the firmware.
|
||||
and is expected to be "ibm,edk2-compat-v1".
|
||||
|
||||
On pseries/PLPKS, this is generated by the kernel based on the
|
||||
version number in the SB_VERSION variable in the keystore, and
|
||||
has the form "ibm,plpks-sb-v<version>", or
|
||||
"ibm,plpks-sb-unknown" if there is no SB_VERSION variable.
|
||||
version number in the SB_VERSION variable in the keystore. The
|
||||
version numbering in the SB_VERSION variable starts from 1. The
|
||||
format string takes the form "ibm,plpks-sb-v<version>" in the
|
||||
case of dynamic key management mode. If the SB_VERSION variable
|
||||
does not exist (or there is an error while reading it), it takes
|
||||
the form "ibm,plpks-sb-v0", indicating that the key management
|
||||
mode is static.
|
||||
|
||||
What: /sys/firmware/secvar/vars/<variable name>
|
||||
Date: August 2019
|
||||
@@ -34,6 +38,13 @@ Description: Each secure variable is represented as a directory named as
|
||||
representation. The data and size can be determined by reading
|
||||
their respective attribute files.
|
||||
|
||||
Only secvars relevant to the key management mode are exposed.
|
||||
Only in the dynamic key management mode should the user have
|
||||
access (read and write) to the secure boot secvars db, dbx,
|
||||
grubdb, grubdbx, and sbat. These secvars are not consumed in the
|
||||
static key management mode. PK, trustedcadb and moduledb are the
|
||||
secvars common to both static and dynamic key management modes.
|
||||
|
||||
What: /sys/firmware/secvar/vars/<variable_name>/size
|
||||
Date: August 2019
|
||||
Contact: Nayna Jain <nayna@linux.ibm.com>
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# for cleaning
|
||||
subdir- := devicetree/bindings
|
||||
|
||||
ifneq ($(MAKECMDGOALS),cleandocs)
|
||||
# Check for broken documentation file references
|
||||
ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y)
|
||||
$(shell $(srctree)/scripts/documentation-file-ref-check --warn)
|
||||
@@ -14,6 +15,7 @@ endif
|
||||
ifeq ($(CONFIG_WARN_ABI_ERRORS),y)
|
||||
$(shell $(srctree)/scripts/get_abi.py --dir $(srctree)/Documentation/ABI validate)
|
||||
endif
|
||||
endif
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXBUILD = sphinx-build
|
||||
|
||||
@@ -203,3 +203,18 @@ controllers, it is advisable to skip this testcase using this
|
||||
command::
|
||||
|
||||
# pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
|
||||
|
||||
Kselftest EP Doorbell
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If the Endpoint MSI controller is used for the doorbell usecase, run below
|
||||
command for testing it::
|
||||
|
||||
# pci_endpoint_test -f pcie_ep_doorbell
|
||||
|
||||
# Starting 1 tests from 1 test cases.
|
||||
# RUN pcie_ep_doorbell.DOORBELL_TEST ...
|
||||
# OK pcie_ep_doorbell.DOORBELL_TEST
|
||||
ok 1 pcie_ep_doorbell.DOORBELL_TEST
|
||||
# PASSED: 1 / 1 tests passed.
|
||||
# Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
|
||||
|
||||
@@ -286,6 +286,39 @@ in order to detect the beginnings and ends of grace periods in a
|
||||
distributed fashion. The values flow from ``rcu_state`` to ``rcu_node``
|
||||
(down the tree from the root to the leaves) to ``rcu_data``.
|
||||
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Quick Quiz**: |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Given that the root rcu_node structure has a gp_seq field, |
|
||||
| why does RCU maintain a separate gp_seq in the rcu_state structure? |
|
||||
| Why not just use the root rcu_node's gp_seq as the official record |
|
||||
| and update it directly when starting a new grace period? |
|
||||
+-----------------------------------------------------------------------+
|
||||
| **Answer**: |
|
||||
+-----------------------------------------------------------------------+
|
||||
| On single-node RCU trees (where the root node is also a leaf), |
|
||||
| updating the root node's gp_seq immediately would create unnecessary |
|
||||
| lock contention. Here's why: |
|
||||
| |
|
||||
| If we did rcu_seq_start() directly on the root node's gp_seq: |
|
||||
| |
|
||||
| 1. All CPUs would immediately see their node's gp_seq from their rdp's|
|
||||
| gp_seq, in rcu_pending(). They would all then invoke the RCU-core. |
|
||||
| 2. Which calls note_gp_changes() and try to acquire the node lock. |
|
||||
| 3. But rnp->qsmask isn't initialized yet (happens later in |
|
||||
| rcu_gp_init()) |
|
||||
| 4. So each CPU would acquire the lock, find it can't determine if it |
|
||||
| needs to report quiescent state (no qsmask), update rdp->gp_seq, |
|
||||
| and release the lock. |
|
||||
| 5. Result: Lots of lock acquisitions with no grace period progress |
|
||||
| |
|
||||
| By having a separate rcu_state.gp_seq, we can increment the official |
|
||||
| grace period counter without immediately affecting what CPUs see in |
|
||||
| their nodes. The hierarchical propagation in rcu_gp_init() then |
|
||||
| updates the root node's gp_seq and qsmask together under the same lock|
|
||||
| acquisition, avoiding this useless contention. |
|
||||
+-----------------------------------------------------------------------+
|
||||
|
||||
Miscellaneous
|
||||
'''''''''''''
|
||||
|
||||
|
||||
@@ -1970,6 +1970,134 @@ corresponding CPU's leaf node lock is held. This avoids race conditions
|
||||
between RCU's hotplug notifier hooks, the grace period initialization
|
||||
code, and the FQS loop, all of which refer to or modify this bookkeeping.
|
||||
|
||||
Note that grace period initialization (rcu_gp_init()) must carefully sequence
|
||||
CPU hotplug scanning with grace period state changes. For example, the
|
||||
following race could occur in rcu_gp_init() if rcu_seq_start() were to happen
|
||||
after the CPU hotplug scanning.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
CPU0 (rcu_gp_init) CPU1 CPU2
|
||||
--------------------- ---- ----
|
||||
// Hotplug scan first (WRONG ORDER)
|
||||
rcu_for_each_leaf_node(rnp) {
|
||||
rnp->qsmaskinit = rnp->qsmaskinitnext;
|
||||
}
|
||||
rcutree_report_cpu_starting()
|
||||
rnp->qsmaskinitnext |= mask;
|
||||
rcu_read_lock()
|
||||
r0 = *X;
|
||||
r1 = *X;
|
||||
X = NULL;
|
||||
cookie = get_state_synchronize_rcu();
|
||||
// cookie = 8 (future GP)
|
||||
rcu_seq_start(&rcu_state.gp_seq);
|
||||
// gp_seq = 5
|
||||
|
||||
// CPU1 now invisible to this GP!
|
||||
rcu_for_each_node_breadth_first() {
|
||||
rnp->qsmask = rnp->qsmaskinit;
|
||||
// CPU1 not included!
|
||||
}
|
||||
|
||||
// GP completes without CPU1
|
||||
rcu_seq_end(&rcu_state.gp_seq);
|
||||
// gp_seq = 8
|
||||
poll_state_synchronize_rcu(cookie);
|
||||
// Returns true!
|
||||
kfree(r1);
|
||||
r2 = *r0; // USE-AFTER-FREE!
|
||||
|
||||
By incrementing gp_seq first, CPU1's RCU read-side critical section
|
||||
is guaranteed to not be missed by CPU2.
|
||||
|
||||
**Concurrent Quiescent State Reporting for Offline CPUs**
|
||||
|
||||
RCU must ensure that CPUs going offline report quiescent states to avoid
|
||||
blocking grace periods. This requires careful synchronization to handle
|
||||
race conditions.
|
||||
|
||||
**Race condition causing Offline CPU to hang GP**
|
||||
|
||||
A race between CPU offlining and new GP initialization (gp_init) may occur
|
||||
because `rcu_report_qs_rnp()` in `rcutree_report_cpu_dead()` must temporarily
|
||||
release the `rcu_node` lock to wake the RCU grace-period kthread:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
CPU1 (going offline) CPU0 (GP kthread)
|
||||
-------------------- -----------------
|
||||
rcutree_report_cpu_dead()
|
||||
rcu_report_qs_rnp()
|
||||
// Must release rnp->lock to wake GP kthread
|
||||
raw_spin_unlock_irqrestore_rcu_node()
|
||||
// Wakes up and starts new GP
|
||||
rcu_gp_init()
|
||||
// First loop:
|
||||
copies qsmaskinitnext->qsmaskinit
|
||||
// CPU1 still in qsmaskinitnext!
|
||||
|
||||
// Second loop:
|
||||
rnp->qsmask = rnp->qsmaskinit
|
||||
mask = rnp->qsmask & ~rnp->qsmaskinitnext
|
||||
// mask is 0! CPU1 still in both masks
|
||||
// Reacquire lock (but too late)
|
||||
rnp->qsmaskinitnext &= ~mask // Finally clears bit
|
||||
|
||||
Without `ofl_lock`, the new grace period includes the offline CPU and waits
|
||||
forever for its quiescent state, causing a GP hang.
|
||||
|
||||
**A solution with ofl_lock**
|
||||
|
||||
The `ofl_lock` (offline lock) prevents `rcu_gp_init()` from running during
|
||||
the vulnerable window when `rcu_report_qs_rnp()` has released `rnp->lock`:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
CPU0 (rcu_gp_init) CPU1 (rcutree_report_cpu_dead)
|
||||
------------------ ------------------------------
|
||||
rcu_for_each_leaf_node(rnp) {
|
||||
arch_spin_lock(&ofl_lock) -----> arch_spin_lock(&ofl_lock) [BLOCKED]
|
||||
|
||||
// Safe: CPU1 can't interfere
|
||||
rnp->qsmaskinit = rnp->qsmaskinitnext
|
||||
|
||||
arch_spin_unlock(&ofl_lock) ---> // Now CPU1 can proceed
|
||||
} // But snapshot already taken
|
||||
|
||||
**Another race causing GP hangs in rcu_gp_init(): Reporting QS for Now-offline CPUs**
|
||||
|
||||
After the first loop takes an atomic snapshot of online CPUs, as shown above,
|
||||
the second loop in `rcu_gp_init()` detects CPUs that went offline between
|
||||
releasing `ofl_lock` and acquiring the per-node `rnp->lock`. This detection is
|
||||
crucial because:
|
||||
|
||||
1. The CPU might have gone offline after the snapshot but before the second loop
|
||||
2. The offline CPU cannot report its own QS if it's already dead
|
||||
3. Without this detection, the grace period would wait forever for CPUs that
|
||||
are now offline.
|
||||
|
||||
The second loop performs this detection safely:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
rcu_for_each_node_breadth_first(rnp) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
rnp->qsmask = rnp->qsmaskinit; // Apply the snapshot
|
||||
|
||||
// Detect CPUs offline after snapshot
|
||||
mask = rnp->qsmask & ~rnp->qsmaskinitnext;
|
||||
|
||||
if (mask && rcu_is_leaf_node(rnp))
|
||||
rcu_report_qs_rnp(mask, ...) // Report QS for offline CPUs
|
||||
}
|
||||
|
||||
This approach ensures atomicity: quiescent state reporting for offline CPUs
|
||||
happens either in `rcu_gp_init()` (second loop) or in `rcutree_report_cpu_dead()`,
|
||||
never both and never neither. The `rnp->lock` held throughout the sequence
|
||||
prevents races - `rcutree_report_cpu_dead()` also acquires this lock when
|
||||
clearing `qsmaskinitnext`, ensuring mutual exclusion.
|
||||
|
||||
Scheduler and RCU
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
@@ -131,3 +131,59 @@ Get IO accounting for pid 1, it works only with -p::
|
||||
linuxrc: read=65536, write=0, cancelled_write=0
|
||||
|
||||
The above command can be used with -v to get more debug information.
|
||||
|
||||
After the system starts, use `delaytop` to get the system-wide delay information,
|
||||
which includes system-wide PSI information and Top-N high-latency tasks.
|
||||
|
||||
`delaytop` supports sorting by CPU latency in descending order by default,
|
||||
displays the top 20 high-latency tasks by default, and refreshes the latency
|
||||
data every 2 seconds by default.
|
||||
|
||||
Get PSI information and Top-N tasks delay, since system boot::
|
||||
|
||||
bash# ./delaytop
|
||||
System Pressure Information: (avg10/avg60/avg300/total)
|
||||
CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms)
|
||||
CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
|
||||
Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
|
||||
Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms)
|
||||
IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms)
|
||||
IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms)
|
||||
IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
|
||||
Top 20 processes (sorted by CPU delay):
|
||||
PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)
|
||||
----------------------------------------------------------------------------------------------
|
||||
161 161 zombie_memcg_re 1.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
130 130 blkcg_punt_bio 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
444 444 scsi_tmf_0 0.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1280 1280 rsyslogd 0.53 0.04 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
12 12 ksoftirqd/0 0.47 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1277 1277 nbd-server 0.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
308 308 kworker/2:2-sys 0.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
55 55 netns 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1187 1187 acpid 0.31 0.03 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
6184 6184 kworker/1:2-sys 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
186 186 kaluad 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
18 18 ksoftirqd/1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
185 185 kmpath_rdacd 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
190 190 kstrp 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
2759 2759 agetty 0.20 0.03 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1190 1190 kworker/0:3-sys 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1272 1272 sshd 0.15 0.04 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
1156 1156 license 0.15 0.11 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
134 134 md 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
6142 6142 kworker/3:2-xfs 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
|
||||
|
||||
Dynamic interactive interface of delaytop::
|
||||
|
||||
# ./delaytop -p pid
|
||||
Print delayacct stats
|
||||
|
||||
# ./delaytop -P num
|
||||
Display the top N tasks
|
||||
|
||||
# ./delaytop -n num
|
||||
Set delaytop refresh frequency (num times)
|
||||
|
||||
# ./delaytop -d secs
|
||||
Specify refresh interval as secs
|
||||
|
||||
@@ -2,6 +2,17 @@
|
||||
SELinux
|
||||
=======
|
||||
|
||||
Information about the SELinux kernel subsystem can be found at the
|
||||
following links:
|
||||
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git/tree/README.md
|
||||
|
||||
https://github.com/selinuxproject/selinux-kernel/wiki
|
||||
|
||||
Information about the SELinux userspace can be found at:
|
||||
|
||||
https://github.com/SELinuxProject/selinux/wiki
|
||||
|
||||
If you want to use SELinux, chances are you will want
|
||||
to use the distro-provided policies, or install the
|
||||
latest reference policy release from
|
||||
|
||||
@@ -79,7 +79,7 @@ zone_capacity_mb Device zone capacity (must always be equal to or lower than
|
||||
the zone size. Default: zone size.
|
||||
conv_zones Total number of conventional zones starting from sector 0.
|
||||
Default: 8.
|
||||
base_dir Path to the base directoy where to create the directory
|
||||
base_dir Path to the base directory where to create the directory
|
||||
containing the zone files of the device.
|
||||
Default=/var/local/zloop.
|
||||
The device directory containing the zone files is always
|
||||
|
||||
@@ -265,7 +265,7 @@ The final kernel cmdline will be the following::
|
||||
Config File Limitation
|
||||
======================
|
||||
|
||||
Currently the maximum config size size is 32KB and the total key-words (not
|
||||
Currently the maximum config size is 32KB and the total key-words (not
|
||||
key-value entries) must be under 1024 nodes.
|
||||
Note: this is not the number of entries but nodes, an entry must consume
|
||||
more than 2 nodes (a key-word and a value). So theoretically, it will be
|
||||
|
||||
@@ -435,6 +435,15 @@ both cgroups.
|
||||
Controlling Controllers
|
||||
-----------------------
|
||||
|
||||
Availability
|
||||
~~~~~~~~~~~~
|
||||
|
||||
A controller is available in a cgroup when it is supported by the kernel (i.e.,
|
||||
compiled in, not disabled and not attached to a v1 hierarchy) and listed in the
|
||||
"cgroup.controllers" file. Availability means the controller's interface files
|
||||
are exposed in the cgroup’s directory, allowing the distribution of the target
|
||||
resource to be observed or controlled within that cgroup.
|
||||
|
||||
Enabling and Disabling
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -1732,12 +1741,6 @@ The following nested keys are defined.
|
||||
numa_hint_faults (npn)
|
||||
Number of NUMA hinting faults.
|
||||
|
||||
numa_task_migrated (npn)
|
||||
Number of task migration by NUMA balancing.
|
||||
|
||||
numa_task_swapped (npn)
|
||||
Number of task swap by NUMA balancing.
|
||||
|
||||
pgdemote_kswapd
|
||||
Number of pages demoted by kswapd.
|
||||
|
||||
|
||||
@@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
|
||||
illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
|
||||
is the default for SMB3). This remap (``mapposix``) range is also
|
||||
compatible with Mac (and "Services for Mac" on some older Windows).
|
||||
When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
|
||||
disabled.
|
||||
|
||||
CIFS VFS Mount Options
|
||||
======================
|
||||
|
||||
@@ -80,11 +80,11 @@ less sharing than average you'll need a larger-than-average metadata device.
|
||||
|
||||
As a guide, we suggest you calculate the number of bytes to use in the
|
||||
metadata device as 48 * $data_dev_size / $data_block_size but round it up
|
||||
to 2MB if the answer is smaller. If you're creating large numbers of
|
||||
to 2MiB if the answer is smaller. If you're creating large numbers of
|
||||
snapshots which are recording large amounts of change, you may find you
|
||||
need to increase this.
|
||||
|
||||
The largest size supported is 16GB: If the device is larger,
|
||||
The largest size supported is 16GiB: If the device is larger,
|
||||
a warning will be issued and the excess space will not be used.
|
||||
|
||||
Reloading a pool table
|
||||
@@ -107,13 +107,13 @@ Using an existing pool device
|
||||
|
||||
$data_block_size gives the smallest unit of disk space that can be
|
||||
allocated at a time expressed in units of 512-byte sectors.
|
||||
$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
|
||||
multiple of 128 (64KB). $data_block_size cannot be changed after the
|
||||
$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a
|
||||
multiple of 128 (64KiB). $data_block_size cannot be changed after the
|
||||
thin-pool is created. People primarily interested in thin provisioning
|
||||
may want to use a value such as 1024 (512KB). People doing lots of
|
||||
snapshotting may want a smaller value such as 128 (64KB). If you are
|
||||
may want to use a value such as 1024 (512KiB). People doing lots of
|
||||
snapshotting may want a smaller value such as 128 (64KiB). If you are
|
||||
not zeroing newly-allocated data, a larger $data_block_size in the
|
||||
region of 256000 (128MB) is suggested.
|
||||
region of 262144 (128MiB) is suggested.
|
||||
|
||||
$low_water_mark is expressed in blocks of size $data_block_size. If
|
||||
free space on the data device drops below this level then a dm event
|
||||
@@ -291,7 +291,7 @@ i) Constructor
|
||||
error_if_no_space:
|
||||
Error IOs, instead of queueing, if no space.
|
||||
|
||||
Data block size must be between 64KB (128 sectors) and 1GB
|
||||
Data block size must be between 64KiB (128 sectors) and 1GiB
|
||||
(2097152 sectors) inclusive.
|
||||
|
||||
|
||||
|
||||
@@ -50,8 +50,11 @@ the number of lines exposed by this bank.
|
||||
|
||||
**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/name``
|
||||
|
||||
This group represents a single line at the offset Y. The 'name' attribute
|
||||
allows to set the line name as represented by the 'gpio-line-names' property.
|
||||
**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/valid``
|
||||
|
||||
This group represents a single line at the offset Y. The ``valid`` attribute
|
||||
indicates whether the line can be used as GPIO. The ``name`` attribute allows
|
||||
to set the line name as represented by the 'gpio-line-names' property.
|
||||
|
||||
**Item:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/hog``
|
||||
|
||||
|
||||
238
Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
Normal file
238
Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
Normal file
@@ -0,0 +1,238 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Attack Vector Controls
|
||||
======================
|
||||
|
||||
Attack vector controls provide a simple method to configure only the mitigations
|
||||
for CPU vulnerabilities which are relevant given the intended use of a system.
|
||||
Administrators are encouraged to consider which attack vectors are relevant and
|
||||
disable all others in order to recoup system performance.
|
||||
|
||||
When new relevant CPU vulnerabilities are found, they will be added to these
|
||||
attack vector controls so administrators will likely not need to reconfigure
|
||||
their command line parameters as mitigations will continue to be correctly
|
||||
applied based on the chosen attack vector controls.
|
||||
|
||||
Attack Vectors
|
||||
--------------
|
||||
|
||||
There are 5 sets of attack-vector mitigations currently supported by the kernel:
|
||||
|
||||
#. :ref:`user_kernel`
|
||||
#. :ref:`user_user`
|
||||
#. :ref:`guest_host`
|
||||
#. :ref:`guest_guest`
|
||||
#. :ref:`smt`
|
||||
|
||||
To control the enabled attack vectors, see :ref:`cmdline`.
|
||||
|
||||
.. _user_kernel:
|
||||
|
||||
User-to-Kernel
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
The user-to-kernel attack vector involves a malicious userspace program
|
||||
attempting to leak kernel data into userspace by exploiting a CPU vulnerability.
|
||||
The kernel data involved might be limited to certain kernel memory, or include
|
||||
all memory in the system, depending on the vulnerability exploited.
|
||||
|
||||
If no untrusted userspace applications are being run, such as with single-user
|
||||
systems, consider disabling user-to-kernel mitigations.
|
||||
|
||||
Note that the CPU vulnerabilities mitigated by Linux have generally not been
|
||||
shown to be exploitable from browser-based sandboxes. User-to-kernel
|
||||
mitigations are therefore mostly relevant if unknown userspace applications may
|
||||
be run by untrusted users.
|
||||
|
||||
*user-to-kernel mitigations are enabled by default*
|
||||
|
||||
.. _user_user:
|
||||
|
||||
User-to-User
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The user-to-user attack vector involves a malicious userspace program attempting
|
||||
to influence the behavior of another unsuspecting userspace program in order to
|
||||
exfiltrate data. The vulnerability of a userspace program is based on the
|
||||
program itself and the interfaces it provides.
|
||||
|
||||
If no untrusted userspace applications are being run, consider disabling
|
||||
user-to-user mitigations.
|
||||
|
||||
Note that because the Linux kernel contains a mapping of all physical memory,
|
||||
preventing a malicious userspace program from leaking data from another
|
||||
userspace program requires mitigating user-to-kernel attacks as well for
|
||||
complete protection.
|
||||
|
||||
*user-to-user mitigations are enabled by default*
|
||||
|
||||
.. _guest_host:
|
||||
|
||||
Guest-to-Host
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
The guest-to-host attack vector involves a malicious VM attempting to leak
|
||||
hypervisor data into the VM. The data involved may be limited, or may
|
||||
potentially include all memory in the system, depending on the vulnerability
|
||||
exploited.
|
||||
|
||||
If no untrusted VMs are being run, consider disabling guest-to-host mitigations.
|
||||
|
||||
*guest-to-host mitigations are enabled by default if KVM support is present*
|
||||
|
||||
.. _guest_guest:
|
||||
|
||||
Guest-to-Guest
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
The guest-to-guest attack vector involves a malicious VM attempting to influence
|
||||
the behavior of another unsuspecting VM in order to exfiltrate data. The
|
||||
vulnerability of a VM is based on the code inside the VM itself and the
|
||||
interfaces it provides.
|
||||
|
||||
If no untrusted VMs, or only a single VM is being run, consider disabling
|
||||
guest-to-guest mitigations.
|
||||
|
||||
Similar to the user-to-user attack vector, preventing a malicious VM from
|
||||
leaking data from another VM requires mitigating guest-to-host attacks as well
|
||||
due to the Linux kernel phys map.
|
||||
|
||||
*guest-to-guest mitigations are enabled by default if KVM support is present*
|
||||
|
||||
.. _smt:
|
||||
|
||||
Cross-Thread
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The cross-thread attack vector involves a malicious userspace program or
|
||||
malicious VM either observing or attempting to influence the behavior of code
|
||||
running on the SMT sibling thread in order to exfiltrate data.
|
||||
|
||||
Many cross-thread attacks can only be mitigated if SMT is disabled, which will
|
||||
result in reduced CPU core count and reduced performance.
|
||||
|
||||
If cross-thread mitigations are fully enabled ('auto,nosmt'), all mitigations
|
||||
for cross-thread attacks will be enabled. SMT may be disabled depending on
|
||||
which vulnerabilities are present in the CPU.
|
||||
|
||||
If cross-thread mitigations are partially enabled ('auto'), mitigations for
|
||||
cross-thread attacks will be enabled but SMT will not be disabled.
|
||||
|
||||
If cross-thread mitigations are disabled, no mitigations for cross-thread
|
||||
attacks will be enabled.
|
||||
|
||||
Cross-thread mitigation may not be required if core-scheduling or similar
|
||||
techniques are used to prevent untrusted workloads from running on SMT siblings.
|
||||
|
||||
*cross-thread mitigations default to partially enabled*
|
||||
|
||||
.. _cmdline:
|
||||
|
||||
Command Line Controls
|
||||
---------------------
|
||||
|
||||
Attack vectors are controlled through the mitigations= command line option. The
|
||||
value provided begins with a global option and then may optionally include one
|
||||
or more options to disable various attack vectors.
|
||||
|
||||
Format:
|
||||
| ``mitigations=[global]``
|
||||
| ``mitigations=[global],[attack vectors]``
|
||||
|
||||
Global options:
|
||||
|
||||
============ =============================================================
|
||||
Option Description
|
||||
============ =============================================================
|
||||
'off' All attack vectors disabled.
|
||||
'auto' All attack vectors enabled, partial cross-thread mitigations.
|
||||
'auto,nosmt' All attack vectors enabled, full cross-thread mitigations.
|
||||
============ =============================================================
|
||||
|
||||
Attack vector options:
|
||||
|
||||
================= =======================================
|
||||
Option Description
|
||||
================= =======================================
|
||||
'no_user_kernel' Disables user-to-kernel mitigations.
|
||||
'no_user_user' Disables user-to-user mitigations.
|
||||
'no_guest_host' Disables guest-to-host mitigations.
|
||||
'no_guest_guest' Disables guest-to-guest mitigations.
|
||||
'no_cross_thread' Disables all cross-thread mitigations.
|
||||
================= =======================================
|
||||
|
||||
Multiple attack vector options may be specified in a comma-separated list. If
|
||||
the global option is not specified, it defaults to 'auto'. The global option
|
||||
'off' is equivalent to disabling all attack vectors.
|
||||
|
||||
Examples:
|
||||
| ``mitigations=auto,no_user_kernel``
|
||||
|
||||
Enable all attack vectors except user-to-kernel. Partial cross-thread
|
||||
mitigations.
|
||||
|
||||
| ``mitigations=auto,nosmt,no_guest_host,no_guest_guest``
|
||||
|
||||
Enable all attack vectors and cross-thread mitigations except for
|
||||
guest-to-host and guest-to-guest mitigations.
|
||||
|
||||
| ``mitigations=,no_cross_thread``
|
||||
|
||||
Enable all attack vectors but not cross-thread mitigations.
|
||||
|
||||
Interactions with command-line options
|
||||
--------------------------------------
|
||||
|
||||
Vulnerability-specific controls (e.g. "retbleed=off") take precedence over all
|
||||
attack vector controls. Mitigations for individual vulnerabilities may be
|
||||
turned on or off via their command-line options regardless of the attack vector
|
||||
controls.
|
||||
|
||||
Summary of attack-vector mitigations
|
||||
------------------------------------
|
||||
|
||||
When a vulnerability is mitigated due to an attack-vector control, the default
|
||||
mitigation option for that particular vulnerability is used. To use a different
|
||||
mitigation, please use the vulnerability-specific command line option.
|
||||
|
||||
The table below summarizes which vulnerabilities are mitigated when different
|
||||
attack vectors are enabled and assuming the CPU is vulnerable.
|
||||
|
||||
=============== ============== ============ ============= ============== ============ ========
|
||||
Vulnerability User-to-Kernel User-to-User Guest-to-Host Guest-to-Guest Cross-Thread Notes
|
||||
=============== ============== ============ ============= ============== ============ ========
|
||||
BHI X X
|
||||
ITS X X
|
||||
GDS X X X X * (Note 1)
|
||||
L1TF X X * (Note 2)
|
||||
MDS X X X X * (Note 2)
|
||||
MMIO X X X X * (Note 2)
|
||||
Meltdown X
|
||||
Retbleed X X * (Note 3)
|
||||
RFDS X X X X
|
||||
Spectre_v1 X
|
||||
Spectre_v2 X X
|
||||
Spectre_v2_user X X * (Note 1)
|
||||
SRBDS X X X X
|
||||
SRSO X X X X
|
||||
SSB (Note 4)
|
||||
TAA X X X X * (Note 2)
|
||||
TSA X X X X
|
||||
=============== ============== ============ ============= ============== ============ ========
|
||||
|
||||
Notes:
|
||||
1 -- Can be mitigated without disabling SMT.
|
||||
|
||||
2 -- Disables SMT if cross-thread mitigations are fully enabled and the CPU
|
||||
is vulnerable.
|
||||
|
||||
3 -- Disables SMT if cross-thread mitigations are fully enabled, the CPU is
|
||||
vulnerable, and STIBP is not supported.
|
||||
|
||||
4 -- Speculative store bypass is always enabled by default (no kernel
|
||||
mitigation applied) unless overridden with spec_store_bypass_disable option.
|
||||
|
||||
When an attack-vector is disabled, all mitigations for the vulnerabilities
|
||||
listed in the above table are disabled, unless mitigation is required for a
|
||||
different enabled attack-vector or a mitigation is explicitly selected via a
|
||||
vulnerability-specific command line option.
|
||||
@@ -9,6 +9,7 @@ are configurable at compile, boot or run time.
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
attack_vector_controls
|
||||
spectre
|
||||
l1tf
|
||||
mds
|
||||
|
||||
@@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
|
||||
combination with a microcode update. The microcode clears the affected CPU
|
||||
buffers when the VERW instruction is executed.
|
||||
|
||||
Kernel reuses the MDS function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
Kernel does the buffer clearing with x86_clear_cpu_buffers().
|
||||
|
||||
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
|
||||
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
|
||||
|
||||
@@ -311,6 +311,27 @@ crashkernel syntax
|
||||
|
||||
crashkernel=0,low
|
||||
|
||||
4) crashkernel=size,cma
|
||||
|
||||
Reserve additional crash kernel memory from CMA. This reservation is
|
||||
usable by the first system's userspace memory and kernel movable
|
||||
allocations (memory balloon, zswap). Pages allocated from this memory
|
||||
range will not be included in the vmcore so this should not be used if
|
||||
dumping of userspace memory is intended and it has to be expected that
|
||||
some movable kernel pages may be missing from the dump.
|
||||
|
||||
A standard crashkernel reservation, as described above, is still needed
|
||||
to hold the crash kernel and initrd.
|
||||
|
||||
This option increases the risk of a kdump failure: DMA transfers
|
||||
configured by the first kernel may end up corrupting the second
|
||||
kernel's memory.
|
||||
|
||||
This reservation method is intended for systems that can't afford to
|
||||
sacrifice enough memory for standard crashkernel reservation and where
|
||||
less reliable and possibly incomplete kdump is preferable to no kdump at
|
||||
all.
|
||||
|
||||
Boot into System Kernel
|
||||
-----------------------
|
||||
1) Update the boot loader (such as grub, yaboot, or lilo) configuration
|
||||
|
||||
@@ -325,14 +325,14 @@ NR_FREE_PAGES
|
||||
On linux-2.6.21 or later, the number of free pages is in
|
||||
vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
|
||||
|
||||
PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|PG_hugetlb
|
||||
-----------------------------------------------------------------------------------------
|
||||
PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_hwpoison|PG_head_mask
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
Page attributes. These flags are used to filter various unnecessary for
|
||||
dumping pages.
|
||||
|
||||
PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_unaccepted)
|
||||
-------------------------------------------------------------------------------------------------------------------------
|
||||
PAGE_SLAB_MAPCOUNT_VALUE|PAGE_BUDDY_MAPCOUNT_VALUE|PAGE_OFFLINE_MAPCOUNT_VALUE|PAGE_HUGETLB_MAPCOUNT_VALUE|PAGE_UNACCEPTED_MAPCOUNT_VALUE
|
||||
------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
More page attributes. These flags are used to filter various unnecessary for
|
||||
dumping pages.
|
||||
|
||||
@@ -633,6 +633,14 @@
|
||||
named mounts. Specifying both "all" and "named" disables
|
||||
all v1 hierarchies.
|
||||
|
||||
cgroup_v1_proc= [KNL] Show also missing controllers in /proc/cgroups
|
||||
Format: { "true" | "false" }
|
||||
/proc/cgroups lists only v1 controllers by default.
|
||||
This compatibility option enables listing also v2
|
||||
controllers (whose v1 code is not compiled!), so that
|
||||
semi-legacy software can check this file to decide
|
||||
about usage of v2 (sic) controllers.
|
||||
|
||||
cgroup_favordynmods= [KNL] Enable or Disable favordynmods.
|
||||
Format: { "true" | "false" }
|
||||
Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS.
|
||||
@@ -986,6 +994,28 @@
|
||||
0: to disable low allocation.
|
||||
It will be ignored when crashkernel=X,high is not used
|
||||
or memory reserved is below 4G.
|
||||
crashkernel=size[KMG],cma
|
||||
[KNL, X86] Reserve additional crash kernel memory from
|
||||
CMA. This reservation is usable by the first system's
|
||||
userspace memory and kernel movable allocations (memory
|
||||
balloon, zswap). Pages allocated from this memory range
|
||||
will not be included in the vmcore so this should not
|
||||
be used if dumping of userspace memory is intended and
|
||||
it has to be expected that some movable kernel pages
|
||||
may be missing from the dump.
|
||||
|
||||
A standard crashkernel reservation, as described above,
|
||||
is still needed to hold the crash kernel and initrd.
|
||||
|
||||
This option increases the risk of a kdump failure: DMA
|
||||
transfers configured by the first kernel may end up
|
||||
corrupting the second kernel's memory.
|
||||
|
||||
This reservation method is intended for systems that
|
||||
can't afford to sacrifice enough memory for standard
|
||||
crashkernel reservation and where less reliable and
|
||||
possibly incomplete kdump is preferable to no kdump at
|
||||
all.
|
||||
|
||||
cryptomgr.notests
|
||||
[KNL] Disable crypto self-tests
|
||||
@@ -1798,6 +1828,27 @@
|
||||
backtraces on all cpus.
|
||||
Format: 0 | 1
|
||||
|
||||
hash_pointers=
|
||||
[KNL,EARLY]
|
||||
By default, when pointers are printed to the console
|
||||
or buffers via the %p format string, that pointer is
|
||||
"hashed", i.e. obscured by hashing the pointer value.
|
||||
This is a security feature that hides actual kernel
|
||||
addresses from unprivileged users, but it also makes
|
||||
debugging the kernel more difficult since unequal
|
||||
pointers can no longer be compared. The choices are:
|
||||
Format: { auto | always | never }
|
||||
Default: auto
|
||||
|
||||
auto - Hash pointers unless slab_debug is enabled.
|
||||
always - Always hash pointers (even if slab_debug is
|
||||
enabled).
|
||||
never - Never hash pointers. This option should only
|
||||
be specified when debugging the kernel. Do
|
||||
not use on production kernels. The boot
|
||||
param "no_hash_pointers" is an alias for
|
||||
this mode.
|
||||
|
||||
hashdist= [KNL,NUMA] Large hashes allocated during boot
|
||||
are distributed across NUMA nodes. Defaults on
|
||||
for 64-bit NUMA, off otherwise.
|
||||
@@ -2212,6 +2263,11 @@
|
||||
different crypto accelerators. This option can be used
|
||||
to achieve best performance for particular HW.
|
||||
|
||||
ima= [IMA] Enable or disable IMA
|
||||
Format: { "off" | "on" }
|
||||
Default: "on"
|
||||
Note that disabling IMA is limited to kdump kernel.
|
||||
|
||||
indirect_target_selection= [X86,Intel] Mitigation control for Indirect
|
||||
Target Selection(ITS) bug in Intel CPUs. Updated
|
||||
microcode is also required for a fix in IBPB.
|
||||
@@ -2538,6 +2594,13 @@
|
||||
requires the kernel to be built with
|
||||
CONFIG_ARM64_PSEUDO_NMI.
|
||||
|
||||
irqchip.riscv_imsic_noipi
|
||||
[RISC-V,EARLY]
|
||||
Force the kernel to not use IMSIC software injected MSIs
|
||||
as IPIs. Intended for systems where IMSIC is trap-n-emulated,
|
||||
and thus want to reduce MMIO traps when triggering IPIs
|
||||
to multiple harts.
|
||||
|
||||
irqfixup [HW]
|
||||
When an interrupt is not handled search all handlers
|
||||
for it. Intended to get systems with badly broken
|
||||
@@ -3790,6 +3853,10 @@
|
||||
mmio_stale_data=full,nosmt [X86]
|
||||
retbleed=auto,nosmt [X86]
|
||||
|
||||
[X86] After one of the above options, additionally
|
||||
supports attack-vector based controls as documented in
|
||||
Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
|
||||
|
||||
mminit_loglevel=
|
||||
[KNL,EARLY] When CONFIG_DEBUG_MEMORY_INIT is set, this
|
||||
parameter allows control of the logging verbosity for
|
||||
@@ -4170,18 +4237,7 @@
|
||||
|
||||
no_hash_pointers
|
||||
[KNL,EARLY]
|
||||
Force pointers printed to the console or buffers to be
|
||||
unhashed. By default, when a pointer is printed via %p
|
||||
format string, that pointer is "hashed", i.e. obscured
|
||||
by hashing the pointer value. This is a security feature
|
||||
that hides actual kernel addresses from unprivileged
|
||||
users, but it also makes debugging the kernel more
|
||||
difficult since unequal pointers can no longer be
|
||||
compared. However, if this command-line option is
|
||||
specified, then all normal pointers will have their true
|
||||
value printed. This option should only be specified when
|
||||
debugging the kernel. Please do not use on production
|
||||
kernels.
|
||||
Alias for "hash_pointers=never".
|
||||
|
||||
nohibernate [HIBERNATION] Disable hibernation and resume.
|
||||
|
||||
@@ -4533,7 +4589,7 @@
|
||||
bit 2: print timer info
|
||||
bit 3: print locks info if CONFIG_LOCKDEP is on
|
||||
bit 4: print ftrace buffer
|
||||
bit 5: print all printk messages in buffer
|
||||
bit 5: replay all messages on consoles at the end of panic
|
||||
bit 6: print all CPUs backtrace (if available in the arch)
|
||||
bit 7: print only tasks in uninterruptible (blocked) state
|
||||
*Be aware* that this option may print a _lot_ of lines,
|
||||
@@ -4541,6 +4597,25 @@
|
||||
Use this option carefully, maybe worth to setup a
|
||||
bigger log buffer with "log_buf_len" along with this.
|
||||
|
||||
panic_sys_info= A comma separated list of extra information to be dumped
|
||||
on panic.
|
||||
Format: val[,val...]
|
||||
Where @val can be any of the following:
|
||||
|
||||
tasks: print all tasks info
|
||||
mem: print system memory info
|
||||
timers: print timers info
|
||||
locks: print locks info if CONFIG_LOCKDEP is on
|
||||
ftrace: print ftrace buffer
|
||||
all_bt: print all CPUs backtrace (if available in the arch)
|
||||
blocked_tasks: print only tasks in uninterruptible (blocked) state
|
||||
|
||||
This is a human readable alternative to the 'panic_print' option.
|
||||
|
||||
panic_console_replay
|
||||
When panic happens, replay all kernel messages on
|
||||
consoles at the end of panic.
|
||||
|
||||
parkbd.port= [HW] Parallel port number the keyboard adapter is
|
||||
connected to, default is 0.
|
||||
Format: <parport#>
|
||||
@@ -5000,6 +5075,18 @@
|
||||
that number, otherwise (e.g., 'pmu_override=on'), MMCR1
|
||||
remains 0.
|
||||
|
||||
pm_async= [PM]
|
||||
Format: off
|
||||
This parameter sets the initial value of the
|
||||
/sys/power/pm_async sysfs knob at boot time.
|
||||
If set to "off", disables asynchronous suspend and
|
||||
resume of devices during system-wide power transitions.
|
||||
This can be useful on platforms where device
|
||||
dependencies are not well-defined, or for debugging
|
||||
power management issues. Asynchronous operations are
|
||||
enabled by default.
|
||||
|
||||
|
||||
pm_debug_messages [SUSPEND,KNL]
|
||||
Enable suspend/resume debug messages during boot up.
|
||||
|
||||
@@ -5485,7 +5572,8 @@
|
||||
echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
|
||||
or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
|
||||
|
||||
Default is 0.
|
||||
Default is 1 if num_possible_cpus() <= 16 and it is not explicitly
|
||||
disabled by the boot parameter passing 0.
|
||||
|
||||
rcuscale.gp_async= [KNL]
|
||||
Measure performance of asynchronous
|
||||
@@ -6387,6 +6475,11 @@
|
||||
sa1100ir [NET]
|
||||
See drivers/net/irda/sa1100_ir.c.
|
||||
|
||||
sched_proxy_exec= [KNL]
|
||||
Enables or disables "proxy execution" style
|
||||
solution to mutex-based priority inversion.
|
||||
Format: <bool>
|
||||
|
||||
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
|
||||
|
||||
schedstats= [KNL,X86] Enable or disable scheduler statistics.
|
||||
@@ -6558,14 +6651,18 @@
|
||||
slab_debug can create guard zones around objects and
|
||||
may poison objects when not in use. Also tracks the
|
||||
last alloc / free. For more information see
|
||||
Documentation/mm/slub.rst.
|
||||
Documentation/admin-guide/mm/slab.rst.
|
||||
(slub_debug legacy name also accepted for now)
|
||||
|
||||
Using this option implies the "no_hash_pointers"
|
||||
option which can be undone by adding the
|
||||
"hash_pointers=always" option.
|
||||
|
||||
slab_max_order= [MM]
|
||||
Determines the maximum allowed order for slabs.
|
||||
A high setting may cause OOMs due to memory
|
||||
fragmentation. For more information see
|
||||
Documentation/mm/slub.rst.
|
||||
Documentation/admin-guide/mm/slab.rst.
|
||||
(slub_max_order legacy name also accepted for now)
|
||||
|
||||
slab_merge [MM]
|
||||
@@ -6580,13 +6677,14 @@
|
||||
the number of objects indicated. The higher the number
|
||||
of objects the smaller the overhead of tracking slabs
|
||||
and the less frequently locks need to be acquired.
|
||||
For more information see Documentation/mm/slub.rst.
|
||||
For more information see
|
||||
Documentation/admin-guide/mm/slab.rst.
|
||||
(slub_min_objects legacy name also accepted for now)
|
||||
|
||||
slab_min_order= [MM]
|
||||
Determines the minimum page order for slabs. Must be
|
||||
lower than or equal to slab_max_order. For more information see
|
||||
Documentation/mm/slub.rst.
|
||||
Documentation/admin-guide/mm/slab.rst.
|
||||
(slub_min_order legacy name also accepted for now)
|
||||
|
||||
slab_nomerge [MM]
|
||||
@@ -6600,7 +6698,8 @@
|
||||
cache (risks via metadata attacks are mostly
|
||||
unchanged). Debug options disable merging on their
|
||||
own.
|
||||
For more information see Documentation/mm/slub.rst.
|
||||
For more information see
|
||||
Documentation/admin-guide/mm/slab.rst.
|
||||
(slub_nomerge legacy name also accepted for now)
|
||||
|
||||
slab_strict_numa [MM]
|
||||
@@ -6988,6 +7087,11 @@
|
||||
consumed by the stack hash table. By default this is set
|
||||
to false.
|
||||
|
||||
stack_depot_max_pools= [KNL,EARLY]
|
||||
Specify the maximum number of pools to use for storing
|
||||
stack traces. Pools are allocated on-demand up to this
|
||||
limit. Default value is 8191 pools.
|
||||
|
||||
stacktrace [FTRACE]
|
||||
Enable the stack tracer on boot up.
|
||||
|
||||
@@ -7214,6 +7318,14 @@
|
||||
causing a major performance hit, and the space where
|
||||
machines are deployed is by other means guarded.
|
||||
|
||||
tpm_crb_ffa.busy_timeout_ms= [ARM64,TPM]
|
||||
Maximum time in milliseconds to retry sending a message
|
||||
to the TPM service before giving up. This parameter controls
|
||||
how long the system will continue retrying when the TPM
|
||||
service is busy.
|
||||
Format: <unsigned int>
|
||||
Default: 2000 (2 seconds)
|
||||
|
||||
tpm_suspend_pcr=[HW,TPM]
|
||||
Format: integer pcr id
|
||||
Specify that at suspend time, the tpm driver
|
||||
@@ -7488,6 +7600,19 @@
|
||||
having this key zero'ed is acceptable. E.g. in testing
|
||||
scenarios.
|
||||
|
||||
tsa= [X86] Control mitigation for Transient Scheduler
|
||||
Attacks on AMD CPUs. Search the following in your
|
||||
favourite search engine for more details:
|
||||
|
||||
"Technical guidance for mitigating transient scheduler
|
||||
attacks".
|
||||
|
||||
off - disable the mitigation
|
||||
on - enable the mitigation (default)
|
||||
user - mitigate only user/kernel transitions
|
||||
vm - mitigate only guest/host transitions
|
||||
|
||||
|
||||
tsc= Disable clocksource stability checks for TSC.
|
||||
Format: <string>
|
||||
[x86] reliable: mark tsc clocksource as reliable, this
|
||||
|
||||
@@ -14,3 +14,4 @@ access monitoring and access-aware system operations.
|
||||
usage
|
||||
reclaim
|
||||
lru_sort
|
||||
stat
|
||||
|
||||
69
Documentation/admin-guide/mm/damon/stat.rst
Normal file
69
Documentation/admin-guide/mm/damon/stat.rst
Normal file
@@ -0,0 +1,69 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Data Access Monitoring Results Stat
|
||||
===================================
|
||||
|
||||
Data Access Monitoring Results Stat (DAMON_STAT) is a static kernel module that
|
||||
is aimed to be used for simple access pattern monitoring. It monitors accesses
|
||||
on the system's entire physical memory using DAMON, and provides simplified
|
||||
access monitoring results statistics, namely idle time percentiles and
|
||||
estimated memory bandwidth.
|
||||
|
||||
Monitoring Accuracy and Overhead
|
||||
================================
|
||||
|
||||
DAMON_STAT uses monitoring intervals :ref:`auto-tuning
|
||||
<damon_design_monitoring_intervals_autotuning>` to make its accuracy high and
|
||||
overhead minimum. It auto-tunes the intervals aiming for 4 % of observable access
|
||||
events to be captured in each snapshot, while limiting the resulting sampling
|
||||
interval to be 5 milliseconds at minimum and 10 seconds at maximum. On a few
|
||||
production server systems, it resulted in consuming only 0.x % single CPU time,
|
||||
while capturing reasonable quality of access patterns.
|
||||
|
||||
Interface: Module Parameters
|
||||
============================
|
||||
|
||||
To use this feature, you should first ensure your system is running on a kernel
|
||||
that is built with ``CONFIG_DAMON_STAT=y``. The feature can be enabled by
|
||||
default at build time, by setting ``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` true.
|
||||
|
||||
To let sysadmins enable or disable it at boot and/or runtime, and read the
|
||||
monitoring results, DAMON_STAT provides module parameters. Following
|
||||
sections are descriptions of the parameters.
|
||||
|
||||
enabled
|
||||
-------
|
||||
|
||||
Enable or disable DAMON_STAT.
|
||||
|
||||
You can enable DAMON_STAT by setting the value of this parameter as ``Y``.
|
||||
Setting it as ``N`` disables DAMON_STAT. The default value is set by
|
||||
``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` build config option.
|
||||
|
||||
estimated_memory_bandwidth
|
||||
--------------------------
|
||||
|
||||
Estimated memory bandwidth consumption (bytes per second) of the system.
|
||||
|
||||
DAMON_STAT reads observed access events on the current DAMON results snapshot
|
||||
and converts it to memory bandwidth consumption estimation in bytes per second.
|
||||
The resulting metric is exposed to the user via this read-only parameter. Because
|
||||
DAMON uses sampling, this is only an estimation of the access intensity rather
|
||||
than accurate memory bandwidth.
|
||||
|
||||
memory_idle_ms_percentiles
|
||||
--------------------------
|
||||
|
||||
Per-byte idle time (milliseconds) percentiles of the system.
|
||||
|
||||
DAMON_STAT calculates how long each byte of the memory was not accessed until
|
||||
now (idle time), based on the current DAMON results snapshot. If DAMON found a
|
||||
region of access frequency (nr_accesses) larger than zero, every byte of the
|
||||
region gets zero idle time. If a region has zero access frequency
|
||||
(nr_accesses), how long the region was keeping the zero access frequency (age)
|
||||
becomes the idle time of every byte of the region. Then, DAMON_STAT exposes
|
||||
the percentiles of the idle time values via this read-only parameter. Reading
|
||||
the parameter returns 101 idle time values in milliseconds, separated by commas.
|
||||
Each value represents 0-th, 1st, 2nd, 3rd, ..., 99th and 100th percentile idle
|
||||
times.
|
||||
@@ -59,7 +59,7 @@ comma (",").
|
||||
|
||||
:ref:`/sys/kernel/mm/damon <sysfs_root>`/admin
|
||||
│ :ref:`kdamonds <sysfs_kdamonds>`/nr_kdamonds
|
||||
│ │ :ref:`0 <sysfs_kdamond>`/state,pid
|
||||
│ │ :ref:`0 <sysfs_kdamond>`/state,pid,refresh_ms
|
||||
│ │ │ :ref:`contexts <sysfs_contexts>`/nr_contexts
|
||||
│ │ │ │ :ref:`0 <sysfs_context>`/avail_operations,operations
|
||||
│ │ │ │ │ :ref:`monitoring_attrs <sysfs_monitoring_attrs>`/
|
||||
@@ -85,6 +85,8 @@ comma (",").
|
||||
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
|
||||
│ │ │ │ │ │ │ :ref:`{core_,ops_,}filters <sysfs_filters>`/nr_filters
|
||||
│ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max
|
||||
│ │ │ │ │ │ │ :ref:`dests <damon_sysfs_dests>`/nr_dests
|
||||
│ │ │ │ │ │ │ │ 0/id,weight
|
||||
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
|
||||
│ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
|
||||
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed
|
||||
@@ -121,8 +123,8 @@ kdamond.
|
||||
kdamonds/<N>/
|
||||
-------------
|
||||
|
||||
In each kdamond directory, two files (``state`` and ``pid``) and one directory
|
||||
(``contexts``) exist.
|
||||
In each kdamond directory, three files (``state``, ``pid`` and ``refresh_ms``)
|
||||
and one directory (``contexts``) exist.
|
||||
|
||||
Reading ``state`` returns ``on`` if the kdamond is currently running, or
|
||||
``off`` if it is not running.
|
||||
@@ -159,6 +161,13 @@ Users can write below commands for the kdamond to the ``state`` file.
|
||||
|
||||
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
|
||||
|
||||
Users can ask the kernel to periodically update files showing auto-tuned
|
||||
parameters and DAMOS stats instead of manually writing
|
||||
keywords such as ``update_tuned_intervals`` to the ``state`` file. For this, users
|
||||
should write the desired update time interval in milliseconds to ``refresh_ms``
|
||||
file. If the interval is zero, the periodic update is disabled. Reading the
|
||||
file shows currently set time interval.
|
||||
|
||||
``contexts`` directory contains files for controlling the monitoring contexts
|
||||
that this kdamond will execute.
|
||||
|
||||
@@ -307,10 +316,10 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme.
|
||||
schemes/<N>/
|
||||
------------
|
||||
|
||||
In each scheme directory, seven directories (``access_pattern``, ``quotas``,
|
||||
``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``stats``, and
|
||||
``tried_regions``) and three files (``action``, ``target_nid`` and
|
||||
``apply_interval``) exist.
|
||||
In each scheme directory, nine directories (``access_pattern``, ``quotas``,
|
||||
``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``dests``,
|
||||
``stats``, and ``tried_regions``) and three files (``action``, ``target_nid``
|
||||
and ``apply_interval``) exist.
|
||||
|
||||
The ``action`` file is for setting and getting the scheme's :ref:`action
|
||||
<damon_design_damos_action>`. The keywords that can be written to and read
|
||||
@@ -484,6 +493,29 @@ Refer to the :ref:`DAMOS filters design documentation
|
||||
of different ``allow`` works, when each of the filters are supported, and
|
||||
differences on stats.
|
||||
|
||||
.. _damon_sysfs_dests:
|
||||
|
||||
schemes/<N>/dests/
|
||||
------------------
|
||||
|
||||
Directory for specifying the destinations of given DAMON-based operation
|
||||
scheme's action. This directory is ignored if the action of the given scheme
|
||||
does not support multiple destinations. Only ``DAMOS_MIGRATE_{HOT,COLD}``
|
||||
actions support multiple destinations.
|
||||
|
||||
In the beginning, the directory has only one file, ``nr_dests``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each action destination.
|
||||
|
||||
Each destination directory contains two files, namely ``id`` and ``weight``.
|
||||
Users can write and read the identifier of the destination to ``id`` file.
|
||||
For ``DAMOS_MIGRATE_{HOT,COLD}`` actions, the migrate destination node's node
|
||||
id should be written to ``id`` file. Users can write and read the weight of
|
||||
the destination among the given destinations to the ``weight`` file. The
|
||||
weight can be an arbitrary integer. When DAMOS applies the action to each entity
|
||||
of the memory region, it will select the destination of the action based on the
|
||||
relative weights of the destinations.
|
||||
|
||||
.. _sysfs_schemes_stats:
|
||||
|
||||
schemes/<N>/stats/
|
||||
|
||||
@@ -37,6 +37,7 @@ the Linux memory management.
|
||||
numaperf
|
||||
pagemap
|
||||
shrinker_debugfs
|
||||
slab
|
||||
soft-dirty
|
||||
swap_numa
|
||||
transhuge
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
==========================
|
||||
Short users guide for SLUB
|
||||
==========================
|
||||
========================================
|
||||
Short users guide for the slab allocator
|
||||
========================================
|
||||
|
||||
The basic philosophy of SLUB is very different from SLAB. SLAB
|
||||
requires rebuilding the kernel to activate debug options for all
|
||||
slab caches. SLUB always includes full debugging but it is off by default.
|
||||
SLUB can enable debugging only for selected slabs in order to avoid
|
||||
an impact on overall system performance which may make a bug more
|
||||
difficult to find.
|
||||
The slab allocator includes full debugging support (when built with
|
||||
CONFIG_SLUB_DEBUG=y) but it is off by default (unless built with
|
||||
CONFIG_SLUB_DEBUG_ON=y). You can enable debugging only for selected
|
||||
slabs in order to avoid an impact on overall system performance which
|
||||
may make a bug more difficult to find.
|
||||
|
||||
In order to switch debugging on one can add an option ``slab_debug``
|
||||
to the kernel command line. That will enable full debugging for
|
||||
@@ -107,7 +107,7 @@ sysfs
|
||||
Global THP controls
|
||||
-------------------
|
||||
|
||||
Transparent Hugepage Support for anonymous memory can be entirely disabled
|
||||
Transparent Hugepage Support for anonymous memory can be disabled
|
||||
(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
|
||||
regions (to avoid the risk of consuming more memory resources) or enabled
|
||||
system wide. This can be achieved per-supported-THP-size with one of::
|
||||
@@ -119,6 +119,11 @@ system wide. This can be achieved per-supported-THP-size with one of::
|
||||
where <size> is the hugepage size being addressed, the available sizes
|
||||
for which vary by system.
|
||||
|
||||
.. note:: Setting "never" in all sysfs THP controls does **not** disable
|
||||
Transparent Huge Pages globally. This is because ``madvise(...,
|
||||
MADV_COLLAPSE)`` ignores these settings and collapses ranges to
|
||||
PMD-sized huge pages unconditionally.
|
||||
|
||||
For example::
|
||||
|
||||
echo always >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
|
||||
@@ -187,7 +192,9 @@ madvise
|
||||
behaviour.
|
||||
|
||||
never
|
||||
should be self-explanatory.
|
||||
should be self-explanatory. Note that ``madvise(...,
|
||||
MADV_COLLAPSE)`` can still cause transparent huge pages to be
|
||||
obtained even if this mode is specified everywhere.
|
||||
|
||||
By default kernel tries to use huge, PMD-mappable zero page on read
|
||||
page fault to anonymous mapping. It's possible to disable huge zero
|
||||
@@ -378,7 +385,9 @@ always
|
||||
Attempt to allocate huge pages every time we need a new page;
|
||||
|
||||
never
|
||||
Do not allocate huge pages;
|
||||
Do not allocate huge pages. Note that ``madvise(..., MADV_COLLAPSE)``
|
||||
can still cause transparent huge pages to be obtained even if this mode
|
||||
is specified everywhere;
|
||||
|
||||
within_size
|
||||
Only allocate huge page if it will be fully within i_size.
|
||||
@@ -434,7 +443,9 @@ inherit
|
||||
have enabled="inherit" and all other hugepage sizes have enabled="never";
|
||||
|
||||
never
|
||||
Do not allocate <size> huge pages;
|
||||
Do not allocate <size> huge pages. Note that ``madvise(...,
|
||||
MADV_COLLAPSE)`` can still cause transparent huge pages to be obtained
|
||||
even if this mode is specified everywhere;
|
||||
|
||||
within_size
|
||||
Only allocate <size> huge page if it will be fully within i_size.
|
||||
|
||||
@@ -72,7 +72,7 @@ to manage each performance update behavior. ::
|
||||
Lowest non- | | | |
|
||||
linear perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
| | Lowest perf ---->| |
|
||||
| | Min perf ---->| |
|
||||
| | | |
|
||||
Lowest perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
|
||||
@@ -398,7 +398,9 @@ policy limits change after that.
|
||||
|
||||
This governor does not do anything by itself. Instead, it allows user space
|
||||
to set the CPU frequency for the policy it is attached to by writing to the
|
||||
``scaling_setspeed`` attribute of that policy.
|
||||
``scaling_setspeed`` attribute of that policy. Though the intention may be to
|
||||
set an exact frequency for the policy, the actual frequency may vary depending
|
||||
on hardware coordination, thermal and power limits, and other factors.
|
||||
|
||||
``schedutil``
|
||||
-------------
|
||||
|
||||
@@ -53,20 +53,25 @@ following prctl:
|
||||
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
|
||||
|
||||
<op> is either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF, to enable and
|
||||
disable the mechanism globally for that thread. When
|
||||
PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
|
||||
<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
|
||||
or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
|
||||
that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
|
||||
|
||||
[<offset>, <offset>+<length>) delimit a memory region interval
|
||||
from which syscalls are always executed directly, regardless of the
|
||||
userspace selector. This provides a fast path for the C library, which
|
||||
includes the most common syscall dispatchers in the native code
|
||||
applications, and also provides a way for the signal handler to return
|
||||
For PR_SYS_DISPATCH_EXCLUSIVE_ON [<offset>, <offset>+<length>) delimit
|
||||
a memory region interval from which syscalls are always executed directly,
|
||||
regardless of the userspace selector. This provides a fast path for the
|
||||
C library, which includes the most common syscall dispatchers in the native
|
||||
code applications, and also provides a way for the signal handler to return
|
||||
without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
|
||||
interface should make sure that at least the signal trampoline code is
|
||||
included in this region. In addition, for syscalls that implement the
|
||||
trampoline code on the vDSO, that trampoline is never intercepted.
|
||||
|
||||
For PR_SYS_DISPATCH_INCLUSIVE_ON [<offset>, <offset>+<length>) delimit
|
||||
a memory region interval from which syscalls are dispatched based on
|
||||
the userspace selector. Syscalls from outside of the range are always
|
||||
executed directly.
|
||||
|
||||
[selector] is a pointer to a char-sized region in the process memory
|
||||
region, that provides a quick way to enable or disable syscall redirection
|
||||
thread-wide, without the need to invoke the kernel directly. selector
|
||||
|
||||
@@ -177,6 +177,7 @@ core_pattern
|
||||
%E executable path
|
||||
%c maximum size of core file by resource limit RLIMIT_CORE
|
||||
%C CPU the task ran on
|
||||
%F pidfd number
|
||||
%<OTHER> both are dropped
|
||||
======== ==========================================
|
||||
|
||||
@@ -889,7 +890,7 @@ bit 1 print system memory info
|
||||
bit 2 print timer info
|
||||
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
|
||||
bit 4 print ftrace buffer
|
||||
bit 5 print all printk messages in buffer
|
||||
bit 5 replay all messages on consoles at the end of panic
|
||||
bit 6 print all CPUs backtrace (if available in the arch)
|
||||
bit 7 print only tasks in uninterruptible (blocked) state
|
||||
===== ============================================
|
||||
@@ -899,6 +900,24 @@ So for example to print tasks and memory info on panic, user can::
|
||||
echo 3 > /proc/sys/kernel/panic_print
|
||||
|
||||
|
||||
panic_sys_info
|
||||
==============
|
||||
|
||||
A comma separated list of extra information to be dumped on panic,
|
||||
for example, "tasks,mem,timers,...". It is a human readable alternative
|
||||
to 'panic_print'. Possible values are:
|
||||
|
||||
============= ===================================================
|
||||
tasks print all tasks info
|
||||
mem print system memory info
|
||||
timers print timers info
|
||||
locks print locks info if CONFIG_LOCKDEP is on
|
||||
ftrace print ftrace buffer
|
||||
all_bt print all CPUs backtrace (if available in the arch)
|
||||
blocked_tasks print only tasks in uninterruptible (blocked) state
|
||||
============= ===================================================
|
||||
|
||||
|
||||
panic_on_rcu_stall
|
||||
==================
|
||||
|
||||
@@ -1014,30 +1033,26 @@ perf_user_access (arm64 and riscv only)
|
||||
|
||||
Controls user space access for reading perf event counters.
|
||||
|
||||
arm64
|
||||
=====
|
||||
* for arm64
|
||||
The default value is 0 (access disabled).
|
||||
|
||||
The default value is 0 (access disabled).
|
||||
When set to 1, user space can read performance monitor counter registers
|
||||
directly.
|
||||
|
||||
When set to 1, user space can read performance monitor counter registers
|
||||
directly.
|
||||
See Documentation/arch/arm64/perf.rst for more information.
|
||||
|
||||
See Documentation/arch/arm64/perf.rst for more information.
|
||||
* for riscv
|
||||
When set to 0, user space access is disabled.
|
||||
|
||||
riscv
|
||||
=====
|
||||
The default value is 1, user space can read performance monitor counter
|
||||
registers through perf, any direct access without perf intervention will trigger
|
||||
an illegal instruction.
|
||||
|
||||
When set to 0, user space access is disabled.
|
||||
When set to 2, legacy mode is enabled (user space has direct access to cycle
|
||||
and instret CSRs only). Note that this legacy value is deprecated and will be
|
||||
removed once all user space applications are fixed.
|
||||
|
||||
The default value is 1, user space can read performance monitor counter
|
||||
registers through perf, any direct access without perf intervention will trigger
|
||||
an illegal instruction.
|
||||
|
||||
When set to 2, legacy mode is enabled (user space has direct access to cycle
|
||||
and instret CSRs only). Note that this legacy value is deprecated and will be
|
||||
removed once all user space applications are fixed.
|
||||
|
||||
Note that the time CSR is always directly accessible to all modes.
|
||||
Note that the time CSR is always directly accessible to all modes.
|
||||
|
||||
pid_max
|
||||
=======
|
||||
@@ -1110,7 +1125,8 @@ printk_ratelimit_burst
|
||||
While long term we enforce one message per `printk_ratelimit`_
|
||||
seconds, we do allow a burst of messages to pass through.
|
||||
``printk_ratelimit_burst`` specifies the number of messages we can
|
||||
send before ratelimiting kicks in.
|
||||
send before ratelimiting kicks in. After `printk_ratelimit`_ seconds
|
||||
have elapsed, another burst of messages may be sent.
|
||||
|
||||
The default value is 10 messages.
|
||||
|
||||
@@ -1465,7 +1481,7 @@ stack_erasing
|
||||
=============
|
||||
|
||||
This parameter can be used to control kernel stack erasing at the end
|
||||
of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``.
|
||||
of syscalls for kernels built with ``CONFIG_KSTACK_ERASE``.
|
||||
|
||||
That erasing reduces the information which kernel stack leak bugs
|
||||
can reveal and blocks some uninitialized stack variable attacks.
|
||||
@@ -1473,7 +1489,7 @@ The tradeoff is the performance impact: on a single CPU system kernel
|
||||
compilation sees a 1% slowdown, other systems and workloads may vary.
|
||||
|
||||
= ====================================================================
|
||||
0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
|
||||
0 Kernel stack erasing is disabled, KSTACK_ERASE_METRICS are not updated.
|
||||
1 Kernel stack erasing is enabled (default), it is performed before
|
||||
returning to the userspace at the end of syscalls.
|
||||
= ====================================================================
|
||||
|
||||
@@ -465,8 +465,8 @@ The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
|
||||
disables protection of the pages.
|
||||
|
||||
|
||||
max_map_count:
|
||||
==============
|
||||
max_map_count
|
||||
=============
|
||||
|
||||
This file contains the maximum number of memory map areas a process
|
||||
may have. Memory map areas are used as a side-effect of calling
|
||||
@@ -495,8 +495,8 @@ memory allocations.
|
||||
The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
|
||||
|
||||
|
||||
memory_failure_early_kill:
|
||||
==========================
|
||||
memory_failure_early_kill
|
||||
=========================
|
||||
|
||||
Control how to kill processes when uncorrected memory error (typically
|
||||
a 2bit error in a memory module) is detected in the background by hardware
|
||||
|
||||
@@ -358,12 +358,7 @@ Forcing power
|
||||
Many OEMs include a method that can be used to force the power of a
|
||||
Thunderbolt controller to an "On" state even if nothing is connected.
|
||||
If supported by your machine this will be exposed by the WMI bus with
|
||||
a sysfs attribute called "force_power".
|
||||
|
||||
For example the intel-wmi-thunderbolt driver exposes this attribute in:
|
||||
/sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
|
||||
|
||||
To force the power to on, write 1 to this attribute file.
|
||||
To disable force power, write 0 to this attribute file.
|
||||
a sysfs attribute called "force_power", see
|
||||
Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt for details.
|
||||
|
||||
Note: it's currently not possible to query the force power state of a platform.
|
||||
|
||||
@@ -223,6 +223,47 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- SCR_EL3.HCE (bit 8) must be initialised to 0b1.
|
||||
|
||||
For systems with a GICv5 interrupt controller to be used in v5 mode:
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- ICH_HFGRTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_PPI_HMRn_EL1 (bit 16) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_IAFFIDR_EL1 (bit 7) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_HPPIR_EL1 (bit 4) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_HAPR_EL1 (bit 3) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_IDRn_EL1 (bit 1) must be initialised to 0b1.
|
||||
- ICH_HFGRTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
|
||||
|
||||
- ICH_HFGWTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
|
||||
- ICH_HFGWTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
|
||||
|
||||
- ICH_HFGITR_EL2.GICRCDNMIA (bit 10) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICRCDIA (bit 9) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDDI (bit 8) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDEOI (bit 7) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDHM (bit 6) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDRCFG (bit 5) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDPEND (bit 4) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDAFF (bit 3) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDPRI (bit 2) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDDIS (bit 1) must be initialised to 0b1.
|
||||
- ICH_HFGITR_EL2.GICCDEN (bit 0) must be initialised to 0b1.
|
||||
|
||||
- The DT or ACPI tables must describe a GICv5 interrupt controller.
|
||||
|
||||
For systems with a GICv3 interrupt controller to be used in v3 mode:
|
||||
- If EL3 is present:
|
||||
|
||||
@@ -234,7 +275,7 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- If the kernel is entered at EL1:
|
||||
|
||||
- ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
|
||||
- ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1
|
||||
- ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
|
||||
|
||||
- The DT or ACPI tables must describe a GICv3 interrupt controller.
|
||||
@@ -388,6 +429,27 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Branch Record Buffer Extension (FEAT_BRBE):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- MDCR_EL3.SBRBE (bits 33:32) must be initialised to 0b01 or 0b11.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- BRBCR_EL2.CC (bit 3) must be initialised to 0b1.
|
||||
- BRBCR_EL2.MPRED (bit 4) must be initialised to 0b1.
|
||||
|
||||
- HDFGRTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
|
||||
- HDFGRTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
|
||||
- HDFGRTR_EL2.nBRBIDR (bit 59) must be initialised to 0b1.
|
||||
|
||||
- HDFGWTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
|
||||
- HDFGWTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nBRBIALL (bit 56) must be initialised to 0b1.
|
||||
- HFGITR_EL2.nBRBINJ (bit 55) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
@@ -435,6 +435,12 @@ HWCAP2_SME_SF8DP4
|
||||
HWCAP2_POE
|
||||
Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
|
||||
|
||||
HWCAP3_MTE_FAR
|
||||
Functionality implied by ID_AA64PFR2_EL1.MTEFAR == 0b0001.
|
||||
|
||||
HWCAP3_MTE_STORE_ONLY
|
||||
Functionality implied by ID_AA64PFR2_EL1.MTESTOREONLY == 0b0001.
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
||||
@@ -60,11 +60,12 @@ that signal handlers in applications making use of tags cannot rely
|
||||
on the tag information for user virtual addresses being maintained
|
||||
in these fields unless the flag was set.
|
||||
|
||||
Due to architecture limitations, bits 63:60 of the fault address
|
||||
are not preserved in response to synchronous tag check faults
|
||||
(SEGV_MTESERR) even if SA_EXPOSE_TAGBITS was set. Applications should
|
||||
treat the values of these bits as undefined in order to accommodate
|
||||
future architecture revisions which may preserve the bits.
|
||||
If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address
|
||||
are preserved in response to synchronous tag check faults (SEGV_MTESERR);
|
||||
otherwise they are not preserved, even if SA_EXPOSE_TAGBITS was set.
|
||||
Applications should interpret the values of these bits based on
|
||||
the support for the HWCAP3_MTE_FAR. If the support is not present,
|
||||
the values of these bits should be considered undefined; otherwise they are valid.
|
||||
|
||||
For signals raised in response to watchpoint debug exceptions, the
|
||||
tag information will be preserved regardless of the SA_EXPOSE_TAGBITS
|
||||
|
||||
@@ -19,6 +19,7 @@ powerpc
|
||||
elf_hwcaps
|
||||
elfnote
|
||||
firmware-assisted-dump
|
||||
htm
|
||||
hvcs
|
||||
imc
|
||||
isa-versions
|
||||
|
||||
@@ -305,24 +305,3 @@ xpram shows up under devices/system/ as 'xpram'.
|
||||
|
||||
For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
|
||||
attribute 'online' which can be 0 or 1.
|
||||
|
||||
|
||||
4. Other devices
|
||||
----------------
|
||||
|
||||
4.1 Netiucv
|
||||
-----------
|
||||
|
||||
The netiucv driver creates an attribute 'connection' under
|
||||
bus/iucv/drivers/netiucv. Piping to this attribute creates a new netiucv
|
||||
connection to the specified host.
|
||||
|
||||
Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interface
|
||||
number is assigned sequentially to the connections defined via the 'connection'
|
||||
attribute.
|
||||
|
||||
user
|
||||
- shows the connection partner.
|
||||
|
||||
buffer
|
||||
- maximum buffer size. Pipe to it to change buffer size.
|
||||
|
||||
133
Documentation/arch/x86/amd-hfi.rst
Normal file
133
Documentation/arch/x86/amd-hfi.rst
Normal file
@@ -0,0 +1,133 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================================================================
|
||||
Hardware Feedback Interface For Hetero Core Scheduling On AMD Platform
|
||||
======================================================================
|
||||
|
||||
:Copyright: 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
|
||||
:Author: Perry Yuan <perry.yuan@amd.com>
|
||||
:Author: Mario Limonciello <mario.limonciello@amd.com>
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
AMD Heterogeneous Core implementations are comprised of more than one
|
||||
architectural class and CPUs are comprised of cores of various efficiency and
|
||||
power capabilities: performance-oriented *classic cores* and power-efficient
|
||||
*dense cores*. As such, power management strategies must be designed to
|
||||
accommodate the complexities introduced by incorporating different core types.
|
||||
Heterogeneous systems can also extend to more than two architectural classes
|
||||
as well. The purpose of the scheduling feedback mechanism is to provide
|
||||
information to the operating system scheduler in real time such that the
|
||||
scheduler can direct threads to the optimal core.
|
||||
|
||||
The goal of AMD's heterogeneous architecture is to attain power benefit by
|
||||
sending background threads to the dense cores while sending high priority
|
||||
threads to the classic cores. From a performance perspective, sending
|
||||
background threads to dense cores can free up power headroom and allow the
|
||||
classic cores to optimally service demanding threads. Furthermore, the area
|
||||
optimized nature of the dense cores allows for an increasing number of
|
||||
physical cores. This improved core density will have positive multithreaded
|
||||
performance impact.
|
||||
|
||||
AMD Heterogeneous Core Driver
|
||||
-----------------------------
|
||||
|
||||
The ``amd_hfi`` driver delivers to the operating system performance and energy
|
||||
efficiency capability data for each CPU in the system. The scheduler can use
|
||||
the ranking data from the HFI driver to make task placement decisions.
|
||||
|
||||
Thread Classification and Ranking Table Interaction
|
||||
----------------------------------------------------
|
||||
|
||||
The thread classification is used to select into a ranking table that
|
||||
describes an efficiency and performance ranking for each classification.
|
||||
|
||||
Threads are classified during runtime into enumerated classes. The classes
|
||||
represent thread performance/power characteristics that may benefit from
|
||||
special scheduling behaviors. The below table depicts an example of thread
|
||||
classification and a preference where a given thread should be scheduled
|
||||
based on its thread class. The real time thread classification is consumed
|
||||
by the operating system and is used to inform the scheduler of where the
|
||||
thread should be placed.
|
||||
|
||||
Thread Classification Example Table
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
+----------+----------------+-------------------------------+---------------------+---------+
|
||||
| class ID | Classification | Preferred scheduling behavior | Preemption priority | Counter |
|
||||
+----------+----------------+-------------------------------+---------------------+---------+
|
||||
| 0 | Default | Performant | Highest | |
|
||||
+----------+----------------+-------------------------------+---------------------+---------+
|
||||
| 1 | Non-scalable | Efficient | Lowest | PMCx1A1 |
|
||||
+----------+----------------+-------------------------------+---------------------+---------+
|
||||
| 2 | I/O bound | Efficient | Lowest | PMCx044 |
|
||||
+----------+----------------+-------------------------------+---------------------+---------+
|
||||
|
||||
Thread classification is performed by the hardware each time that the thread is switched out.
|
||||
Threads that don't meet any hardware specified criteria are classified as "default".
|
||||
|
||||
AMD Hardware Feedback Interface
|
||||
--------------------------------
|
||||
|
||||
The Hardware Feedback Interface provides to the operating system information
|
||||
about the performance and energy efficiency of each CPU in the system. Each
|
||||
capability is given as a unit-less quantity in the range [0-255]. A higher
|
||||
performance value indicates higher performance capability, and a higher
|
||||
efficiency value indicates more efficiency. Energy efficiency and performance
|
||||
are reported in separate capabilities in the shared memory based ranking table.
|
||||
|
||||
These capabilities may change at runtime as a result of changes in the
|
||||
operating conditions of the system or the action of external factors.
|
||||
Power Management firmware is responsible for detecting events that require
|
||||
a reordering of the performance and efficiency ranking. Table updates happen
|
||||
relatively infrequently and occur on the time scale of seconds or more.
|
||||
|
||||
The following events trigger a table update:
|
||||
* Thermal Stress Events
|
||||
* Silent Compute
|
||||
* Extreme Low Battery Scenarios
|
||||
|
||||
The kernel or a userspace policy daemon can use these capabilities to modify
|
||||
task placement decisions. For instance, if either the performance or energy
|
||||
capabilities of a given logical processor becomes zero, it is an indication
|
||||
that the hardware recommends to the operating system to not schedule any tasks
|
||||
on that processor for performance or energy efficiency reasons, respectively.
|
||||
|
||||
Implementation details for Linux
|
||||
--------------------------------
|
||||
|
||||
The implementation of threads scheduling consists of the following steps:
|
||||
|
||||
1. A thread is spawned and scheduled to the ideal core using the default
|
||||
heterogeneous scheduling policy.
|
||||
2. The processor profiles thread execution and assigns an enumerated
|
||||
classification ID.
|
||||
This classification is communicated to the OS via logical processor
|
||||
scope MSR.
|
||||
3. During the thread context switch out the operating system consumes the
|
||||
workload (WL) classification which resides in a logical processor scope MSR.
|
||||
4. The OS triggers the hardware to clear its history by writing to an MSR,
|
||||
after consuming the WL classification and before switching in the new thread.
|
||||
5. If due to the classification, ranking table, and processor availability,
|
||||
the thread is not on its ideal processor, the OS will then consider
|
||||
scheduling the thread on its ideal processor (if available).
|
||||
|
||||
Ranking Table
|
||||
-------------
|
||||
The ranking table is a shared memory region that is used to communicate the
|
||||
performance and energy efficiency capabilities of each CPU in the system.
|
||||
|
||||
The ranking table design includes rankings for each APIC ID in the system and
|
||||
rankings both for performance and efficiency for each workload classification.
|
||||
|
||||
.. kernel-doc:: drivers/platform/x86/amd/hfi/hfi.c
|
||||
:doc: amd_shmem_info
|
||||
|
||||
Ranking Table update
|
||||
---------------------------
|
||||
The power management firmware issues a platform interrupt after updating the
|
||||
ranking table, when it is ready for the operating system to consume. CPUs receive
|
||||
this interrupt and read the new ranking table from the shared memory that the PCCT
|
||||
table has provided; the ``amd_hfi`` driver then parses the new table to provide
|
||||
updated data for scheduling decisions.
|
||||
@@ -28,6 +28,7 @@ x86-specific Documentation
|
||||
amd-debugging
|
||||
amd-memory-encryption
|
||||
amd_hsmp
|
||||
amd-hfi
|
||||
tdx
|
||||
pti
|
||||
mds
|
||||
|
||||
@@ -93,7 +93,7 @@ enters a C-state.
|
||||
|
||||
The kernel provides a function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
x86_clear_cpu_buffers()
|
||||
|
||||
Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
|
||||
Other than CFLAGS.ZF, this macro doesn't clobber any registers.
|
||||
@@ -185,9 +185,9 @@ Mitigation points
|
||||
idle clearing would be a window dressing exercise and is therefore not
|
||||
activated.
|
||||
|
||||
The invocation is controlled by the static key mds_idle_clear which is
|
||||
switched depending on the chosen mitigation mode and the SMT state of
|
||||
the system.
|
||||
The invocation is controlled by the static key cpu_buf_idle_clear which is
|
||||
switched depending on the chosen mitigation mode and the SMT state of the
|
||||
system.
|
||||
|
||||
The buffer clear is only invoked before entering the C-State to prevent
|
||||
that stale data from the idling CPU from spilling to the Hyper-Thread
|
||||
|
||||
@@ -176,5 +176,5 @@ Be very careful vs. KASLR when changing anything here. The KASLR address
|
||||
range must not overlap with anything except the KASAN shadow area, which is
|
||||
correct as KASAN disables KASLR.
|
||||
|
||||
For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
|
||||
For both 4- and 5-level layouts, the KSTACK_ERASE_POISON value in the last 2MB
|
||||
hole: ffffffffffff4111
|
||||
|
||||
@@ -352,6 +352,83 @@ For reaching best IO performance, ublk server should align its segment
|
||||
parameter of `struct ublk_param_segment` with backend for avoiding
|
||||
unnecessary IO split, which usually hurts io_uring performance.
|
||||
|
||||
Auto Buffer Registration
|
||||
------------------------
|
||||
|
||||
The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration
|
||||
and unregistration for I/O requests, which simplifies the buffer management
|
||||
process and reduces overhead in the ublk server implementation.
|
||||
|
||||
This is another feature flag for using zero copy, and it is compatible with
|
||||
``UBLK_F_SUPPORT_ZERO_COPY``.
|
||||
|
||||
Feature Overview
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
This feature automatically registers request buffers to the io_uring context
|
||||
before delivering I/O commands to the ublk server and unregisters them when
|
||||
completing I/O commands. This eliminates the need for manual buffer
|
||||
registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and
|
||||
``UBLK_IO_UNREGISTER_IO_BUF`` commands, so IO handling in the ublk server
|
||||
can avoid dependency on the two uring_cmd operations.
|
||||
|
||||
IOs can't be issued concurrently to io_uring if there is any dependency
|
||||
among these IOs. So this way not only simplifies ublk server implementation,
|
||||
but also makes concurrent IO handling possible by removing the
|
||||
dependency on buffer registration & unregistration commands.
|
||||
|
||||
Usage Requirements
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx``
|
||||
used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If
|
||||
uring_cmd is issued on a different ``io_ring_ctx``, manual buffer
|
||||
unregistration is required.
|
||||
|
||||
2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the
|
||||
following structure::
|
||||
|
||||
struct ublk_auto_buf_reg {
|
||||
__u16 index; /* Buffer index for registration */
|
||||
__u8 flags; /* Registration flags */
|
||||
__u8 reserved0; /* Reserved for future use */
|
||||
__u32 reserved1; /* Reserved for future use */
|
||||
};
|
||||
|
||||
ublk_auto_buf_reg_to_sqe_addr() is for converting the above structure into
|
||||
``sqe->addr``.
|
||||
|
||||
3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed.
|
||||
|
||||
4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``.
|
||||
|
||||
Fallback Behavior
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
If auto buffer registration fails:
|
||||
|
||||
1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled:
|
||||
|
||||
- The uring_cmd is completed
|
||||
- ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags``
|
||||
- The ublk server must manually deal with the failure, such as by registering
|
||||
the buffer manually, or by using the user copy feature for retrieving the data
|
||||
for handling ublk IO
|
||||
|
||||
2. If fallback is not enabled:
|
||||
|
||||
- The ublk I/O request fails silently
|
||||
- The uring_cmd won't be completed
|
||||
|
||||
Limitations
|
||||
~~~~~~~~~~~
|
||||
|
||||
- Requires same ``io_ring_ctx`` for all operations
|
||||
- May require manual buffer management in fallback cases
|
||||
- io_ring_ctx buffer table has a max size of 16K, which may not be enough
|
||||
in case that too many ublk devices are handled by this single io_ring_ctx
|
||||
and each one has very large queue depth
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
||||
@@ -611,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate
|
||||
it into LLVM?
|
||||
|
||||
A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
|
||||
the selection of BPF instruction set extensions. By default the
|
||||
``generic`` processor target is used, which is the base instruction set
|
||||
(v1) of BPF.
|
||||
the selection of BPF instruction set extensions. Before llvm version 20,
|
||||
the ``generic`` processor target is used, which is the base instruction
|
||||
set (v1) of BPF. Since llvm 20, the default processor target has changed
|
||||
to instruction set v3.
|
||||
|
||||
LLVM has an option to select ``-mcpu=probe`` where it will probe the host
|
||||
kernel for supported BPF instruction set extensions and selects the
|
||||
|
||||
@@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on
|
||||
other CPUs involved in the following operation attempts:
|
||||
|
||||
- Attempt to use CPU-local state to batch operations
|
||||
- Attempt to fetch free nodes from global lists
|
||||
- Attempt to fetch ``target_free`` free nodes from global lists
|
||||
- Attempt to pull any node from a global list and remove it from the hashmap
|
||||
- Attempt to pull any node from any CPU's list and remove it from the hashmap
|
||||
|
||||
The number of nodes to borrow from the global list in a batch, ``target_free``,
|
||||
depends on the size of the map. Larger batch size reduces lock contention, but
|
||||
may also exhaust the global structure. The value is computed at map init to
|
||||
avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map
|
||||
size, with a minimum of a single element and a maximum budget of 128 at a time.
|
||||
|
||||
This algorithm is described visually in the following diagram. See the
|
||||
description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
|
||||
the corresponding operations:
|
||||
|
||||
@@ -35,18 +35,18 @@ digraph {
|
||||
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
|
||||
label="Flush local pending,
|
||||
Rotate Global list, move
|
||||
LOCAL_FREE_TARGET
|
||||
target_free
|
||||
from global -> local"]
|
||||
// Also corresponds to:
|
||||
// fn__local_list_flush()
|
||||
// fn_bpf_lru_list_rotate()
|
||||
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
|
||||
label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
|
||||
label="Able to free\ntarget_free\nnodes?"]
|
||||
|
||||
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
|
||||
label="Shrink inactive list
|
||||
up to remaining
|
||||
LOCAL_FREE_TARGET
|
||||
target_free
|
||||
(global LRU -> local)"]
|
||||
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
|
||||
label="> 0 entries in\nlocal free list?"]
|
||||
|
||||
@@ -350,9 +350,9 @@ Underflow and overflow are allowed during arithmetic operations, meaning
|
||||
the 64-bit or 32-bit value will wrap. If BPF program execution would
|
||||
result in division by zero, the destination register is instead set to zero.
|
||||
Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
|
||||
dividing -1, the desination register is instead set to ``LLONG_MIN``. For
|
||||
``ALU``, if execution would result in ``INT_MIN`` dividing -1, the
|
||||
desination register is instead set to ``INT_MIN``.
|
||||
divided by -1, the destination register is instead set to ``LLONG_MIN``. For
|
||||
``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
|
||||
destination register is instead set to ``INT_MIN``.
|
||||
|
||||
If execution would result in modulo by zero, for ``ALU64`` the value of
|
||||
the destination register is unchanged whereas for ``ALU`` the upper
|
||||
|
||||
@@ -273,7 +273,6 @@ The drive-specific, minor-like information that is registered with
|
||||
__u8 media_written; /* dirty flag, DVD+RW bookkeeping */
|
||||
unsigned short mmc3_profile; /* current MMC3 profile */
|
||||
int for_data; /* unknown:TBD */
|
||||
int (*exit)(struct cdrom_device_info *);/* unknown:TBD */
|
||||
int mrw_mode_page; /* which MRW mode page is in use */
|
||||
};
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ CD-ROM
|
||||
:maxdepth: 1
|
||||
|
||||
cdrom-standard
|
||||
packet-writing
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
==============
|
||||
Packet writing
|
||||
==============
|
||||
|
||||
Getting started quick
|
||||
---------------------
|
||||
|
||||
- Select packet support in the block device section and UDF support in
|
||||
the file system section.
|
||||
|
||||
- Compile and install kernel and modules, reboot.
|
||||
|
||||
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
|
||||
Download from https://github.com/pali/udftools
|
||||
|
||||
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
|
||||
as appropriate)::
|
||||
|
||||
# cdrwtool -d /dev/hdc -q
|
||||
|
||||
- Setup your writer::
|
||||
|
||||
# pktsetup dev_name /dev/hdc
|
||||
|
||||
- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy::
|
||||
|
||||
# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
|
||||
|
||||
|
||||
Packet writing for DVD-RW media
|
||||
-------------------------------
|
||||
|
||||
DVD-RW discs can be written to much like CD-RW discs if they are in
|
||||
the so called "restricted overwrite" mode. To put a disc in restricted
|
||||
overwrite mode, run::
|
||||
|
||||
# dvd+rw-format /dev/hdc
|
||||
|
||||
You can then use the disc the same way you would use a CD-RW disc::
|
||||
|
||||
# pktsetup dev_name /dev/hdc
|
||||
# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
|
||||
|
||||
|
||||
Packet writing for DVD+RW media
|
||||
-------------------------------
|
||||
|
||||
According to the DVD+RW specification, a drive supporting DVD+RW discs
|
||||
shall implement "true random writes with 2KB granularity", which means
|
||||
that it should be possible to put any filesystem with a block size >=
|
||||
2KB on such a disc. For example, it should be possible to do::
|
||||
|
||||
# dvd+rw-format /dev/hdc (only needed if the disc has never
|
||||
been formatted)
|
||||
# mkudffs /dev/hdc
|
||||
# mount /dev/hdc /cdrom -t udf -o rw,noatime
|
||||
|
||||
However, some drives don't follow the specification and expect the
|
||||
host to perform aligned writes at 32KB boundaries. Other drives do
|
||||
follow the specification, but suffer bad performance problems if the
|
||||
writes are not 32KB aligned.
|
||||
|
||||
Both problems can be solved by using the pktcdvd driver, which always
|
||||
generates aligned writes::
|
||||
|
||||
# dvd+rw-format /dev/hdc
|
||||
# pktsetup dev_name /dev/hdc
|
||||
# mkudffs /dev/pktcdvd/dev_name
|
||||
# mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
|
||||
|
||||
|
||||
Packet writing for DVD-RAM media
|
||||
--------------------------------
|
||||
|
||||
DVD-RAM discs are random writable, so using the pktcdvd driver is not
|
||||
necessary. However, using the pktcdvd driver can improve performance
|
||||
in the same way it does for DVD+RW media.
|
||||
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
- CD-RW media can usually not be overwritten more than about 1000
|
||||
times, so to avoid unnecessary wear on the media, you should always
|
||||
use the noatime mount option.
|
||||
|
||||
- Defect management (ie automatic remapping of bad sectors) has not
|
||||
been implemented yet, so you are likely to get at least some
|
||||
filesystem corruption if the disc wears out.
|
||||
|
||||
- Since the pktcdvd driver makes the disc appear as a regular block
|
||||
device with a 2KB block size, you can put any filesystem you like on
|
||||
the disc. For example, run::
|
||||
|
||||
# /sbin/mke2fs /dev/pktcdvd/dev_name
|
||||
|
||||
to create an ext2 filesystem on the disc.
|
||||
|
||||
|
||||
Using the pktcdvd sysfs interface
|
||||
---------------------------------
|
||||
|
||||
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
|
||||
and can be controlled by it. For example the "pktcdvd" tool uses
|
||||
this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
|
||||
|
||||
"pktcdvd" works similar to "pktsetup", e.g.::
|
||||
|
||||
# pktcdvd -a dev_name /dev/hdc
|
||||
# mkudffs /dev/pktcdvd/dev_name
|
||||
# mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
|
||||
# cp files /dvdram
|
||||
# umount /dvdram
|
||||
# pktcdvd -r dev_name
|
||||
|
||||
|
||||
For a description of the sysfs interface look into the file:
|
||||
|
||||
Documentation/ABI/testing/sysfs-class-pktcdvd
|
||||
|
||||
|
||||
Using the pktcdvd debugfs interface
|
||||
-----------------------------------
|
||||
|
||||
To read pktcdvd device infos in human readable form, do::
|
||||
|
||||
# cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
|
||||
|
||||
For a description of the debugfs interface look into the file:
|
||||
|
||||
Documentation/ABI/testing/debugfs-pktcdvd
|
||||
|
||||
|
||||
|
||||
Links
|
||||
-----
|
||||
|
||||
See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
|
||||
about DVD writing.
|
||||
@@ -1,25 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# The Linux Kernel documentation build configuration file, created by
|
||||
# sphinx-quickstart on Fri Feb 12 13:51:46 2016.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its
|
||||
# containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
# pylint: disable=C0103,C0209
|
||||
|
||||
"""
|
||||
The Linux Kernel documentation build configuration file.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import sphinx
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import sphinx
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
sys.path.insert(0, os.path.abspath("sphinx"))
|
||||
|
||||
from load_config import loadConfig # pylint: disable=C0413,E0401
|
||||
|
||||
# Minimal supported version
|
||||
needs_sphinx = "3.4.3"
|
||||
|
||||
# Get Sphinx version
|
||||
major, minor, patch = sphinx.version_info[:3] # pylint: disable=I1101
|
||||
|
||||
# Include_patterns were added on Sphinx 5.1
|
||||
if (major < 5) or (major == 5 and minor < 1):
|
||||
has_include_patterns = False
|
||||
else:
|
||||
has_include_patterns = True
|
||||
# Include patterns that don't contain directory names, in glob format
|
||||
include_patterns = ["**.rst"]
|
||||
|
||||
# Location of Documentation/ directory
|
||||
doctree = os.path.abspath(".")
|
||||
|
||||
# Exclude patterns that don't contain directory names, in glob format.
|
||||
exclude_patterns = []
|
||||
|
||||
# List of patterns that contain directory names in glob format.
|
||||
dyn_include_patterns = []
|
||||
dyn_exclude_patterns = ["output"]
|
||||
|
||||
# Properly handle include/exclude patterns
|
||||
# ----------------------------------------
|
||||
|
||||
def update_patterns(app, config):
|
||||
"""
|
||||
On Sphinx, all directories are relative to what is passed as the
|
||||
SOURCEDIR parameter for sphinx-build. Due to that, all patterns
|
||||
that have directory names in them need to be dynamically set, after
|
||||
converting them to a relative path.
|
||||
|
||||
As Sphinx doesn't include any patterns outside SOURCEDIR, we should
|
||||
exclude relative patterns that start with "../".
|
||||
"""
|
||||
|
||||
# setup include_patterns dynamically
|
||||
if has_include_patterns:
|
||||
for p in dyn_include_patterns:
|
||||
full = os.path.join(doctree, p)
|
||||
|
||||
rel_path = os.path.relpath(full, start=app.srcdir)
|
||||
if rel_path.startswith("../"):
|
||||
continue
|
||||
|
||||
config.include_patterns.append(rel_path)
|
||||
|
||||
# setup exclude_patterns dynamically
|
||||
for p in dyn_exclude_patterns:
|
||||
full = os.path.join(doctree, p)
|
||||
|
||||
rel_path = os.path.relpath(full, start=app.srcdir)
|
||||
if rel_path.startswith("../"):
|
||||
continue
|
||||
|
||||
config.exclude_patterns.append(rel_path)
|
||||
|
||||
|
||||
# helper
|
||||
# ------
|
||||
|
||||
|
||||
def have_command(cmd):
|
||||
"""Search ``cmd`` in the ``PATH`` environment.
|
||||
|
||||
@@ -28,24 +90,23 @@ def have_command(cmd):
|
||||
"""
|
||||
return shutil.which(cmd) is not None
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
sys.path.insert(0, os.path.abspath('sphinx'))
|
||||
from load_config import loadConfig
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
needs_sphinx = '3.4.3'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
|
||||
'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
|
||||
'maintainers_include', 'sphinx.ext.autosectionlabel',
|
||||
'kernel_abi', 'kernel_feat', 'translations']
|
||||
# Add any Sphinx extensions in alphabetic order
|
||||
extensions = [
|
||||
"automarkup",
|
||||
"kernel_abi",
|
||||
"kerneldoc",
|
||||
"kernel_feat",
|
||||
"kernel_include",
|
||||
"kfigure",
|
||||
"maintainers_include",
|
||||
"rstFlatTable",
|
||||
"sphinx.ext.autosectionlabel",
|
||||
"sphinx.ext.ifconfig",
|
||||
"translations",
|
||||
]
|
||||
|
||||
# Since Sphinx version 3, the C function parser is more pedantic with regards
|
||||
# to type checking. Due to that, having macros at c:function cause problems.
|
||||
@@ -120,28 +181,28 @@ autosectionlabel_maxdepth = 2
|
||||
# Load math renderer:
|
||||
# For html builder, load imgmath only when its dependencies are met.
|
||||
# mathjax is the default math renderer since Sphinx 1.8.
|
||||
have_latex = have_command('latex')
|
||||
have_dvipng = have_command('dvipng')
|
||||
have_latex = have_command("latex")
|
||||
have_dvipng = have_command("dvipng")
|
||||
load_imgmath = have_latex and have_dvipng
|
||||
|
||||
# Respect SPHINX_IMGMATH (for html docs only)
|
||||
if 'SPHINX_IMGMATH' in os.environ:
|
||||
env_sphinx_imgmath = os.environ['SPHINX_IMGMATH']
|
||||
if 'yes' in env_sphinx_imgmath:
|
||||
if "SPHINX_IMGMATH" in os.environ:
|
||||
env_sphinx_imgmath = os.environ["SPHINX_IMGMATH"]
|
||||
if "yes" in env_sphinx_imgmath:
|
||||
load_imgmath = True
|
||||
elif 'no' in env_sphinx_imgmath:
|
||||
elif "no" in env_sphinx_imgmath:
|
||||
load_imgmath = False
|
||||
else:
|
||||
sys.stderr.write("Unknown env SPHINX_IMGMATH=%s ignored.\n" % env_sphinx_imgmath)
|
||||
|
||||
if load_imgmath:
|
||||
extensions.append("sphinx.ext.imgmath")
|
||||
math_renderer = 'imgmath'
|
||||
math_renderer = "imgmath"
|
||||
else:
|
||||
math_renderer = 'mathjax'
|
||||
math_renderer = "mathjax"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['sphinx/templates']
|
||||
templates_path = ["sphinx/templates"]
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
@@ -149,15 +210,15 @@ templates_path = ['sphinx/templates']
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
master_doc = "index"
|
||||
|
||||
# General information about the project.
|
||||
project = 'The Linux Kernel'
|
||||
copyright = 'The kernel development community'
|
||||
author = 'The kernel development community'
|
||||
project = "The Linux Kernel"
|
||||
copyright = "The kernel development community" # pylint: disable=W0622
|
||||
author = "The kernel development community"
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
@@ -172,86 +233,86 @@ author = 'The kernel development community'
|
||||
try:
|
||||
makefile_version = None
|
||||
makefile_patchlevel = None
|
||||
for line in open('../Makefile'):
|
||||
key, val = [x.strip() for x in line.split('=', 2)]
|
||||
if key == 'VERSION':
|
||||
makefile_version = val
|
||||
elif key == 'PATCHLEVEL':
|
||||
makefile_patchlevel = val
|
||||
if makefile_version and makefile_patchlevel:
|
||||
break
|
||||
except:
|
||||
with open("../Makefile", encoding="utf=8") as fp:
|
||||
for line in fp:
|
||||
key, val = [x.strip() for x in line.split("=", 2)]
|
||||
if key == "VERSION":
|
||||
makefile_version = val
|
||||
elif key == "PATCHLEVEL":
|
||||
makefile_patchlevel = val
|
||||
if makefile_version and makefile_patchlevel:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if makefile_version and makefile_patchlevel:
|
||||
version = release = makefile_version + '.' + makefile_patchlevel
|
||||
version = release = makefile_version + "." + makefile_patchlevel
|
||||
else:
|
||||
version = release = "unknown version"
|
||||
|
||||
#
|
||||
# HACK: there seems to be no easy way for us to get at the version and
|
||||
# release information passed in from the makefile...so go pawing through the
|
||||
# command-line options and find it for ourselves.
|
||||
#
|
||||
|
||||
def get_cline_version():
|
||||
c_version = c_release = ''
|
||||
"""
|
||||
HACK: There seems to be no easy way for us to get at the version and
|
||||
release information passed in from the makefile...so go pawing through the
|
||||
command-line options and find it for ourselves.
|
||||
"""
|
||||
|
||||
c_version = c_release = ""
|
||||
for arg in sys.argv:
|
||||
if arg.startswith('version='):
|
||||
if arg.startswith("version="):
|
||||
c_version = arg[8:]
|
||||
elif arg.startswith('release='):
|
||||
elif arg.startswith("release="):
|
||||
c_release = arg[8:]
|
||||
if c_version:
|
||||
if c_release:
|
||||
return c_version + '-' + c_release
|
||||
return c_version + "-" + c_release
|
||||
return c_version
|
||||
return version # Whatever we came up with before
|
||||
return version # Whatever we came up with before
|
||||
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#
|
||||
# This is also used if you do content translation via gettext catalogs.
|
||||
# Usually you set "language" from the command line for these cases.
|
||||
language = 'en'
|
||||
language = "en"
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['output']
|
||||
# today_fmt = '%B %d, %Y'
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
#default_role = None
|
||||
# default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
# add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
# add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
#show_authors = False
|
||||
# show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
pygments_style = "sphinx"
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
# modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
#keep_warnings = False
|
||||
# keep_warnings = False
|
||||
|
||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||
todo_include_todos = False
|
||||
|
||||
primary_domain = 'c'
|
||||
highlight_language = 'none'
|
||||
primary_domain = "c"
|
||||
highlight_language = "none"
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
@@ -259,43 +320,45 @@ highlight_language = 'none'
|
||||
# a list of builtin themes.
|
||||
|
||||
# Default theme
|
||||
html_theme = 'alabaster'
|
||||
html_theme = "alabaster"
|
||||
html_css_files = []
|
||||
|
||||
if "DOCS_THEME" in os.environ:
|
||||
html_theme = os.environ["DOCS_THEME"]
|
||||
|
||||
if html_theme == 'sphinx_rtd_theme' or html_theme == 'sphinx_rtd_dark_mode':
|
||||
if html_theme in ["sphinx_rtd_theme", "sphinx_rtd_dark_mode"]:
|
||||
# Read the Docs theme
|
||||
try:
|
||||
import sphinx_rtd_theme
|
||||
|
||||
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_css_files = [
|
||||
'theme_overrides.css',
|
||||
"theme_overrides.css",
|
||||
]
|
||||
|
||||
# Read the Docs dark mode override theme
|
||||
if html_theme == 'sphinx_rtd_dark_mode':
|
||||
if html_theme == "sphinx_rtd_dark_mode":
|
||||
try:
|
||||
import sphinx_rtd_dark_mode
|
||||
extensions.append('sphinx_rtd_dark_mode')
|
||||
except ImportError:
|
||||
html_theme == 'sphinx_rtd_theme'
|
||||
import sphinx_rtd_dark_mode # pylint: disable=W0611
|
||||
|
||||
if html_theme == 'sphinx_rtd_theme':
|
||||
# Add color-specific RTD normal mode
|
||||
html_css_files.append('theme_rtd_colors.css')
|
||||
extensions.append("sphinx_rtd_dark_mode")
|
||||
except ImportError:
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
if html_theme == "sphinx_rtd_theme":
|
||||
# Add color-specific RTD normal mode
|
||||
html_css_files.append("theme_rtd_colors.css")
|
||||
|
||||
html_theme_options = {
|
||||
'navigation_depth': -1,
|
||||
"navigation_depth": -1,
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
html_theme = 'alabaster'
|
||||
html_theme = "alabaster"
|
||||
|
||||
if "DOCS_CSS" in os.environ:
|
||||
css = os.environ["DOCS_CSS"].split(" ")
|
||||
@@ -303,14 +366,14 @@ if "DOCS_CSS" in os.environ:
|
||||
for l in css:
|
||||
html_css_files.append(l)
|
||||
|
||||
if html_theme == 'alabaster':
|
||||
if html_theme == "alabaster":
|
||||
html_theme_options = {
|
||||
'description': get_cline_version(),
|
||||
'page_width': '65em',
|
||||
'sidebar_width': '15em',
|
||||
'fixed_sidebar': 'true',
|
||||
'font_size': 'inherit',
|
||||
'font_family': 'serif',
|
||||
"description": get_cline_version(),
|
||||
"page_width": "65em",
|
||||
"sidebar_width": "15em",
|
||||
"fixed_sidebar": "true",
|
||||
"font_size": "inherit",
|
||||
"font_family": "serif",
|
||||
}
|
||||
|
||||
sys.stderr.write("Using %s theme\n" % html_theme)
|
||||
@@ -318,104 +381,79 @@ sys.stderr.write("Using %s theme\n" % html_theme)
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['sphinx-static']
|
||||
html_static_path = ["sphinx-static"]
|
||||
|
||||
# If true, Docutils "smart quotes" will be used to convert quotes and dashes
|
||||
# to typographically correct entities. However, conversion of "--" to "—"
|
||||
# is not always what we want, so enable only quotes.
|
||||
smartquotes_action = 'q'
|
||||
smartquotes_action = "q"
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
# Note that the RTD theme ignores this
|
||||
html_sidebars = { '**': ['searchbox.html', 'kernel-toc.html', 'sourcelink.html']}
|
||||
html_sidebars = {"**": ["searchbox.html",
|
||||
"kernel-toc.html",
|
||||
"sourcelink.html"]}
|
||||
|
||||
# about.html is available for alabaster theme. Add it at the front.
|
||||
if html_theme == 'alabaster':
|
||||
html_sidebars['**'].insert(0, 'about.html')
|
||||
if html_theme == "alabaster":
|
||||
html_sidebars["**"].insert(0, "about.html")
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
html_logo = 'images/logo.svg'
|
||||
html_logo = "images/logo.svg"
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'TheLinuxKerneldoc'
|
||||
htmlhelp_basename = "TheLinuxKerneldoc"
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
'papersize': 'a4paper',
|
||||
|
||||
"papersize": "a4paper",
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
'pointsize': '11pt',
|
||||
|
||||
"pointsize": "11pt",
|
||||
# Latex figure (float) alignment
|
||||
#'figure_align': 'htbp',
|
||||
|
||||
# 'figure_align': 'htbp',
|
||||
# Don't mangle with UTF-8 chars
|
||||
'inputenc': '',
|
||||
'utf8extra': '',
|
||||
|
||||
"inputenc": "",
|
||||
"utf8extra": "",
|
||||
# Set document margins
|
||||
'sphinxsetup': '''
|
||||
"sphinxsetup": """
|
||||
hmargin=0.5in, vmargin=1in,
|
||||
parsedliteralwraps=true,
|
||||
verbatimhintsturnover=false,
|
||||
''',
|
||||
|
||||
""",
|
||||
#
|
||||
# Some of our authors are fond of deep nesting; tell latex to
|
||||
# cope.
|
||||
#
|
||||
'maxlistdepth': '10',
|
||||
|
||||
"maxlistdepth": "10",
|
||||
# For CJK One-half spacing, need to be in front of hyperref
|
||||
'extrapackages': r'\usepackage{setspace}',
|
||||
|
||||
"extrapackages": r"\usepackage{setspace}",
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
'preamble': '''
|
||||
"preamble": """
|
||||
% Use some font with UTF-8 support with XeLaTeX
|
||||
\\usepackage{fontspec}
|
||||
\\setsansfont{DejaVu Sans}
|
||||
\\setromanfont{DejaVu Serif}
|
||||
\\setmonofont{DejaVu Sans Mono}
|
||||
''',
|
||||
""",
|
||||
}
|
||||
|
||||
# Load kerneldoc specific LaTeX settings
|
||||
latex_elements['preamble'] += '''
|
||||
latex_elements["preamble"] += """
|
||||
% Load kerneldoc specific LaTeX settings
|
||||
\\input{kerneldoc-preamble.sty}
|
||||
'''
|
||||
|
||||
# With Sphinx 1.6, it is possible to change the Bg color directly
|
||||
# by using:
|
||||
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
|
||||
# \definecolor{sphinxwarningBgColor}{RGB}{255,204,204}
|
||||
# \definecolor{sphinxattentionBgColor}{RGB}{255,255,204}
|
||||
# \definecolor{sphinximportantBgColor}{RGB}{192,255,204}
|
||||
#
|
||||
# However, it require to use sphinx heavy box with:
|
||||
#
|
||||
# \renewenvironment{sphinxlightbox} {%
|
||||
# \\begin{sphinxheavybox}
|
||||
# }
|
||||
# \\end{sphinxheavybox}
|
||||
# }
|
||||
#
|
||||
# Unfortunately, the implementation is buggy: if a note is inside a
|
||||
# table, it isn't displayed well. So, for now, let's use boring
|
||||
# black and white notes.
|
||||
\\input{kerneldoc-preamble.sty}
|
||||
"""
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
# Sorted in alphabetical order
|
||||
latex_documents = [
|
||||
]
|
||||
latex_documents = []
|
||||
|
||||
# Add all other index files from Documentation/ subdirectories
|
||||
for fn in os.listdir('.'):
|
||||
for fn in os.listdir("."):
|
||||
doc = os.path.join(fn, "index")
|
||||
if os.path.exists(doc + ".rst"):
|
||||
has = False
|
||||
@@ -424,34 +462,39 @@ for fn in os.listdir('.'):
|
||||
has = True
|
||||
break
|
||||
if not has:
|
||||
latex_documents.append((doc, fn + '.tex',
|
||||
'Linux %s Documentation' % fn.capitalize(),
|
||||
'The kernel development community',
|
||||
'manual'))
|
||||
latex_documents.append(
|
||||
(
|
||||
doc,
|
||||
fn + ".tex",
|
||||
"Linux %s Documentation" % fn.capitalize(),
|
||||
"The kernel development community",
|
||||
"manual",
|
||||
)
|
||||
)
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
# latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
# latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
# latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
# latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
# latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
# latex_domain_indices = True
|
||||
|
||||
# Additional LaTeX stuff to be copied to build directory
|
||||
latex_additional_files = [
|
||||
'sphinx/kerneldoc-preamble.sty',
|
||||
"sphinx/kerneldoc-preamble.sty",
|
||||
]
|
||||
|
||||
|
||||
@@ -460,12 +503,11 @@ latex_additional_files = [
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
(master_doc, 'thelinuxkernel', 'The Linux Kernel Documentation',
|
||||
[author], 1)
|
||||
(master_doc, "thelinuxkernel", "The Linux Kernel Documentation", [author], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
# man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
@@ -473,11 +515,15 @@ man_pages = [
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
(master_doc, 'TheLinuxKernel', 'The Linux Kernel Documentation',
|
||||
author, 'TheLinuxKernel', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
texinfo_documents = [(
|
||||
master_doc,
|
||||
"TheLinuxKernel",
|
||||
"The Linux Kernel Documentation",
|
||||
author,
|
||||
"TheLinuxKernel",
|
||||
"One line description of project.",
|
||||
"Miscellaneous",
|
||||
),]
|
||||
|
||||
# -- Options for Epub output ----------------------------------------------
|
||||
|
||||
@@ -488,9 +534,9 @@ epub_publisher = author
|
||||
epub_copyright = copyright
|
||||
|
||||
# A list of files that should not be packed into the epub file.
|
||||
epub_exclude_files = ['search.html']
|
||||
epub_exclude_files = ["search.html"]
|
||||
|
||||
#=======
|
||||
# =======
|
||||
# rst2pdf
|
||||
#
|
||||
# Grouping the document tree into PDF files. List of tuples
|
||||
@@ -502,17 +548,23 @@ epub_exclude_files = ['search.html']
|
||||
# multiple PDF files here actually tries to get the cross-referencing right
|
||||
# *between* PDF files.
|
||||
pdf_documents = [
|
||||
('kernel-documentation', u'Kernel', u'Kernel', u'J. Random Bozo'),
|
||||
("kernel-documentation", "Kernel", "Kernel", "J. Random Bozo"),
|
||||
]
|
||||
|
||||
# kernel-doc extension configuration for running Sphinx directly (e.g. by Read
|
||||
# the Docs). In a normal build, these are supplied from the Makefile via command
|
||||
# line arguments.
|
||||
kerneldoc_bin = '../scripts/kernel-doc.py'
|
||||
kerneldoc_srctree = '..'
|
||||
kerneldoc_bin = "../scripts/kernel-doc.py"
|
||||
kerneldoc_srctree = ".."
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Since loadConfig overwrites settings from the global namespace, it has to be
|
||||
# the last statement in the conf.py file
|
||||
# ------------------------------------------------------------------------------
|
||||
loadConfig(globals())
|
||||
|
||||
|
||||
def setup(app):
|
||||
"""Patterns need to be updated at init time on older Sphinx versions"""
|
||||
|
||||
app.connect('config-inited', update_patterns)
|
||||
|
||||
@@ -155,7 +155,7 @@ a device with limitations, it needs to be decreased.
|
||||
|
||||
Special note about PCI: PCI-X specification requires PCI-X devices to support
|
||||
64-bit addressing (DAC) for all transactions. And at least one platform (SGI
|
||||
SN2) requires 64-bit consistent allocations to operate correctly when the IO
|
||||
SN2) requires 64-bit coherent allocations to operate correctly when the IO
|
||||
bus is in PCI-X mode.
|
||||
|
||||
For correct operation, you must set the DMA mask to inform the kernel about
|
||||
@@ -174,7 +174,7 @@ used instead:
|
||||
|
||||
int dma_set_mask(struct device *dev, u64 mask);
|
||||
|
||||
The setup for consistent allocations is performed via a call
|
||||
The setup for coherent allocations is performed via a call
|
||||
to dma_set_coherent_mask()::
|
||||
|
||||
int dma_set_coherent_mask(struct device *dev, u64 mask);
|
||||
@@ -241,7 +241,7 @@ it would look like this::
|
||||
|
||||
The coherent mask will always be able to set the same or a smaller mask as
|
||||
the streaming mask. However for the rare case that a device driver only
|
||||
uses consistent allocations, one would have to check the return value from
|
||||
uses coherent allocations, one would have to check the return value from
|
||||
dma_set_coherent_mask().
|
||||
|
||||
Finally, if your device can only drive the low 24-bits of
|
||||
@@ -298,20 +298,20 @@ Types of DMA mappings
|
||||
|
||||
There are two types of DMA mappings:
|
||||
|
||||
- Consistent DMA mappings which are usually mapped at driver
|
||||
- Coherent DMA mappings which are usually mapped at driver
|
||||
initialization, unmapped at the end and for which the hardware should
|
||||
guarantee that the device and the CPU can access the data
|
||||
in parallel and will see updates made by each other without any
|
||||
explicit software flushing.
|
||||
|
||||
Think of "consistent" as "synchronous" or "coherent".
|
||||
Think of "coherent" as "synchronous".
|
||||
|
||||
The current default is to return consistent memory in the low 32
|
||||
The current default is to return coherent memory in the low 32
|
||||
bits of the DMA space. However, for future compatibility you should
|
||||
set the consistent mask even if this default is fine for your
|
||||
set the coherent mask even if this default is fine for your
|
||||
driver.
|
||||
|
||||
Good examples of what to use consistent mappings for are:
|
||||
Good examples of what to use coherent mappings for are:
|
||||
|
||||
- Network card DMA ring descriptors.
|
||||
- SCSI adapter mailbox command data structures.
|
||||
@@ -320,13 +320,13 @@ There are two types of DMA mappings:
|
||||
|
||||
The invariant these examples all require is that any CPU store
|
||||
to memory is immediately visible to the device, and vice
|
||||
versa. Consistent mappings guarantee this.
|
||||
versa. Coherent mappings guarantee this.
|
||||
|
||||
.. important::
|
||||
|
||||
Consistent DMA memory does not preclude the usage of
|
||||
Coherent DMA memory does not preclude the usage of
|
||||
proper memory barriers. The CPU may reorder stores to
|
||||
consistent memory just as it may normal memory. Example:
|
||||
coherent memory just as it may normal memory. Example:
|
||||
if it is important for the device to see the first word
|
||||
of a descriptor updated before the second, you must do
|
||||
something like::
|
||||
@@ -365,10 +365,10 @@ Also, systems with caches that aren't DMA-coherent will work better
|
||||
when the underlying buffers don't share cache lines with other data.
|
||||
|
||||
|
||||
Using Consistent DMA mappings
|
||||
=============================
|
||||
Using Coherent DMA mappings
|
||||
===========================
|
||||
|
||||
To allocate and map large (PAGE_SIZE or so) consistent DMA regions,
|
||||
To allocate and map large (PAGE_SIZE or so) coherent DMA regions,
|
||||
you should do::
|
||||
|
||||
dma_addr_t dma_handle;
|
||||
@@ -385,10 +385,10 @@ __get_free_pages() (but takes size instead of a page order). If your
|
||||
driver needs regions sized smaller than a page, you may prefer using
|
||||
the dma_pool interface, described below.
|
||||
|
||||
The consistent DMA mapping interfaces, will by default return a DMA address
|
||||
The coherent DMA mapping interfaces will by default return a DMA address
|
||||
which is 32-bit addressable. Even if the device indicates (via the DMA mask)
|
||||
that it may address the upper 32-bits, consistent allocation will only
|
||||
return > 32-bit addresses for DMA if the consistent DMA mask has been
|
||||
that it may address the upper 32-bits, coherent allocation will only
|
||||
return > 32-bit addresses for DMA if the coherent DMA mask has been
|
||||
explicitly changed via dma_set_coherent_mask(). This is true of the
|
||||
dma_pool interface as well.
|
||||
|
||||
@@ -497,7 +497,7 @@ program address space. Such platforms can and do report errors in the
|
||||
kernel logs when the DMA controller hardware detects violation of the
|
||||
permission setting.
|
||||
|
||||
Only streaming mappings specify a direction, consistent mappings
|
||||
Only streaming mappings specify a direction, coherent mappings
|
||||
implicitly have a direction attribute setting of
|
||||
DMA_BIDIRECTIONAL.
|
||||
|
||||
|
||||
@@ -8,15 +8,15 @@ This document describes the DMA API. For a more gentle introduction
|
||||
of the API (and actual examples), see Documentation/core-api/dma-api-howto.rst.
|
||||
|
||||
This API is split into two pieces. Part I describes the basic API.
|
||||
Part II describes extensions for supporting non-consistent memory
|
||||
Part II describes extensions for supporting non-coherent memory
|
||||
machines. Unless you know that your driver absolutely has to support
|
||||
non-consistent platforms (this is usually only legacy platforms) you
|
||||
non-coherent platforms (this is usually only legacy platforms) you
|
||||
should only use the API described in part I.
|
||||
|
||||
Part I - dma_API
|
||||
Part I - DMA API
|
||||
----------------
|
||||
|
||||
To get the dma_API, you must #include <linux/dma-mapping.h>. This
|
||||
To get the DMA API, you must #include <linux/dma-mapping.h>. This
|
||||
provides dma_addr_t and the interfaces described below.
|
||||
|
||||
A dma_addr_t can hold any valid DMA address for the platform. It can be
|
||||
@@ -33,13 +33,13 @@ Part Ia - Using large DMA-coherent buffers
|
||||
dma_alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_handle, gfp_t flag)
|
||||
|
||||
Consistent memory is memory for which a write by either the device or
|
||||
Coherent memory is memory for which a write by either the device or
|
||||
the processor can immediately be read by the processor or device
|
||||
without having to worry about caching effects. (You may however need
|
||||
to make sure to flush the processor's write buffers before telling
|
||||
devices to read that memory.)
|
||||
|
||||
This routine allocates a region of <size> bytes of consistent memory.
|
||||
This routine allocates a region of <size> bytes of coherent memory.
|
||||
|
||||
It returns a pointer to the allocated region (in the processor's virtual
|
||||
address space) or NULL if the allocation failed.
|
||||
@@ -48,15 +48,14 @@ It also returns a <dma_handle> which may be cast to an unsigned integer the
|
||||
same width as the bus and given to the device as the DMA address base of
|
||||
the region.
|
||||
|
||||
Note: consistent memory can be expensive on some platforms, and the
|
||||
Note: coherent memory can be expensive on some platforms, and the
|
||||
minimum allocation length may be as big as a page, so you should
|
||||
consolidate your requests for consistent memory as much as possible.
|
||||
consolidate your requests for coherent memory as much as possible.
|
||||
The simplest way to do that is to use the dma_pool calls (see below).
|
||||
|
||||
The flag parameter (dma_alloc_coherent() only) allows the caller to
|
||||
specify the ``GFP_`` flags (see kmalloc()) for the allocation (the
|
||||
implementation may choose to ignore flags that affect the location of
|
||||
the returned memory, like GFP_DMA).
|
||||
The flag parameter allows the caller to specify the ``GFP_`` flags (see
|
||||
kmalloc()) for the allocation (the implementation may ignore flags that affect
|
||||
the location of the returned memory, like GFP_DMA).
|
||||
|
||||
::
|
||||
|
||||
@@ -64,19 +63,18 @@ the returned memory, like GFP_DMA).
|
||||
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
|
||||
dma_addr_t dma_handle)
|
||||
|
||||
Free a region of consistent memory you previously allocated. dev,
|
||||
size and dma_handle must all be the same as those passed into
|
||||
dma_alloc_coherent(). cpu_addr must be the virtual address returned by
|
||||
the dma_alloc_coherent().
|
||||
Free a previously allocated region of coherent memory. dev, size and dma_handle
|
||||
must all be the same as those passed into dma_alloc_coherent(). cpu_addr must
|
||||
be the virtual address returned by dma_alloc_coherent().
|
||||
|
||||
Note that unlike their sibling allocation calls, these routines
|
||||
may only be called with IRQs enabled.
|
||||
Note that unlike the sibling allocation call, this routine may only be called
|
||||
with IRQs enabled.
|
||||
|
||||
|
||||
Part Ib - Using small DMA-coherent buffers
|
||||
------------------------------------------
|
||||
|
||||
To get this part of the dma_API, you must #include <linux/dmapool.h>
|
||||
To get this part of the DMA API, you must #include <linux/dmapool.h>
|
||||
|
||||
Many drivers need lots of small DMA-coherent memory regions for DMA
|
||||
descriptors or I/O buffers. Rather than allocating in units of a page
|
||||
@@ -85,78 +83,29 @@ much like a struct kmem_cache, except that they use the DMA-coherent allocator,
|
||||
not __get_free_pages(). Also, they understand common hardware constraints
|
||||
for alignment, like queue heads needing to be aligned on N-byte boundaries.
|
||||
|
||||
.. kernel-doc:: mm/dmapool.c
|
||||
:export:
|
||||
|
||||
::
|
||||
|
||||
struct dma_pool *
|
||||
dma_pool_create(const char *name, struct device *dev,
|
||||
size_t size, size_t align, size_t alloc);
|
||||
|
||||
dma_pool_create() initializes a pool of DMA-coherent buffers
|
||||
for use with a given device. It must be called in a context which
|
||||
can sleep.
|
||||
|
||||
The "name" is for diagnostics (like a struct kmem_cache name); dev and size
|
||||
are like what you'd pass to dma_alloc_coherent(). The device's hardware
|
||||
alignment requirement for this type of data is "align" (which is expressed
|
||||
in bytes, and must be a power of two). If your device has no boundary
|
||||
crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
|
||||
from this pool must not cross 4KByte boundaries.
|
||||
|
||||
::
|
||||
|
||||
void *
|
||||
dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
|
||||
dma_addr_t *handle)
|
||||
|
||||
Wraps dma_pool_alloc() and also zeroes the returned memory if the
|
||||
allocation attempt succeeded.
|
||||
|
||||
|
||||
::
|
||||
|
||||
void *
|
||||
dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
|
||||
dma_addr_t *dma_handle);
|
||||
|
||||
This allocates memory from the pool; the returned memory will meet the
|
||||
size and alignment requirements specified at creation time. Pass
|
||||
GFP_ATOMIC to prevent blocking, or if it's permitted (not
|
||||
in_interrupt, not holding SMP locks), pass GFP_KERNEL to allow
|
||||
blocking. Like dma_alloc_coherent(), this returns two values: an
|
||||
address usable by the CPU, and the DMA address usable by the pool's
|
||||
device.
|
||||
|
||||
::
|
||||
|
||||
void
|
||||
dma_pool_free(struct dma_pool *pool, void *vaddr,
|
||||
dma_addr_t addr);
|
||||
|
||||
This puts memory back into the pool. The pool is what was passed to
|
||||
dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
|
||||
were returned when that routine allocated the memory being freed.
|
||||
|
||||
::
|
||||
|
||||
void
|
||||
dma_pool_destroy(struct dma_pool *pool);
|
||||
|
||||
dma_pool_destroy() frees the resources of the pool. It must be
|
||||
called in a context which can sleep. Make sure you've freed all allocated
|
||||
memory back to the pool before you destroy it.
|
||||
.. kernel-doc:: include/linux/dmapool.h
|
||||
|
||||
|
||||
Part Ic - DMA addressing limitations
|
||||
------------------------------------
|
||||
|
||||
DMA mask is a bit mask of the addressable region for the device. In other words,
|
||||
if applying the DMA mask (a bitwise AND operation) to the DMA address of a
|
||||
memory region does not clear any bits in the address, then the device can
|
||||
perform DMA to that memory region.
|
||||
|
||||
All the below functions which set a DMA mask may fail if the requested mask
|
||||
cannot be used with the device, or if the device is not capable of doing DMA.
|
||||
|
||||
::
|
||||
|
||||
int
|
||||
dma_set_mask_and_coherent(struct device *dev, u64 mask)
|
||||
|
||||
Checks to see if the mask is possible and updates the device
|
||||
streaming and coherent DMA mask parameters if it is.
|
||||
Updates both streaming and coherent DMA masks.
|
||||
|
||||
Returns: 0 if successful and a negative error if not.
|
||||
|
||||
@@ -165,8 +114,7 @@ Returns: 0 if successful and a negative error if not.
|
||||
int
|
||||
dma_set_mask(struct device *dev, u64 mask)
|
||||
|
||||
Checks to see if the mask is possible and updates the device
|
||||
parameters if it is.
|
||||
Updates only the streaming DMA mask.
|
||||
|
||||
Returns: 0 if successful and a negative error if not.
|
||||
|
||||
@@ -175,8 +123,7 @@ Returns: 0 if successful and a negative error if not.
|
||||
int
|
||||
dma_set_coherent_mask(struct device *dev, u64 mask)
|
||||
|
||||
Checks to see if the mask is possible and updates the device
|
||||
parameters if it is.
|
||||
Updates only the coherent DMA mask.
|
||||
|
||||
Returns: 0 if successful and a negative error if not.
|
||||
|
||||
@@ -231,12 +178,32 @@ transfer memory ownership. Returns %false if those calls can be skipped.
|
||||
unsigned long
|
||||
dma_get_merge_boundary(struct device *dev);
|
||||
|
||||
Returns the DMA merge boundary. If the device cannot merge any the DMA address
|
||||
Returns the DMA merge boundary. If the device cannot merge any DMA address
|
||||
segments, the function returns 0.
|
||||
|
||||
Part Id - Streaming DMA mappings
|
||||
--------------------------------
|
||||
|
||||
Streaming DMA allows to map an existing buffer for DMA transfers and then
|
||||
unmap it when finished. Map functions are not guaranteed to succeed, so the
|
||||
return value must be checked.
|
||||
|
||||
.. note::
|
||||
|
||||
In particular, mapping may fail for memory not addressable by the
|
||||
device, e.g. if it is not within the DMA mask of the device and/or a
|
||||
connecting bus bridge. Streaming DMA functions try to overcome such
|
||||
addressing constraints, either by using an IOMMU (a device which maps
|
||||
I/O DMA addresses to physical memory addresses), or by copying the
|
||||
data to/from a bounce buffer if the kernel is configured with a
|
||||
:doc:`SWIOTLB <swiotlb>`. However, these methods are not always
|
||||
available, and even if they are, they may still fail for a number of
|
||||
reasons.
|
||||
|
||||
In short, a device driver may need to be wary of where buffers are
|
||||
located in physical memory, especially if the DMA mask is less than 32
|
||||
bits.
|
||||
|
||||
::
|
||||
|
||||
dma_addr_t
|
||||
@@ -246,9 +213,7 @@ Part Id - Streaming DMA mappings
|
||||
Maps a piece of processor virtual memory so it can be accessed by the
|
||||
device and returns the DMA address of the memory.
|
||||
|
||||
The direction for both APIs may be converted freely by casting.
|
||||
However the dma_API uses a strongly typed enumerator for its
|
||||
direction:
|
||||
The DMA API uses a strongly typed enumerator for its direction:
|
||||
|
||||
======================= =============================================
|
||||
DMA_NONE no direction (used for debugging)
|
||||
@@ -259,31 +224,13 @@ DMA_BIDIRECTIONAL direction isn't known
|
||||
|
||||
.. note::
|
||||
|
||||
Not all memory regions in a machine can be mapped by this API.
|
||||
Further, contiguous kernel virtual space may not be contiguous as
|
||||
Contiguous kernel virtual space may not be contiguous as
|
||||
physical memory. Since this API does not provide any scatter/gather
|
||||
capability, it will fail if the user tries to map a non-physically
|
||||
contiguous piece of memory. For this reason, memory to be mapped by
|
||||
this API should be obtained from sources which guarantee it to be
|
||||
physically contiguous (like kmalloc).
|
||||
|
||||
Further, the DMA address of the memory must be within the
|
||||
dma_mask of the device (the dma_mask is a bit mask of the
|
||||
addressable region for the device, i.e., if the DMA address of
|
||||
the memory ANDed with the dma_mask is still equal to the DMA
|
||||
address, then the device can perform DMA to the memory). To
|
||||
ensure that the memory allocated by kmalloc is within the dma_mask,
|
||||
the driver may specify various platform-dependent flags to restrict
|
||||
the DMA address range of the allocation (e.g., on x86, GFP_DMA
|
||||
guarantees to be within the first 16MB of available DMA addresses,
|
||||
as required by ISA devices).
|
||||
|
||||
Note also that the above constraints on physical contiguity and
|
||||
dma_mask may not apply if the platform has an IOMMU (a device which
|
||||
maps an I/O DMA address to a physical memory address). However, to be
|
||||
portable, device driver writers may *not* assume that such an IOMMU
|
||||
exists.
|
||||
|
||||
.. warning::
|
||||
|
||||
Memory coherency operates at a granularity called the cache
|
||||
@@ -325,8 +272,7 @@ DMA_BIDIRECTIONAL direction isn't known
|
||||
enum dma_data_direction direction)
|
||||
|
||||
Unmaps the region previously mapped. All the parameters passed in
|
||||
must be identical to those passed in (and returned) by the mapping
|
||||
API.
|
||||
must be identical to those passed to (and returned by) dma_map_single().
|
||||
|
||||
::
|
||||
|
||||
@@ -376,10 +322,10 @@ action (e.g. reduce current DMA mapping usage or delay and try again later).
|
||||
dma_map_sg(struct device *dev, struct scatterlist *sg,
|
||||
int nents, enum dma_data_direction direction)
|
||||
|
||||
Returns: the number of DMA address segments mapped (this may be shorter
|
||||
than <nents> passed in if some elements of the scatter/gather list are
|
||||
physically or virtually adjacent and an IOMMU maps them with a single
|
||||
entry).
|
||||
Maps a scatter/gather list for DMA. Returns the number of DMA address segments
|
||||
mapped, which may be smaller than <nents> passed in if several consecutive
|
||||
sglist entries are merged (e.g. with an IOMMU, or if some adjacent segments
|
||||
just happen to be physically contiguous).
|
||||
|
||||
Please note that the sg cannot be mapped again if it has been mapped once.
|
||||
The mapping process is allowed to destroy information in the sg.
|
||||
@@ -403,9 +349,8 @@ With scatterlists, you use the resulting mapping like this::
|
||||
where nents is the number of entries in the sglist.
|
||||
|
||||
The implementation is free to merge several consecutive sglist entries
|
||||
into one (e.g. with an IOMMU, or if several pages just happen to be
|
||||
physically contiguous) and returns the actual number of sg entries it
|
||||
mapped them to. On failure 0, is returned.
|
||||
into one. The returned number is the actual number of sg entries it
|
||||
mapped them to. On failure, 0 is returned.
|
||||
|
||||
Then you should loop count times (note: this can be less than nents times)
|
||||
and use sg_dma_address() and sg_dma_len() macros where you previously
|
||||
@@ -775,19 +720,19 @@ memory or doing partial flushes.
|
||||
of two for easy alignment.
|
||||
|
||||
|
||||
Part III - Debug drivers use of the DMA-API
|
||||
Part III - Debug drivers use of the DMA API
|
||||
-------------------------------------------
|
||||
|
||||
The DMA-API as described above has some constraints. DMA addresses must be
|
||||
The DMA API as described above has some constraints. DMA addresses must be
|
||||
released with the corresponding function with the same size for example. With
|
||||
the advent of hardware IOMMUs it becomes more and more important that drivers
|
||||
do not violate those constraints. In the worst case such a violation can
|
||||
result in data corruption up to destroyed filesystems.
|
||||
|
||||
To debug drivers and find bugs in the usage of the DMA-API checking code can
|
||||
To debug drivers and find bugs in the usage of the DMA API, checking code can
|
||||
be compiled into the kernel which will tell the developer about those
|
||||
violations. If your architecture supports it you can select the "Enable
|
||||
debugging of DMA-API usage" option in your kernel configuration. Enabling this
|
||||
debugging of DMA API usage" option in your kernel configuration. Enabling this
|
||||
option has a performance impact. Do not enable it in production kernels.
|
||||
|
||||
Once booted, the resulting kernel will contain code which does some bookkeeping
|
||||
@@ -826,7 +771,7 @@ example warning message may look like this::
|
||||
<EOI> <4>---[ end trace f6435a98e2a38c0e ]---
|
||||
|
||||
The driver developer can find the driver and the device including a stacktrace
|
||||
of the DMA-API call which caused this warning.
|
||||
of the DMA API call which caused this warning.
|
||||
|
||||
By default, only the first error will result in a warning message. All other
|
||||
errors will only be silently counted. This limitation exists to prevent the code
|
||||
@@ -834,7 +779,7 @@ from flooding your kernel log. To support debugging a device driver this can
|
||||
be disabled via debugfs. See the debugfs interface documentation below for
|
||||
details.
|
||||
|
||||
The debugfs directory for the DMA-API debugging code is called dma-api/. In
|
||||
The debugfs directory for the DMA API debugging code is called dma-api/. In
|
||||
this directory the following files can currently be found:
|
||||
|
||||
=============================== ===============================================
|
||||
@@ -882,7 +827,7 @@ dma-api/driver_filter You can write a name of a driver into this file
|
||||
|
||||
If you have this code compiled into your kernel it will be enabled by default.
|
||||
If you want to boot without the bookkeeping anyway you can provide
|
||||
'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
|
||||
'dma_debug=off' as a boot parameter. This will disable DMA API debugging.
|
||||
Note that you cannot enable it again at runtime. You have to reboot to do
|
||||
so.
|
||||
|
||||
@@ -915,3 +860,9 @@ the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
|
||||
this flag is still set, prints warning message that includes call trace that
|
||||
leads up to the unmap. This interface can be called from dma_mapping_error()
|
||||
routines to enable DMA mapping error check debugging.
|
||||
|
||||
Functions and structures
|
||||
========================
|
||||
|
||||
.. kernel-doc:: include/linux/scatterlist.h
|
||||
.. kernel-doc:: lib/scatterlist.c
|
||||
|
||||
@@ -105,7 +105,7 @@ has to do extra work between the various steps. In such cases it has to
|
||||
ensure that enter_from_user_mode() is called first on entry and
|
||||
exit_to_user_mode() is called last on exit.
|
||||
|
||||
Do not nest syscalls. Nested systcalls will cause RCU and/or context tracking
|
||||
Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
|
||||
to print a warning.
|
||||
|
||||
KVM
|
||||
@@ -115,8 +115,8 @@ Entering or exiting guest mode is very similar to syscalls. From the host
|
||||
kernel point of view the CPU goes off into user space when entering the
|
||||
guest and returns to the kernel on exit.
|
||||
|
||||
kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
|
||||
and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
|
||||
guest_state_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
|
||||
and guest_state_exit_irqoff() is the KVM variant of enter_from_user_mode().
|
||||
The state operations have the same ordering.
|
||||
|
||||
Task work handling is done separately for guest at the boundary of the
|
||||
|
||||
@@ -54,6 +54,7 @@ Library functionality that is used throughout the kernel.
|
||||
union_find
|
||||
min_heap
|
||||
parser
|
||||
list
|
||||
|
||||
Low level entry and exit
|
||||
========================
|
||||
|
||||
@@ -3,12 +3,6 @@ The Linux Kernel API
|
||||
====================
|
||||
|
||||
|
||||
List Management Functions
|
||||
=========================
|
||||
|
||||
.. kernel-doc:: include/linux/list.h
|
||||
:internal:
|
||||
|
||||
Basic C Library Functions
|
||||
=========================
|
||||
|
||||
@@ -136,26 +130,28 @@ Arithmetic Overflow Checking
|
||||
CRC Functions
|
||||
-------------
|
||||
|
||||
.. kernel-doc:: lib/crc4.c
|
||||
.. kernel-doc:: lib/crc/crc4.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: lib/crc7.c
|
||||
.. kernel-doc:: lib/crc/crc7.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: lib/crc8.c
|
||||
.. kernel-doc:: lib/crc/crc8.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: lib/crc16.c
|
||||
.. kernel-doc:: lib/crc/crc16.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: lib/crc32.c
|
||||
|
||||
.. kernel-doc:: lib/crc-ccitt.c
|
||||
.. kernel-doc:: lib/crc/crc-ccitt.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: lib/crc-itu-t.c
|
||||
.. kernel-doc:: lib/crc/crc-itu-t.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: include/linux/crc32.h
|
||||
|
||||
.. kernel-doc:: include/linux/crc64.h
|
||||
|
||||
Base 2 log and power Functions
|
||||
------------------------------
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user